#!/usr/bin/env python """Compiles C into assembly for the practicum processor (PP2) Copyright (C) 2011-2014 Peter Wu Licensed under the MIT license . """ import sys, re, os, operator from pycparser import c_parser, c_ast, parse_file from Asm import Asm from Registers import Registers from LinkedNode import LinkedNode from Function import Function from Variables import Variables, GlobalVariables from AsmParser import AsmParser __author__ = "Peter Wu" __copyright__ = "Copyright (C) 2011-2014 Peter Wu" __credits__ = ["Xander Houtman"] __license__ = "MIT" __version__ = "1.0" __maintainer__ = "Peter Wu" __email__ = "lekensteyn@gmail.com" class Logger(object): def __init__(self): pass def info(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="info") def warning(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="warning") def error(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="error") while linked_node: print(" in {} {}".format(linked_node.node.coord, linked_node.type)) linked_node = linked_node.parent raise def log(self, message, linked_node=None, type="log"): source = "" if isinstance(linked_node, LinkedNode): source = str(linked_node.getLocation()) + ": " print("{}:{} {}".format(type, source, message)) class Parse(object): def __init__(self): self.logger = Logger() self.node = None # word size in bits self.WORDSIZE = 18 # preload operators and their instructions self.math_ops = { "+": "ADD", "-": "SUB", "*": "MULS", "/": "DIV", "%": "MOD", "&": "AND", "|": "OR", "^": "XOR" } self.comparison_ops = { "<" : "BLT", ">" : "BGT", "<=": "BLE", ">=": "BGE", "==": "BEQ", "!=": "BNE" } self.shift_ops = { ">>": "DIV", "<<": "MULS" } self.binary_ops = self.math_ops.copy() self.binary_ops.update(self.comparison_ops) self.binary_ops.update(self.shift_ops) # global variable names to be defined in @DATA. key: name, value: list # of initializers self.varNames = {} self.functions = {} # holds instructions for initializing the global variables self.globalInit = [] # holds the miscellaneous instructions for @CODE self.codeSegment = [] self.labels = set() # helper for asm code generation self.asm = Asm() # watches which registers are used and which are not self.registers = Registers() def loadFile(self, filename, use_cpp=True, cpp_args=[]): """Loads a file and parses the contents of it""" # reset state self.asm_node = self.node = None ext = os.path.splitext(filename)[1].lower() if ext: ext = ext[1:] if ext == "i": # C source code which should not be preprocessed self.node = parse_file(filename, use_cpp=False) elif ext in ("c", "h"): self.node = parse_file(filename, use_cpp=use_cpp, cpp_args=cpp_args) elif ext == "asm": self.asm_node = AsmParser(filename) else: self.logger.error("Unrecognized file extension '{}'".format(ext)) def show(self): self.node.show(showcoord=True) def uniqLbl(self, labelname): """Generates an unique label name, save and return it""" lbl = labelname if labelname in self.labels: i = 0 while True: lbl = labelname + "_" + str(i) if not lbl in self.labels: break i += 1 self.addLabel(lbl) return lbl def addLabel(self, labelname): if labelname in self.labels: self.logger.error("Redefinition of label '{}'".format(labelname)) self.labels.add(labelname) def compile(self): """Processes a loaded file and adds it to the assembly output""" if self.node is not None: self.compileAST() elif self.asm_node is not None: self.compileASM() else: self.logger.error("No AST node nor assembly lines found to be" " processed") def compileAST(self): """Loops through the nodes in an AST syntax tree and generates ASM""" root_node = LinkedNode(self.node) variables = GlobalVariables(self.varNames) root_node.setVariablesObj(variables) for thing in self.node.ext: if not isinstance(thing, c_ast.Decl) and not isinstance(thing, c_ast.FuncDef): linked_node = LinkedNode(thing, parent=root_node) self.logger.warning("I expected a variable or function" " definition or declaration in the root" " node, but also found " + str(thing), linked_node=linked_node) self.codeSegment += self.parseStatement(thing, root_node) # hack: static functions are local to a file, so remove them for funcname in list(self.functions.keys()): if self.functions[funcname].isStatic(): del self.functions[funcname] def compileASM(self): """Processes lines of assembly and merge it into the output""" for label in self.asm_node.labels: new_label = label # rename existing names if label in self.labels: new_label = self.uniqLbl(label) self.asm_node.renameId(label, new_label) self.labels.add(new_label) for name, init_vals in self.asm_node.getDataDefinitions().items(): if name in self.varNames: old_count = len(self.varNames[name]) new_count = len(init_vals) if old_count != new_count: self.logger.error("Global variable '{}' was defined before" ", but the length of the initializers" " are not equal. Old: {}, new: {}" .format(name, old_count, new_count)) elif self.varNames[name] != init_vals: self.logger.warning("The initialization of global variable" " '{}' contains different values" .format(name)) self.varNames[name] = init_vals self.codeSegment += self.asm_node.getCodeLines() def getSource(self): """Retrieves the ASM source. You need to compile it first""" output = [] output.append("@DATA") for varName, initializers in self.varNames.items(): padding = " " * (16 - len(varName) - 1) assert len(initializers) > 0, "Size of '{}' must be at least 1".format(varName) output.append(varName + padding + " DW " + ",".join(initializers)) output.append("") output.append("@CODE") # initialization of global variables output += self.globalInit if not "main" in self.functions: if "fn_main" in self.labels: self.logger.info("No main function was found in C, but label" " 'fn_main' exists in assembly") else: self.logger.warning("No main function found with label 'fn_main'") for function in self.functions.values(): if not function.isLinked(): if "fn_" + function.name in self.labels: self.logger.info("Function '{}' is declared and found in" " assembly".format(function.name)) else: self.logger.warning("Function '{}' is declared but not defined" .format(function.name)) output.append(self.asm.branch_op("BRA", "fn_main")) output.append("") output += self.codeSegment output.append("") output.append("@END") output.append("") return os.linesep.join(output) def parseFuncDef(self, linked_node): node = linked_node.node linked_node.incrementLevel() self.asm.level = linked_node.level # parse function declaration (which will parse params as well) assert not self.parseStatement(node.decl, linked_node), ("Function" " declaration should not return a line of assembly") funcname = node.decl.name function = self.functions[funcname] if function.isLinked(): self.logger.error("Redefinition of function '{}'".format(funcname), linked_node=linked_node) lbl_func = function.labelBegin() lbl_end = function.labelEnd() linked_node.setFunction(function) # save Base Pointer lines = [self.asm.push(self.registers.BP, lbl_func)] lines.append(self.asm.binary_op("LOAD", self.registers.BP, "SP")) body = self.parseStatement(node.body, linked_node) self.asm.level = linked_node.level # Reserve space on the stack for local variables if necessary if function.reserved_stack: lines.append(self.asm.binary_op("SUB", "SP", function.reserved_stack)) lines += body # restore SP lines.append(self.asm.binary_op("LOAD", "SP", self.registers.BP, label=lbl_end)) else: lines += body lines.append(self.asm.noop(lbl_end)) # restore Base Pointer lines.append(self.asm.pull(self.registers.BP)) # return from function lines.append(self.asm.format_line("RTS")) # add an extra newline lines.append("") return lines def functionLabel(self, name): if name in self.functions: function = self.functions[name] return function.labelBegin() return None def parseCompound(self, linked_node): linked_node.incrementLevel() lines = [] if linked_node.node.block_items: for cn in linked_node.node.block_items: lines += self.parseStatement(cn, linked_node) return lines def parseBinaryLogicalOp(self, linked_node): """Returns lines of assembly for a logical OR or AND operation ASM for logical OR: operand1 BEQ fal ; if operand1 is false operand2 BEQ fal ; if operand2 is false tru:LOAD R0 1 BRA end fal:LOAD R0 0 end:NOOP ASM for logical AND: operand1 BNE tru ; if operand1 is true operand2 BNE tru ; if operand2 is true fal:LOAD R0 0 BRA end tru:LOAD R0 1 end:NOOP """ node = linked_node.node assert node.op in ("&&", "||"), "Expected a && or || operator, found '{}' instead".format(node.op) isAnd = node.op == "&&" reg_result = "R0" lbl_false = self.uniqLbl("lOp_false") lbl_true = self.uniqLbl("lOp_true") lbl_end = self.uniqLbl("lOp_end") # branch instruction to be inserted if && yields false or if || is true if isAnd: branch_early = self.asm.branch_op("BEQ", lbl_false) else: branch_early = self.asm.branch_op("BNE", lbl_true) result_true = self.asm.binary_op("LOAD", reg_result, 1, label=lbl_true) result_false = self.asm.binary_op("LOAD", reg_result, 0, label=lbl_false) lines = self.parseExpression(node.left, linked_node, level_increment=True) lines.append(branch_early) lines += self.parseExpression(node.right, linked_node, level_increment=True) lines.append(branch_early) # fix indentation self.asm.level = linked_node.level # if 'true && true' or 'false || false' if isAnd: lines.append(result_true) else: lines.append(result_false) lines.append(self.asm.branch_op("BRA", lbl_end)) # if 'false && x' or 'true && false' or 'x || true' or 'true || x' if isAnd: lines.append(result_false) else: lines.append(result_true) lines.append(self.asm.noop(lbl_end, reg_result)) return lines def parseBinaryOp(self, linked_node): node = linked_node.node lines = [] if node.op in self.binary_ops: op = self.binary_ops[node.op] elif node.op in ("&&", "||"): return self.parseBinaryLogicalOp(linked_node) else: self.logger.error("Binary op is not implemented yet: " + node.op, linked_node=linked_node) # process the first operand, the result is likely in R0 or R1 ops_first = self.parseExpression(node.left, linked_node, level_increment=True) assert len(ops_first), "Empty operand for binary operation (first part)" lines += ops_first reg_first = self.registers.find_register(ops_first, fatal=True) # fix indentation after running parseStatement self.asm.level = linked_node.level # optimization, but this should probably be done afterwards if isinstance(node.right, c_ast.Constant): operand = self.convertInteger(node=node.right) else: lines.append(self.asm.push(reg_first)) ops_second = self.parseExpression(node.right, linked_node, level_increment=True) assert len(ops_second), "Empty operand for binary operation (second part)" lines += ops_second # the register storing the result of the second operand reg_second = self.registers.find_register(ops_second, fatal=True) operand = reg_second # since we use the stack for calculations, we can use less # registers (2). Use the Registers class for determining the next # free register self.registers.alloc(reg_second) reg_first = self.registers.alloc() self.registers.free(reg_second) self.registers.free(reg_first) # put the data back in the free register lines.append(self.asm.pull(reg_first)) # perform binary operation if node.op in self.comparison_ops: bin_end = self.uniqLbl("cmpEnd") bin_true = self.uniqLbl("cmpTrue") lines.append(self.asm.binary_op("CMP", reg_first, operand)) # a comparison is more likely to be true, so optimize for that: # true will follow the path CMP - - LOAD - NOOP, where false # will follow: CMP - - LOAD - BRA - NOOP. This just happened # to be the easiest to implement, so don't blame me for premature # optimization ;) lines.append(self.asm.branch_op(op, bin_true)) lines.append(self.asm.binary_op("LOAD", reg_first, 0)) lines.append(self.asm.branch_op("BRA", bin_end)) lines.append(self.asm.binary_op("LOAD", reg_first, 1, label=bin_true)) lines.append(self.asm.noop(bin_end, register=reg_first)) elif node.op in ("<<", ">>"): lines += self.processShift(node.op, reg_first, operand, linked_node) else: lines.append(self.asm.binary_op(op, reg_first, operand)) # explicitly set the PSW words because DIV and MOD are stupid and set # the Z flag depending on the remainder if op in ("DIV", "MOD"): lines.append(self.asm.binary_op("LOAD", reg_first, reg_first)) return lines def processShift(self, operator, reg_number, operand_shift, linked_node=None): """Returns lines for a shift expression Keyword arguments: operator -- Either >> for right shift or << for left shift reg_number -- A register Rn containing the number to be shifted operand_shift -- An operand for the shift count. If it's a register, the contents of it will be used as the shift count. Otherwise, the string is interpreted as a number from assembly (allowing $ and % notations) linked_node -- An optional LinkedNode for showing the context in warning messages """ lines = [] assert operator in self.shift_ops, "Invalid shift operator '{}'".format(operator) binop = self.shift_ops[operator] if self.registers.is_register(operand_shift): # alias operand_shift for clarity below, reg_shift is a register reg_shift = operand_shift lbl_shift = self.uniqLbl("shift_loop") lbl_shift_end = self.uniqLbl("shift_done") # necessary to set the status flags for the branch conditions lines.append(self.asm.binary_op("CMP", reg_shift, 0)) # if operand 2 is lower than 1, do not bother shifting operand # 1 and jump to the end lines.append(self.asm.branch_op("BLE", lbl_shift_end)) # right shift needs special treatment for the exceptional case -1 if operator == ">>": # see below for an explanation for what's happening here lines.append(self.asm.binary_op("AND", reg_number, "%10")) lines.append(self.asm.binary_op("DIV", reg_number, 2)) mask = "%0" + (self.WORDSIZE - 1) * "1" lines.append(self.asm.binary_op("AND", reg_number, mask)) # decrease shift count and finish if shift count <= 0 lines.append(self.asm.binary_op("SUB", reg_shift, 1)) lines.append(self.asm.branch_op("BLE", lbl_shift_end)) # do a single shift and decrease remaining shift count lines.append(self.asm.binary_op(binop, reg_number, 2, label=lbl_shift)) lines.append(self.asm.binary_op("SUB", reg_shift, 1)) # if we came from BLE, x <= 0. BGT only jumps if x > 0, so # we can safe another NOOP by adding the label here lines.append(self.asm.branch_op("BGT", lbl_shift, label=lbl_shift_end)) # expression rely on the fact that the result of an expression is # available in the last register. Without the below line, it'd look # in reg_shift lines.append(self.asm.binary_op("LOAD", reg_number, reg_number)) else: shift_bits = self.convertStrToInteger(operand_shift) if shift_bits <= 0: # optimize when shifting by 0, but warn for negative if shift_bits < 0: self.logger.warning("The shift count is negative, no shift" " operation will be performed", linked_node=linked_node) lines.append(self.asm.binary_op("LOAD", reg_number, reg_number)) else: operand = pow(2, shift_bits) if operator == ">>": # because DIV works with signed numbers, right shift is # a bit harder because the sign bit does not vanish # Assume 4-bit numbers in the next example. # binary | decimal # 1111 | -1 # 0111 | 7 # The rightmost bit is insignificant in a right shift # but causes issues with division by two for -1 because # it gets rounded to 0. After removal of the rightmost # bit and division by two, all bits are shifted left, # but the result is still negative. Since we're doing a # zero-padded shift (unsigned), clear the leftmost bit # For positive numbers, this has no side effects lines.append(self.asm.binary_op("AND", reg_number, "%10")) # shift it by one to the right lines.append(self.asm.binary_op("DIV", reg_number, 2)) # clear the leftmost bit by masking it with 011..11 mask = "%0" + (self.WORDSIZE - 1) * "1" lines.append(self.asm.binary_op("AND", reg_number, mask)) # decrease shift count and finish if nothing to shift operand /= 2 if operand > 1: # process the remaining shift lines.append(self.asm.binary_op(binop, reg_number, operand)) return lines def parseUnaryOpLValue(self, linked_node): lines = [] binops = { "++": "ADD", "--": "SUB" } op = linked_node.node.op linked_cn = LinkedNode(linked_node.node.expr, linked_node) if op == "&": lines = self.parseLValue(linked_cn) elif op in ("p++", "p--"): # XXX support postinc/dec raise RuntimeError("Post increment and post decrement operators" " are not supported.") binop = binops[op[1:]] stmt = linked_node.getStatementNode() if not stmt: self.logger.error("No statement found for post inc/decrement", linked_node=linked_node) # assume that the result register is R0 lvalue = self.parseLValue(linked_cn) lines = lvalue; stmt.post_lines += lvalue stmt.post_lines.append(self.asm.binary_op("LOAD", "R1", "[R0]")) stmt.post_lines.append(self.asm.binary_op(binop, "R1", 1)) stmt.post_lines.append(self.asm.binary_op("STOR", "R1", "[R0]")) elif op in ("--", "++"): binop = binops[op] lvalue = self.parseLValue(linked_cn) lines = lvalue; lines.append(self.asm.binary_op("LOAD", "R1", "[R0]")) lines.append(self.asm.binary_op(binop, "R1", 1)) lines.append(self.asm.binary_op("STOR", "R1", "[R0]")) else: self.logger.error("Unknown lvalue unary operator '{}'".format(op), linked_node=linked_node) return lines def parseUnaryOp(self, linked_node): """Returns lines of assembly for unary operators Supported operators are logical negation, one's complement (bitwise NOT), plus and minus ASM for logical NOT: operand BEQ fal ; if operand is false tru:LOAD R0 1 BRA end fal:LOAD R0 0 end:NOOP """ op = linked_node.node.op # some operators works on lvalues if op in ("--", "++", "p++", "p--", "&"): return self.parseUnaryOpLValue(linked_node) # first fetch the operand lines = self.parseExpression(linked_node.node.expr, linked_node) # determine the register in which the result is stored reg = self.registers.find_register(lines, fatal=True) if op == "*": # load the value Rn at address into Rn lines.append(self.asm.binary_op("LOAD", reg, "[" + reg + "]")) elif op == "+": # K&R A7.4.4 "[the unary +] was added for symmetry with unary -" pass elif op == "-": lines.append(self.asm.binary_op("MULS", reg, "-1")) elif op == "~": # Conforming K&R A7.4.6, signed types are treated as unsigned when # applying ~ lines.append(self.asm.binary_op("XOR", reg, "%1")) elif op == "!": lbl_false = self.uniqLbl("not_false") lbl_true = self.uniqLbl("not_true") lbl_end = self.uniqLbl("not_end") # assume that the status words of the registers describes the value # of the expression lines.append(self.asm.branch_op("BEQ", lbl_false)) lines.append(self.asm.binary_op("LOAD", reg, 0, label=lbl_true)) lines.append(self.asm.branch_op("BRA", lbl_end)) lines.append(self.asm.binary_op("LOAD", reg, 1, label=lbl_false)) lines.append(self.asm.noop(lbl_end, register=reg)) elif op == "sizeof": self.logger.error("Sizeof operator 'sizeof' is not supported.", linked_node=linked_node) else: self.logger.error("Unknown unary operator '{}'".format(op), linked_node=linked_node) return lines def parseTernaryOp(self, linked_node): """Returns lines of ASM for ternary operation cond?expr_true:expr_false It looks very similar to an If expect that the return value is important """ linked_node.incrementLevel() node = linked_node.node lbl_el = self.uniqLbl("el") # else lbl_end = self.uniqLbl("fi") # endif lines = self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_el)) # if true... then_block = self.parseExpression(node.iftrue, linked_node) # use a single register for the result, using the one of expr_true result_reg = self.registers.find_register(then_block, fatal=True) lines += then_block lines.append(self.asm.branch_op("BRA", lbl_end)) # else... else_block = self.parseExpression(node.iffalse, linked_node) assert len(else_block), "Else block is empty" # add labels for the else block if self.asm.has_label(else_block[0]): lines.append(self.asm.noop(lbl_el)) else: else_block[0] = self.asm.insert_label(else_block[0], lbl_el) lines += else_block result_reg_else = self.registers.find_register(else_block, fatal=True) # for consistency with the result of expr_true if result_reg_else != result_reg: lines.append(self.asm.binary_op("LOAD", result_reg, result_reg_else)) lines.append(self.asm.noop(lbl_end, register=result_reg)) return lines def convertInteger(self, value=None, node=None): """Returns a string of the value For integers, the string returned is a base-10 number modulo WORDSIZE """ if value is None: if isinstance(node, c_ast.Constant) and node.type == "int": value = node.value else: self.logger.error("Value is not given and node is not a valid" " integer constant") value = value.upper() if "LL" in value: # long long integer, ignore value = value.replace("LL", "", 1) elif "L" in value: # long integer, ignore value = value.replace("L", "", 1) if "U" in value: # unsigned integer, ignore for now value = value.replace("U", "", 1) if value.startswith("0X"): value = int(value[2:], 16) elif value.startswith("0"): # octal numbers are not understood by PP2 assembly, therefore convert it value = int(value, 8) else: value = int(value) # values are modulo WORDSIZE. If negative, the MSB is 1 value %= pow(2, self.WORDSIZE) return str(value) def convertStrToInteger(self, str): """Converts a PP2 assembly integer string to an integer""" str = str.upper() if str.startswith("$"): if str.startswith("$0"): return int(str[2:], 16) else: # if the leftmost bit of a 4-bit group is set, the preceding # bits should be 1 as well. E.g. $7 end with 0111 and is # therefore interpreted as 7 where $8 ends with 1111 which is # interpreted as -1 (all bits are set) val = str[1:] if int(val[1], 16) & 0b1000: hexsize = len(hex(pow(2, self.WORDSIZE) - 1)[2:]) val = val.rjust(hexsize, "F") return int(val, 16) elif str.startswith("%"): if str.startswith("%0"): return int(str[2:], 8) else: return int(str[1:].rjust(self.WORDSIZE, "1"), 8) return int(str, 10) def parseConstant(self, linked_node, register="R0"): node = linked_node.node if node.type == "int": value = self.convertInteger(node.value) else: self.logger.error("Unsupported constant type: " + node.type, linked_node=linked_node) return [self.asm.binary_op("LOAD", register, value)] def parseID(self, linked_node, register="R0"): """Returns the name for a name""" # XXX this is not made for functions try: addr = "+".join(linked_node.variables.getAddress(linked_node.node.name)) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) return [self.asm.binary_op("LOAD", register, "[" + addr + "]")] def parseIf(self, linked_node): linked_node.incrementLevel() node = linked_node.node lines = self.parseExpression(node.cond, linked_node) branch_op = "BEQ" then_block = self.parseStatement(node.iftrue, linked_node) if node.iffalse: else_block = self.parseStatement(node.iffalse, linked_node) # if(x){}else{y} is equivalent to if(!x){} if not then_block: # note: else can still be empty, if(x){}else{}, but that is # handled below with the "if then_block" then_block = else_block else_block = [] # invert result branch_op = "BNE" else: else_block = [] # if(x){}else{} is equivalent to x, which do not need the below if then_block: if else_block: lbl_false = self.uniqLbl("el") # else lbl_end = self.uniqLbl("fi") # endif else: lbl_end = lbl_false = self.uniqLbl("fi") # endif # if cond is satisfied, go to else (BEQ branches if Z=0, BNE # branches if Z!=0). Do not use CMP, rely on the status flags set # set by the condition lines.append(self.asm.branch_op(branch_op, lbl_false)) # if true... lines += then_block # else... if else_block: # this branch is part of the then block, but include it here to # avoid redundancy (BRA lbl, lbl: NOOP) lines.append(self.asm.branch_op("BRA", lbl_end)) # XXX this is optimization for labelling else, do it later? if self.asm.has_label(else_block[0]): lines.append(self.asm.noop(lbl_false)) else: else_block[0] = self.asm.insert_label(else_block[0], lbl_false) lines += else_block lines.append(self.asm.noop(lbl_end)) return lines def parseReturn(self, linked_node): """Return from a function. For non-void functions the result is in R0""" func_node = linked_node.getFunctionNode() if not func_node: self.logger.error("return not in function", linked_node=linked_node) return_value = linked_node.node.expr if return_value: lines = self.parseExpression(return_value, linked_node) else: lines = [] if return_value: reg = self.registers.find_register(lines, fatal=True) # ensure that the return value is in R0 for consistency if reg != "R0": lines.append(self.asm.binary_op("LOAD", "R0", reg)) self.asm.level = linked_node.level - 1 lines.append(self.asm.branch_op("BRA", func_node.function.labelEnd())) # don't do the below, it may not cleanup properly #lines.append(self.asm.format_line("RTS")) return lines def parseFuncCall(self, linked_node): lines = [] # node.name is a c_ast.ID, the real function name is in name funcname = linked_node.node.name.name params = linked_node.node.args if params: linked_params = LinkedNode(params, linked_node) # call convention: params in reverse order for expr in reversed(params.exprs): line = self.parseExpression(expr, linked_params) result_reg = self.registers.find_register(line, fatal=True) lines += line lines.append(self.asm.push(result_reg)) lbl_func = self.functionLabel(funcname) if lbl_func is None: # name not found :? perhaps an external declaration lbl_func = "fn_" + funcname self.logger.warning("call to undefined function, assuming label" " '{}'".format(lbl_func), linked_node=linked_node) lines.append(self.asm.branch_op("BRS", lbl_func)) if params: lines.append(self.asm.binary_op("ADD", "SP", len(params.exprs))) # by convention, a function call must return the result in R0, we # also make sure that the PSW flags are set properly lines.append(self.asm.binary_op("LOAD", "R0", "R0")) return lines def parseCast(self, linked_node): #self.logger.warning("Found a type cast, but these are unsupported.", linked_node=linked_node) return self.parseExpression(linked_node.node.expr, linked_node) def parseDoWhile(self, linked_node): node = linked_node.node lbl_do = self.uniqLbl("do") # used for supporting continue/break lbl_cond = self.uniqLbl("doCond") lbl_end = self.uniqLbl("doEnd") linked_node.setBreak(lbl_end) linked_node.setContinue(lbl_doCond) # provides a way to redo the loop lines = [self.asm.noop(lbl_do)] # do { ... }, expect do{/*empty*/} as well lines += self.parseStatement(node.stmt, linked_node) # while (...) # used for supporting continue lines.append(self.asm.noop(lbl_cond)) lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BNE", lbl_do)) # used for supporting break lines.append(self.asm.noop(lbl_end)) return lines def parseWhile(self, linked_node): node = linked_node.node lbl_while = self.uniqLbl("while") lbl_while_end = self.uniqLbl("whileEnd") # used for supporting break/continue linked_node.setBreak(lbl_while_end) linked_node.setContinue(lbl_while) # while ( lines = [self.asm.noop(lbl_while)] # .. lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_while_end)) # ){.. lines += self.parseStatement(node.stmt, linked_node) lines.append(self.asm.branch_op("BRA", lbl_while)) # } lines.append(self.asm.noop(lbl_while_end)) return lines def parseFor(self, linked_node): """Returns ASM for a for loop for (init; cond; next) { body; } is equivalent to: init; while (cond) { body; next; } Note that for (;;){} is allowed too which is equivalent to: while (true) {} """ node = linked_node.node lines = [] lbl_for = self.uniqLbl("for") lbl_for_end = self.uniqLbl("forEnd") # used for supporting break/continue linked_node.setBreak(lbl_for_end) if node.next: lbl_cont = self.uniqLbl("forNext") linked_node.setContinue(lbl_cont) else: linked_node.setContinue(lbl_for) if node.init: # init; if isinstance(node.init, c_ast.DeclList): # loop initial declaration is supported in C99 lines += self.parseDeclList(LinkedNode(node.init, linked_node)) else: lines += self.parseExpression(node.init, linked_node) # while ( lines.append(self.asm.noop(lbl_for)) if node.cond: # if no condition, it's should evaluate to true lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_for_end)) # ){ body; lines += self.parseStatement(node.stmt, linked_node) if node.next: # next; lines.append(self.asm.noop(lbl_cont)) lines += self.parseExpression(node.next, linked_node) lines.append(self.asm.branch_op("BRA", lbl_for)) # } lines.append(self.asm.noop(lbl_for_end)) return lines def parseExprList(self, linked_node): """Parse an expression list. This method should not be used when parsing a list declaration (done by parseDecl) nor function params """ lines = [] for expr in linked_node.node.exprs: lines += self.parseExpression(expr, linked_node) return lines def parseAssignment(self, linked_node): lines = [] node = linked_node.node expr_result = self.parseExpression(node.rvalue, linked_node) lines += expr_result result_reg = self.registers.find_register(expr_result) self.registers.alloc(result_reg) # a register which is available for storing the address of lvalue lvalue_reg = self.registers.alloc() self.registers.free(lvalue_reg) self.registers.free(result_reg) linked_lval = LinkedNode(node.lvalue, linked_node) # this lvalue may be an expression if there is an indirection if (linked_lval.type == "UnaryOp" and node.lvalue.op == "*" or linked_lval.type == "ArrayRef"): if linked_lval.type == "UnaryOp": lvalue = self.parseExpression(linked_lval.node.expr, linked_lval) else: lvalue = self.parseExpression(linked_lval.node, linked_node) lvalue_result_reg = self.registers.find_register(lvalue) for line in lvalue: # if the right value register is modified, we need to pull if self.registers.is_register_changed(line, result_reg): lines.append(self.asm.push(result_reg)) lines += lvalue # if the register of the left value equals the register of # the right value, pull the right value in the register # which was reserved for the left value if lvalue_result_reg == result_reg: result_reg = lvalue_reg lines.append(self.asm.pull(result_reg)) break else: lines += lvalue # update the register for the left value because the expression may # return a different result register lvalue_reg = lvalue_result_reg else: # if not an expression, it must be a single variable name lines += self.parseLValue(linked_lval, register=lvalue_reg) math_and_shift_ops = self.math_ops.copy() math_and_shift_ops.update(self.shift_ops) if node.op == "=": lines.append(self.asm.binary_op("STOR", result_reg, "[" + lvalue_reg + "]")) elif (len(node.op) >= 2 and node.op[-1] == "=" and node.op[0:-1] in math_and_shift_ops): # the operator, like << or + operator = node.op[0:-1] binop = math_and_shift_ops[operator] # abuse lvalue_reg to save a register by storing the value of the # lvalue. This should allow for doing x / y first before assigning # (writing) to y lines.append(self.asm.push(lvalue_reg)) # get the value of lvalue, e.g. the value of x lines.append(self.asm.binary_op("LOAD", lvalue_reg, "["+lvalue_reg+"]")) # perform the operation on the value if operator in self.shift_ops: lines += self.processShift(operator, lvalue_reg, result_reg, linked_node) else: lines.append(self.asm.binary_op(binop, lvalue_reg, result_reg)) # since lvalue_reg is now the integer result, use result_reg as the # register for holding the address of y lines.append(self.asm.pull(result_reg)) # finally store the result from lvalue_reg into result_reg lines.append(self.asm.binary_op("STOR", lvalue_reg, "[" + result_reg + "]")) else: self.logger.error("Unsupported assignment operator: " + node.op, linked_node=linked_node) return lines def parseLValue(self, linked_node, register="R0"): """Returns lines of ASM for a lvalue type. It's assumed that register does not change to another one. If this is changed in the future, please revisit all uses of it. """ lines = [] if linked_node.type == "UnaryOp" and linked_node.node.op == "*": lines += self.parseLValue(LinkedNode(linked_node.node.expr, linked_node), register) lines.append(self.asm.binary_op("LOAD", register, "[" + register + "]")) elif linked_node.type == "ID": try: var_reg, var_disp = linked_node.variables.getAddress(linked_node.node.name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) lines.append(self.asm.binary_op("LOAD", register, var_reg)) lines.append(self.asm.binary_op("ADD", register, var_disp)) else: self.logger.error("Expected a lvalue (pointer or names)", linked_node=linked_node) return lines def parseDecl(self, linked_node): """Parses declarations in the global context and for local variables (not params) """ lines = [] in_function = linked_node.getFunctionNode() is not None size = 1 linked_type = LinkedNode(linked_node.node.type, linked_node) name = linked_node.node.name if linked_type.type == "ArrayDecl": if linked_type.node.dim is not None: linked_dim_node = LinkedNode(linked_type.node.dim, linked_node) try: size = self.determineConstValue(linked_dim_node) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_dim_node) else: size = None node_init = linked_node.node.init # determine the array dimension based on the initializer if node_init and isinstance(node_init, c_ast.ExprList): real_size = len(node_init.exprs) if size is None: size = real_size elif real_size > size: self.logger.warning("excess elements in array initializer", linked_node=linked_node) # it's an error if the array dimension is unavailable, e.g.: # int a[]; if size is None: self.logger.error("Array size missing in '{}'".format(name), linked_node=linked_node) elif linked_type.type == "PtrDecl": # no check whether the pointer is valid or not pass elif linked_type.type == "TypeDecl": if not linked_type.node.type.names[0] in ("int", "void"): self.logger.warning("Only void and int types are supported", linked_node) elif linked_type.type == "FuncDecl": # a function declaration does not accept an initializer, but it's # still special because it has params return self.parseStatement(linked_type.node, linked_node) else: self.logger.error("Unknown declaration type '{}'".format(linked_type.type), linked_node=linked_node) try: # global variables may be declared multiple times, local variables # are not allowed for that if in_function or not linked_node.variables.isDeclared(name): is_static = "static" in linked_node.node.storage linked_node.variables.declName(name, size, is_static=is_static) # address of variable split up in register and displacement var_reg, var_disp = linked_node.variables.getAddress(name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) # if the declaration also has a definition if linked_node.node.init: # global variables needs to be marked as defined if not in_function: try: linked_node.variables.defName(name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) linked_init = LinkedNode(linked_node.node.init, linked_node) if (linked_type.type == "ArrayDecl" and linked_init.type == "ExprList"): # next available register for storing the value reg_value = self.registers.next_free() array_index = 0 for array_elm in linked_init.node.exprs: linked_arrelm = LinkedNode(array_elm, linked_init) try: # only constant expressions are allowed in array init init_val = str(self.determineConstValue(linked_arrelm)) if in_function: lines.append(self.asm.binary_op("LOAD", reg_value, init_val)) else: self.varNames[var_disp][array_index] = init_val except RuntimeError as errmsg: # only functions are allowed to have non-constant # expressions if not in_function: self.logger.error(errmsg, linked_node=linked_arrelm) init_val = self.parseExpression(array_elm, linked_init) # if the result register has changed, update reg_value = self.registers.find_register(init_val, fatal=True) lines += init_val if in_function: assert var_reg == self.registers.BP, ("Expected an" " array definition in a local variable") # an array element at index array_index of variable at # var_disp, relative to the BP. Remember that a local # variable can be accessed with [BP-i] rel_addr = int(var_disp) - array_index lines.append(self.asm.binary_op("STOR", reg_value, "[" + var_reg + "+" + str(rel_addr) + "]")) array_index += 1 # ignore elements which do not fit in the allocated space if array_index >= size: break else: result_reg = self.registers.next_free() try: init_val = str(self.determineConstValue(linked_init)) if in_function: lines.append(self.asm.binary_op("LOAD", result_reg, init_val)) else: self.varNames[var_disp][0] = init_val except RuntimeError as errmsg: # only constant expressions are allowed in global init if not in_function: self.logger.error(errmsg, linked_node=linked_init) init_vals = self.parseExpression(linked_node.node.init, linked_node) result_reg = self.registers.find_register(init_vals) lines += init_vals if in_function: lines.append(self.asm.binary_op("STOR", result_reg, "[" + var_reg + "+" + var_disp + "]")) # if not in a function (in global scope), the only lines that may be # returned are initialization of the array pointer address return lines def parseBreak(self, linked_node): loop_node = linked_node.getBreakNode() if not loop_node: self.logger.error("break not in a loop or switch", linked_node=linked_node) return [self.asm.branch_op("BRA", loop_node.break_label)] def parseContinue(self, linked_node): loop_node = linked_node.getContinueNode() if not loop_node: self.logger.error("continue not in a loop", linked_node=linked_node) return [self.asm.branch_op("BRA", loop_node.continue_label)] def parseEmptyStatement(self, linked_node): """Returns an empty list for an "empty statement" (duh)""" return [] def parseSwitch(self, linked_node): lines = [] lines_cases = [] lines_stmts = [] node = linked_node.node stmt = node.stmt cond = self.parseStatement(node.cond, linked_node) switch_reg = self.registers.find_register(cond, fatal=True) lines += cond if not isinstance(stmt, c_ast.Compound): raise RuntimeError("The switch statement is expected to have a" " compound statement") switch_end = self.uniqLbl("switchEnd") linked_node.setBreak(switch_end) lbl_default = None for cn in stmt.block_items: linked_cn = LinkedNode(cn, linked_node) while linked_cn is not None: cn = linked_cn.node if linked_cn.type == "Case": lbl_case = self.uniqLbl("case") try: case_val = self.determineConstValue(LinkedNode(cn.expr, linked_cn)) except RuntimeError: e = sys.exc_info()[1] print("case label does not reduce to an integer constant") print(e) raise lines_cases.append(self.asm.binary_op("CMP", switch_reg, case_val)) lines_cases.append(self.asm.branch_op("BEQ", lbl_case)) lines_stmts.append(self.asm.noop(lbl_case)) if hasattr(cn, "stmt"): # pycparser-2.05 linked_cn = LinkedNode(cn.stmt, linked_cn.parent) elif linked_cn.type == "Default": lbl_default = self.uniqLbl("default") lines_stmts.append(self.asm.noop(lbl_default)) if hasattr(cn, "stmt"): # pycparser-2.05 linked_cn = LinkedNode(cn.stmt, linked_cn.parent) if linked_cn.type in ("Case", "Default"): # pycparser-2.05: nested case/default labels like case 1: case 2: stmt; if hasattr(cn, "stmts"): # pycparser-2.06: may be empty if there is a following case for node in cn.stmts: # regular body lines_stmts += self.parseStatement(node, linked_cn) linked_cn = None else: # regular body lines_stmts += self.parseStatement(linked_cn.node, linked_cn.parent) linked_cn = None lines += lines_cases if lbl_default: lines.append(self.asm.branch_op("BRA", lbl_default)) else: lines.append(self.asm.branch_op("BRA", switch_end)) lines += lines_stmts lines.append(self.asm.noop(switch_end)) return lines def parseDeclList(self, linked_node): lines = [] for cn in linked_node.decls: lines += self.parseStatement(cn, linked_node) def parseArrayRef(self, linked_node): lines = [] name_expr = self.parseExpression(linked_node.node.name, linked_node) name_reg = self.registers.find_register(name_expr, fatal=True) lines += name_expr index_expr = self.parseExpression(linked_node.node.subscript, linked_node) index_reg = self.registers.find_register(index_expr, fatal=True) for line in index_expr: if self.registers.is_register_changed(line, name_reg): lines.append(self.asm.push(name_reg)) lines += index_expr self.registers.alloc(index_reg) name_reg = self.registers.alloc() self.registers.free(name_reg) self.registers.free(index_reg) lines.append(self.asm.pull(name_reg)) break else: lines += index_expr if (linked_node.parent and linked_node.parent.type == "Assignment" and linked_node.parent.node.lvalue is linked_node.node): lines.append(self.asm.binary_op("ADD", name_reg, index_reg)) else: lines.append(self.asm.binary_op("LOAD", name_reg, "[" + name_reg + "+" + index_reg + "]")) return lines def parseLabel(self, linked_node): lines = [] try: name = linked_node.node.name label_name = self.uniqLbl(name) linked_node.setLabel(name, label_name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) lines.append(self.asm.noop(label_name)) lines += self.parseStatement(linked_node.node.stmt, linked_node) return lines def parseGoto(self, linked_node): """Provides support for the infinitely abused goto statement. Use it for debugging only! """ try: label_name = linked_node.lookupLabel(linked_node.node.name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) return [self.asm.branch_op("BRA", label_name)] def parseFuncDecl(self, linked_node): lines = [] node = linked_node.node funcname = node.type.declname if not funcname in self.functions: is_static = "static" in linked_node.parent.node.storage self.functions[funcname] = Function(node, is_static=is_static, uniq_provider=self.uniqLbl) if node.args: argstype = type(node.args).__name__ assert argstype == "ParamList", "Expected function arguments, found '{}' instead".format(argstype) lines += self.parseStatement(node.args, linked_node) return lines def parseParamList(self, linked_node): """Processes function parameters and returns assembly for modifying SP """ lines = [] function = linked_node.getFunctionNode() # function may be None if this is a function declaration without body if function: variables = function.variables params = linked_node.node.params try: for param in params: variables.declName(param.name, is_param=True) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) return lines def parseStatement(self, node, parent_linked_node, level_increment=False): linked_node = LinkedNode(node, parent_linked_node, level_increment=level_increment) if linked_node.needNewScope(): linked_node.setVariablesObj(Variables(linked_node.parent.variables)) self.asm.level = linked_node.level lines = [] if linked_node.isTypeStatement(): #lines.append(self.asm.format_line("; " + linked_node.type + " " + # linked_node.getLocation())) if hasattr(self, "parse" + linked_node.type): lines += getattr(self, "parse" + linked_node.type)(linked_node) # add for support post increment and decrement lines += linked_node.post_lines else: self.logger.error("Not implemented for type " + repr(node), linked_node=linked_node) else: lines += self.parseExpression(node, parent_linked_node, level_increment) return lines # currently, there is no difference between an expression and statement. This might # change in the future def parseExpression(self, node, parent_linked_node, level_increment=False): linked_node = LinkedNode(node, parent_linked_node, level_increment=level_increment) self.asm.level = linked_node.level lines = [] #lines.append(self.asm.format_line("; " + linked_node.type + " " + # linked_node.getLocation())) if linked_node.type in ("ID", "Constant", "UnaryOp", "FuncCall", "Cast", "BinaryOp", "TernaryOp", "Assignment", "ExprList", "ArrayRef"): lines += getattr(self, "parse" + linked_node.type)(linked_node) elif linked_node.isTypeStatement(): self.logger.error("A statement is used in expression context", linked_node=linked_node) elif linked_node.type in ("CompoundLiteral",# won't be supported "IdentifierType", "NamedInitializer",# attributes, remove "StructRef", "Typename"): self.logger.error("Not implemented for expression type " + repr(node), linked_node=linked_node) else: self.logger.error("Not implemented expression and unknown type: " + repr(node), linked_node=linked_node) return lines def determineConstValue(self, linked_node): """Returns the value of the constant expression of linked_node as an integer. An exception is thrown if the expression is not constant """ node = linked_node.node value = None # most significant bit, i.e. the bit that is one if negative msb = pow(2, self.WORDSIZE - 1) if linked_node.type == "Constant": if node.type != "int": raise RuntimeError("Unsupported constant type '{}'".format(node.type)) value = self.convertStrToInteger(self.convertInteger(node.value)) elif linked_node.type == "Cast": #self.logger.warning("Found a type cast, but these are unsupported", # linked_node=linked_node) value = self.determineConstValue(LinkedNode(node.expr, linked_node)) elif linked_node.type == "UnaryOp": op = node.op operators = { "-": "neg", "+": "pos", "!": "not", "~": "invert" } if op not in operators: raise RuntimeError("Operator '{}' is not supported".format(op)) else: operand = self.determineConstValue(LinkedNode(node.expr, linked_node)) if op in ("-", "~"): value = operand ^ (pow(2, self.WORDSIZE) - 1) # negating a value in PP2 is inverting the bits and adding 1 if op == "-": value += 1 else: value = int(getattr(operator, op)(operand)) elif linked_node.type == "BinaryOp": op = node.op operators = { "*": "mul", "/": "div", "%": "mod", "+": "add", "-": "sub", # invert result if the sign bits do not equal "<": "lt", ">": "gt", "<=": "le", ">=": "ge", # end special treatment "==": "eq", "!=": "ne", "&": "and_", "^": "xor", "|": "or_", "<<": "lshift", ">>": "rshift" } if op not in operators: raise RuntimeError("Operator '{}' is not supported".format(op)) else: operand1 = self.determineConstValue(LinkedNode(node.left, linked_node)) operand2 = self.determineConstValue(LinkedNode(node.right, linked_node)) if op == "-": # ensure that the result of substraction is positive by # setting the (WORDSIZE+1)-nth bit on the first operand operand1 |= pow(2, self.WORDSIZE) if op in ("<", ">", "<=", ">="): value = int(getattr(operator, op)(operand1, operand2)) # 0 < 1 and -2 < -1 holds, but -1 < 0 does not. If the sign # bits are not equal, the result needs to be flipped if operand2 & msb != operand2 & msb: value = not value elif op == "/" and operand2 & msb: raise RuntimeError("Division by a negative value is" "undefined", linked_node=linked_node) elif op == "/" and operand2 == 0: raise RuntimeError("Division by zero", linked_node=linked_node) else: value = int(getattr(operator, op)(operand1, operand2)) else: raise RuntimeError("Unsupported node type '{}'".format(linked_node.type)) # The PP2 can only count in 18-bit, therefore AND the result. Note that # overflow may occur for large numbes. This is a non-issue for unary + # - ~ (and ! because it results in 0 or 1). For binary comparison # operators and bitwise operations, it's neither an issue because the # word length does not increase. Right shift works fine, left shift may # change the sign bit (as expected) value &= pow(2, self.WORDSIZE) - 1 return value if __name__ == "__main__": settings = { "input_files": [], "dump_parsed_files": False, "output_filename": "", "cpp_args": [], "use_cpp": True } set_arg = None # arguments processing class ArgumentError(Exception): pass def usage(): print("""Usage: pp2cc.py [options] filename.. Multiple input files can be specified, options can be specified before and after filenames. Options: -o filename The name of the output file. It defaults to the first input file from which the extension is removed, and .asm is added. --tree Instead of compiling the file into assembly, show the parse tree. -D name -D name=definition This option is passed to the cpp program, and acts like adding #define name or #define name definition respectively -U name This option is passed to the cpp program and acts like adding #undef name --no-cpp Disable the use of the C Preprocessor""") try: for arg in sys.argv[1:]: if not set_arg is None: assert set_arg in settings, "The command line option cannot be found in the settings" setting = settings[set_arg] if isinstance(setting, str): settings[set_arg] = arg elif isinstance(setting, list): setting.append(arg) elif isinstance(setting, bool): raise NotImplementedError("Booleans cannot be set in options") else: raise ArgumentError("Invalid type for command line setting" " '{}'".format(type(setting).__name__)) set_arg = None elif len(arg): # if the argument is an option if arg[0] == "-": if arg == "-o": set_arg = "output_filename" elif arg == "--tree": settings["dump_parsed_files"] = True elif arg[1] in ("D", "U"): settings["cpp_args"].append(arg) if len(arg) == 2: # Support -Dmacro=defn and -D macro=defn and # -Umacro and -U macro set_arg = "cpp_args" elif arg == "--no-cpp": settings["use_cpp"] = False elif arg == "-h" or arg == "--help" or arg == "--usage": usage() sys.exit(0) else: raise ArgumentError("Unknown commandline argument '{}'".format(arg)) else: # must be a file settings["input_files"].append(arg) if not set_arg is None: raise ArgumentError("Expected a value for option '{}'".format(sys.argv[-1])) if not settings["input_files"]: raise ArgumentError("No input files") except ArgumentError: e = sys.exc_info()[1] print(e) print("For usage, run: python pp2cc.py --help") sys.exit(255) # end of arguments processing if not settings["output_filename"]: settings["output_filename"] = os.path.splitext(settings["input_files"][0])[0] + ".asm" try: parse = Parse() for filename in settings["input_files"]: parse.loadFile(filename, use_cpp=settings["use_cpp"], cpp_args=settings["cpp_args"]) if settings["dump_parsed_files"]: parse.show() else: parse.compile() if not settings["dump_parsed_files"]: source = parse.getSource() outf = open(settings["output_filename"], "w") outf.write(source) outf.close() print("Compilation succesful") print("The output is written to " + settings["output_filename"]) except c_parser.ParseError: e = sys.exc_info()[1] print("A fatal (syntax) error was found during parsing:" + str(e)) sys.exit(1)