#!/usr/bin/env python """Compiles C into assembly for the practicum processor (PP2) All rights reserved, you may not redistribute or use this program without prior permission from Peter Wu or Xander Houtman. Use of this program is entirely your own risk. In no circumstances can the authors of this program be held responsible for any damage including, but not limited to, financial damage or data loss. Modification of this program is not allowed without prior permission. The generated output (assembly and messages) are not subject to this license. """ import sys, re, os, operator from pycparser import c_parser, c_ast, parse_file from Asm import Asm from Registers import Registers from LinkedNode import LinkedNode from Function import Function from Variables import Variables, GlobalVariables from AsmParser import AsmParser __author__ = "Peter Wu" __copyright__ = "Copyright 2011, Peter Wu" __credits__ = ["Peter Wu", "Xander Houtman"] __license__ = "Proprietary" __version__ = "1.0" __maintainer__ = "Peter Wu" __email__ = "uwretep@gmail.com" class Logger(object): def __init__(self): pass def info(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="info") def warning(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="warning") def error(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="error") while linked_node: print " in", linked_node.node.coord, linked_node.type linked_node = linked_node.parent raise def log(self, message, linked_node=None, type="log"): source = "" if isinstance(linked_node, LinkedNode): source = str(linked_node.getLocation()) + ": " print type + ":" + source, message class Parse(object): def __init__(self): self.logger = Logger() self.node = None # word size in bits self.WORDSIZE = 18 # preload operators and their instructions self.math_ops = { "+": "ADD", "-": "SUB", "*": "MULS", "/": "DIV", "%": "MOD", "&": "AND", "|": "OR", "^": "XOR" } self.comparison_ops = { "<" : "BLT", ">" : "BGT", "<=": "BLE", ">=": "BGE", "==": "BEQ", "!=": "BNE" } self.shift_ops = { ">>": "DIV", "<<": "MULS" } self.binary_ops = self.math_ops.copy() self.binary_ops.update(self.comparison_ops) self.binary_ops.update(self.shift_ops) # global variable names to be defined in @DATA. key: name, value: list # of initializers self.varNames = {} self.functions = {} # holds instructions for initializing the global variables self.globalInit = [] # holds the miscellaneous instructions for @CODE self.codeSegment = [] self.labels = set() # helper for asm code generation self.asm = Asm() # watches which registers are used and which are not self.registers = Registers() def loadFile(self, filename, use_cpp=True, cpp_args=[]): """Loads a file and parses the contents of it""" # reset state self.asm_node = self.node = None ext = os.path.splitext(filename)[1].lower() if ext: ext = ext[1:] if ext == "i": # C source code which should not be preprocessed self.node = parse_file(filename, use_cpp=False) elif ext in ("c", "h"): self.node = parse_file(filename, use_cpp=use_cpp, cpp_args=cpp_args) elif ext == "asm": self.asm_node = AsmParser(filename) else: self.logger.error("Unrecognized file extension '{}'".format(ext)) def show(self): self.node.show(showcoord=True) def uniqLbl(self, labelname): """Generates an unique label name, save and return it""" lbl = labelname if labelname in self.labels: i = 0 while True: lbl = labelname + "_" + str(i) if not lbl in self.labels: break i += 1 self.addLabel(lbl) return lbl def addLabel(self, labelname): if labelname in self.labels: self.logger.error("Redefinition of label '{}'".format(labelname)) self.labels.add(labelname) def compile(self): """Processes a loaded file and adds it to the assembly output""" if self.node is not None: self.compileAST() elif self.asm_node is not None: self.compileASM() else: self.logger.error("No AST node nor assembly lines found to be" " processed") def compileAST(self): """Loops through the nodes in an AST syntax tree and generates ASM""" root_node = LinkedNode(self.node) variables = GlobalVariables(self.varNames) root_node.setVariablesObj(variables) for thing in self.node.ext: if not isinstance(thing, c_ast.Decl) and not isinstance(thing, c_ast.FuncDef): linked_node = LinkedNode(thing, parent=root_node) self.logger.warning("I expected a variable or function" " definition or declaration in the root" " node, but also found " + str(thing), linked_node=linked_node) self.codeSegment += self.parseStatement(thing, root_node) # hack: static functions are local to a file, so remove them for funcname, function in self.functions.items(): if function.isStatic(): del self.functions[funcname] def compileASM(self): """Processes lines of assembly and merge it into the output""" for label in self.asm_node.labels: new_label = label # rename existing names if label in self.labels: new_label = self.uniqLbl(label) self.asm_node.renameId(label, new_label) self.labels.add(new_label) for name, init_vals in self.asm_node.getDataDefinitions().iteritems(): if name in self.varNames: old_count = len(self.varNames[name]) new_count = len(init_vals) if old_count != new_count: self.logger.error("Global variable '{}' was defined before" ", but the length of the initializers" " are not equal. Old: {}, new: {}" .format(name, old_count, new_count)) elif self.varNames[name] != init_vals: self.logger.warning("The initialization of global variable" " '{}' contains different values" .format(name)) self.varNames[name] = init_vals self.codeSegment += self.asm_node.getCodeLines() def getSource(self): """Retrieves the ASM source. You need to compile it first""" output = [] output.append("@DATA") for varName, initializers in self.varNames.iteritems(): padding = " " * (16 - len(varName) - 1) assert len(initializers) > 0, "Size of '{}' must be at least 1".format(varName) output.append(varName + padding + " DW " + ",".join(initializers)) output.append("") output.append("@CODE") # initialization of global variables output += self.globalInit if not "main" in self.functions: if "fn_main" in self.labels: self.logger.info("No main function was found in C, but label" " 'fn_main' exists in assembly") else: self.logger.warning("No main function found with label 'fn_main'") for function in self.functions.values(): if not function.isLinked(): if "fn_" + function.name in self.labels: self.logger.info("Function '{}' is declared and found in" " assembly".format(function.name)) else: self.logger.warning("Function '{}' is declared but not defined" .format(function.name)) output.append(self.asm.branch_op("BRA", "fn_main")) output.append("") output += self.codeSegment output.append("") output.append("@END") output.append("") return os.linesep.join(output) def parseFuncDef(self, linked_node): node = linked_node.node linked_node.incrementLevel() self.asm.level = linked_node.level # parse function declaration (which will parse params as well) assert not self.parseStatement(node.decl, linked_node), ("Function" " declaration should not return a line of assembly") funcname = node.decl.name function = self.functions[funcname] if function.isLinked(): self.logger.error("Redefinition of function '{}'".format(funcname), linked_node=linked_node) lbl_func = function.labelBegin() lbl_end = function.labelEnd() linked_node.setFunction(function) # save Base Pointer lines = [self.asm.push(self.registers.BP, lbl_func)] lines.append(self.asm.binary_op("LOAD", self.registers.BP, "SP")) body = self.parseStatement(node.body, linked_node) self.asm.level = linked_node.level # Reserve space on the stack for local variables if necessary if function.reserved_stack: lines.append(self.asm.binary_op("SUB", "SP", function.reserved_stack)) lines += body # restore SP lines.append(self.asm.binary_op("LOAD", "SP", self.registers.BP, label=lbl_end)) else: lines += body lines.append(self.asm.noop(lbl_end)) # restore Base Pointer lines.append(self.asm.pull(self.registers.BP)) # return from function lines.append(self.asm.format_line("RTS")) # add an extra newline lines.append("") return lines def functionLabel(self, name): if name in self.functions: function = self.functions[name] return function.labelBegin() return None def parseCompound(self, linked_node): linked_node.incrementLevel() lines = [] if linked_node.node.block_items: for cn in linked_node.node.block_items: lines += self.parseStatement(cn, linked_node) return lines def parseBinaryLogicalOp(self, linked_node): """Returns lines of assembly for a logical OR or AND operation ASM for logical OR: operand1 BEQ fal ; if operand1 is false operand2 BEQ fal ; if operand2 is false tru:LOAD R0 1 BRA end fal:LOAD R0 0 end:NOOP ASM for logical AND: operand1 BNE tru ; if operand1 is true operand2 BNE tru ; if operand2 is true fal:LOAD R0 0 BRA end tru:LOAD R0 1 end:NOOP """ node = linked_node.node assert node.op in ("&&", "||"), "Expected a && or || operator, found '{}' instead".format(node.op) isAnd = node.op == "&&" reg_result = "R0" lbl_false = self.uniqLbl("lOp_false") lbl_true = self.uniqLbl("lOp_true") lbl_end = self.uniqLbl("lOp_end") # branch instruction to be inserted if && yields false or if || is true if isAnd: branch_early = self.asm.branch_op("BEQ", lbl_false) else: branch_early = self.asm.branch_op("BNE", lbl_true) result_true = self.asm.binary_op("LOAD", reg_result, 1, label=lbl_true) result_false = self.asm.binary_op("LOAD", reg_result, 0, label=lbl_false) lines = self.parseExpression(node.left, linked_node, level_increment=True) lines.append(branch_early) lines += self.parseExpression(node.right, linked_node, level_increment=True) lines.append(branch_early) # fix indentation self.asm.level = linked_node.level # if 'true && true' or 'false || false' if isAnd: lines.append(result_true) else: lines.append(result_false) lines.append(self.asm.branch_op("BRA", lbl_end)) # if 'false && x' or 'true && false' or 'x || true' or 'true || x' if isAnd: lines.append(result_false) else: lines.append(result_true) lines.append(self.asm.noop(lbl_end, reg_result)) return lines def parseBinaryOp(self, linked_node): node = linked_node.node lines = [] if node.op in self.binary_ops: op = self.binary_ops[node.op] elif node.op in ("&&", "||"): return self.parseBinaryLogicalOp(linked_node) else: self.logger.error("Binary op is not implemented yet: " + node.op, linked_node=linked_node) # process the first operand, the result is likely in R0 or R1 ops_first = self.parseExpression(node.left, linked_node, level_increment=True) assert len(ops_first), "Empty operand for binary operation (first part)" lines += ops_first reg_first = self.registers.find_register(ops_first, fatal=True) # fix indentation after running parseStatement self.asm.level = linked_node.level # optimization, but this should probably be done afterwards if isinstance(node.right, c_ast.Constant): operand = self.convertInteger(node=node.right) else: lines.append(self.asm.push(reg_first)) ops_second = self.parseExpression(node.right, linked_node, level_increment=True) assert len(ops_second), "Empty operand for binary operation (second part)" lines += ops_second # the register storing the result of the second operand reg_second = self.registers.find_register(ops_second, fatal=True) operand = reg_second # since we use the stack for calculations, we can use less # registers (2). Use the Registers class for determining the next # free register self.registers.alloc(reg_second) reg_first = self.registers.alloc() self.registers.free(reg_second) self.registers.free(reg_first) # put the data back in the free register lines.append(self.asm.pull(reg_first)) # perform binary operation if node.op in self.comparison_ops: bin_end = self.uniqLbl("cmpEnd") bin_true = self.uniqLbl("cmpTrue") lines.append(self.asm.binary_op("CMP", reg_first, operand)) # a comparison is more likely to be true, so optimize for that: # true will follow the path CMP - - LOAD - NOOP, where false # will follow: CMP - - LOAD - BRA - NOOP. This just happened # to be the easiest to implement, so don't blame me for premature # optimization ;) lines.append(self.asm.branch_op(op, bin_true)) lines.append(self.asm.binary_op("LOAD", reg_first, 0)) lines.append(self.asm.branch_op("BRA", bin_end)) lines.append(self.asm.binary_op("LOAD", reg_first, 1, label=bin_true)) lines.append(self.asm.noop(bin_end, register=reg_first)) elif node.op in ("<<", ">>"): lines += self.processShift(node.op, reg_first, operand, linked_node) else: lines.append(self.asm.binary_op(op, reg_first, operand)) # explicitly set the PSW words because DIV and MOD are stupid and set # the Z flag depending on the remainder if op in ("DIV", "MOD"): lines.append(self.asm.binary_op("LOAD", reg_first, reg_first)) return lines def processShift(self, operator, reg_number, operand_shift, linked_node=None): """Returns lines for a shift expression Keyword arguments: operator -- Either >> for right shift or << for left shift reg_number -- A register Rn containing the number to be shifted operand_shift -- An operand for the shift count. If it's a register, the contents of it will be used as the shift count. Otherwise, the string is interpreted as a number from assembly (allowing $ and % notations) linked_node -- An optional LinkedNode for showing the context in warning messages """ lines = [] assert operator in self.shift_ops, "Invalid shift operator '{}'".format(operator) binop = self.shift_ops[operator] if self.registers.is_register(operand_shift): # alias operand_shift for clarity below, reg_shift is a register reg_shift = operand_shift lbl_shift = self.uniqLbl("shift_loop") lbl_shift_end = self.uniqLbl("shift_done") # necessary to set the status flags for the branch conditions lines.append(self.asm.binary_op("CMP", reg_shift, 0)) # if operand 2 is lower than 1, do not bother shifting operand # 1 and jump to the end lines.append(self.asm.branch_op("BLE", lbl_shift_end)) # right shift needs special treatment for the exceptional case -1 if operator == ">>": # see below for an explanation for what's happening here lines.append(self.asm.binary_op("AND", reg_number, "%10")) lines.append(self.asm.binary_op("DIV", reg_number, 2)) mask = "%0" + (self.WORDSIZE - 1) * "1" lines.append(self.asm.binary_op("AND", reg_number, mask)) # decrease shift count and finish if shift count <= 0 lines.append(self.asm.binary_op("SUB", reg_shift, 1)) lines.append(self.asm.branch_op("BLE", lbl_shift_end)) # do a single shift and decrease remaining shift count lines.append(self.asm.binary_op(binop, reg_number, 2, label=lbl_shift)) lines.append(self.asm.binary_op("SUB", reg_shift, 1)) # if we came from BLE, x <= 0. BGT only jumps if x > 0, so # we can safe another NOOP by adding the label here lines.append(self.asm.branch_op("BGT", lbl_shift, label=lbl_shift_end)) # expression rely on the fact that the result of an expression is # available in the last register. Without the below line, it'd look # in reg_shift lines.append(self.asm.binary_op("LOAD", reg_number, reg_number)) else: shift_bits = self.convertStrToInteger(operand_shift) if shift_bits <= 0: # optimize when shifting by 0, but warn for negative if shift_bits < 0: self.logger.warning("The shift count is negative, no shift" " operation will be performed", linked_node=linked_node) lines.append(self.asm.binary_op("LOAD", reg_number, reg_number)) else: operand = pow(2, shift_bits) if operator == ">>": # because DIV works with signed numbers, right shift is # a bit harder because the sign bit does not vanish # Assume 4-bit numbers in the next example. # binary | decimal # 1111 | -1 # 0111 | 7 # The rightmost bit is insignificant in a right shift # but causes issues with division by two for -1 because # it gets rounded to 0. After removal of the rightmost # bit and division by two, all bits are shifted left, # but the result is still negative. Since we're doing a # zero-padded shift (unsigned), clear the leftmost bit # For positive numbers, this has no side effects lines.append(self.asm.binary_op("AND", reg_number, "%10")) # shift it by one to the right lines.append(self.asm.binary_op("DIV", reg_number, 2)) # clear the leftmost bit by masking it with 011..11 mask = "%0" + (self.WORDSIZE - 1) * "1" lines.append(self.asm.binary_op("AND", reg_number, mask)) # decrease shift count and finish if nothing to shift operand /= 2 if operand > 1: # process the remaining shift lines.append(self.asm.binary_op(binop, reg_number, operand)) return lines def parseUnaryOpLValue(self, linked_node): lines = [] binops = { "++": "ADD", "--": "SUB" } op = linked_node.node.op linked_cn = LinkedNode(linked_node.node.expr, linked_node) if op == "&": lines = self.parseLValue(linked_cn) elif op in ("p++", "p--"): # XXX support postinc/dec raise RuntimeError("Post increment and post decrement operators" " are not supported.") binop = binops[op[1:]] stmt = linked_node.getStatementNode() if not stmt: self.logger.error("No statement found for post inc/decrement", linked_node=linked_node) # assume that the result register is R0 lvalue = self.parseLValue(linked_cn) lines = lvalue; stmt.post_lines += lvalue stmt.post_lines.append(self.asm.binary_op("LOAD", "R1", "[R0]")) stmt.post_lines.append(self.asm.binary_op(binop, "R1", 1)) stmt.post_lines.append(self.asm.binary_op("STOR", "R1", "[R0]")) elif op in ("--", "++"): binop = binops[op] lvalue = self.parseLValue(linked_cn) lines = lvalue; lines.append(self.asm.binary_op("LOAD", "R1", "[R0]")) lines.append(self.asm.binary_op(binop, "R1", 1)) lines.append(self.asm.binary_op("STOR", "R1", "[R0]")) else: self.logger.error("Unknown lvalue unary operator '{}'".format(op), linked_node=linked_node) return lines def parseUnaryOp(self, linked_node): """Returns lines of assembly for unary operators Supported operators are logical negation, one's complement (bitwise NOT), plus and minus ASM for logical NOT: operand BEQ fal ; if operand is false tru:LOAD R0 1 BRA end fal:LOAD R0 0 end:NOOP """ op = linked_node.node.op # some operators works on lvalues if op in ("--", "++", "p++", "p--", "&"): return self.parseUnaryOpLValue(linked_node) # first fetch the operand lines = self.parseExpression(linked_node.node.expr, linked_node) # determine the register in which the result is stored reg = self.registers.find_register(lines, fatal=True) if op == "*": # load the value Rn at address into Rn lines.append(self.asm.binary_op("LOAD", reg, "[" + reg + "]")) elif op == "+": # K&R A7.4.4 "[the unary +] was added for symmetry with unary -" pass elif op == "-": lines.append(self.asm.binary_op("MULS", reg, "-1")) elif op == "~": # Conforming K&R A7.4.6, signed types are treated as unsigned when # applying ~ lines.append(self.asm.binary_op("XOR", reg, "%1")) elif op == "!": lbl_false = self.uniqLbl("not_false") lbl_true = self.uniqLbl("not_true") lbl_end = self.uniqLbl("not_end") # assume that the status words of the registers describes the value # of the expression lines.append(self.asm.branch_op("BEQ", lbl_false)) lines.append(self.asm.binary_op("LOAD", reg, 1, label=lbl_true)) lines.append(self.asm.branch_op("BRA", lbl_end)) lines.append(self.asm.binary_op("LOAD", reg, 0, label=lbl_false)) lines.append(self.asm.noop(lbl_end, register=reg)) elif op == "sizeof": self.logger.error("Sizeof operator 'sizeof' is not supported.", linked_node=linked_node) else: self.logger.error("Unknown unary operator '{}'".format(op), linked_node=linked_node) return lines def parseTernaryOp(self, linked_node): """Returns lines of ASM for ternary operation cond?expr_true:expr_false It looks very similar to an If expect that the return value is important """ linked_node.incrementLevel() node = linked_node.node lbl_el = self.uniqLbl("el") # else lbl_end = self.uniqLbl("fi") # endif lines = self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_el)) # if true... then_block = self.parseExpression(node.iftrue, linked_node) # use a single register for the result, using the one of expr_true result_reg = self.registers.find_register(then_block, fatal=True) lines += then_block lines.append(self.asm.branch_op("BRA", lbl_end)) # else... else_block = self.parseExpression(node.iffalse, linked_node) assert len(else_block), "Else block is empty" # add labels for the else block if self.asm.has_label(else_block[0]): lines.append(self.asm.noop(lbl_el)) else: else_block[0] = self.asm.insert_label(else_block[0], lbl_el) lines += else_block result_reg_else = self.registers.find_register(else_block, fatal=True) # for consistency with the result of expr_true if result_reg_else != result_reg: lines.append(self.asm.binary_op("LOAD", result_reg, result_reg_else)) lines.append(self.asm.noop(lbl_end, register=result_reg)) return lines def convertInteger(self, value=None, node=None): """Returns a string of the value For integers, the string returned is a base-10 number modulo WORDSIZE """ if value is None: if isinstance(node, c_ast.Constant) and node.type == "int": value = node.value else: self.logger.error("Value is not given and node is not a valid" " integer constant") value = value.upper() if "LL" in value: # long long integer, ignore value = value.replace("LL", "", 1) elif "L" in value: # long integer, ignore value = value.replace("L", "", 1) if "U" in value: # unsigned integer, ignore for now value = value.replace("U", "", 1) if value.startswith("0X"): value = int(value[2:], 16) elif value.startswith("0"): # octal numbers are not understood by PP2 assembly, therefore convert it value = int(value, 8) else: value = int(value) # values are modulo WORDSIZE. If negative, the MSB is 1 value %= pow(2, self.WORDSIZE) return str(value) def convertStrToInteger(self, str): """Converts a PP2 assembly integer string to an integer""" str = str.upper() if str.startswith("$"): if str.startswith("$0"): return int(str[2:], 16) else: # if the leftmost bit of a 4-bit group is set, the preceding # bits should be 1 as well. E.g. $7 end with 0111 and is # therefore interpreted as 7 where $8 ends with 1111 which is # interpreted as -1 (all bits are set) val = str[1:] if int(val[1], 16) & 0b1000: hexsize = len(hex(pow(2, self.WORDSIZE) - 1)[2:]) val = val.rjust(hexsize, "F") return int(val, 16) elif str.startswith("%"): if str.startswith("%0"): return int(str[2:], 8) else: return int(str[1:].rjust(self.WORDSIZE, "1"), 8) return int(str, 10) def parseConstant(self, linked_node, register="R0"): node = linked_node.node if node.type == "int": value = self.convertInteger(node.value) else: self.logger.error("Unsupported constant type: " + node.type, linked_node=linked_node) return [self.asm.binary_op("LOAD", register, value)] def parseID(self, linked_node, register="R0"): """Returns the name for a name""" # XXX this is not made for functions try: addr = "+".join(linked_node.variables.getAddress(linked_node.node.name)) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) return [self.asm.binary_op("LOAD", register, "[" + addr + "]")] def parseIf(self, linked_node): linked_node.incrementLevel() node = linked_node.node lines = self.parseExpression(node.cond, linked_node) branch_op = "BEQ" then_block = self.parseStatement(node.iftrue, linked_node) if node.iffalse: else_block = self.parseStatement(node.iffalse, linked_node) # if(x){}else{y} is equivalent to if(!x){} if not then_block: # note: else can still be empty, if(x){}else{}, but that is # handled below with the "if then_block" then_block = else_block else_block = [] # invert result branch_op = "BNE" else: else_block = [] # if(x){}else{} is equivalent to x, which do not need the below if then_block: if else_block: lbl_false = self.uniqLbl("el") # else lbl_end = self.uniqLbl("fi") # endif else: lbl_end = lbl_false = self.uniqLbl("fi") # endif # if cond is satisfied, go to else (BEQ branches if Z=0, BNE # branches if Z!=0). Do not use CMP, rely on the status flags set # set by the condition lines.append(self.asm.branch_op(branch_op, lbl_false)) # if true... lines += then_block # else... if else_block: # this branch is part of the then block, but include it here to # avoid redundancy (BRA lbl, lbl: NOOP) lines.append(self.asm.branch_op("BRA", lbl_end)) # XXX this is optimization for labelling else, do it later? if self.asm.has_label(else_block[0]): lines.append(self.asm.noop(lbl_false)) else: else_block[0] = self.asm.insert_label(else_block[0], lbl_false) lines += else_block lines.append(self.asm.noop(lbl_end)) return lines def parseReturn(self, linked_node): """Return from a function. For non-void functions the result is in R0""" func_node = linked_node.getFunctionNode() if not func_node: self.logger.error("return not in function", linked_node=linked_node) return_value = linked_node.node.expr if return_value: lines = self.parseExpression(return_value, linked_node) else: lines = [] if return_value: reg = self.registers.find_register(lines, fatal=True) # ensure that the return value is in R0 for consistency if reg != "R0": lines.append(self.asm.binary_op("LOAD", "R0", reg)) self.asm.level = linked_node.level - 1 lines.append(self.asm.branch_op("BRA", func_node.function.labelEnd())) # don't do the below, it may not cleanup properly #lines.append(self.asm.format_line("RTS")) return lines def parseFuncCall(self, linked_node): lines = [] # node.name is a c_ast.ID, the real function name is in name funcname = linked_node.node.name.name params = linked_node.node.args if params: linked_params = LinkedNode(params, linked_node) # call convention: params in reverse order for expr in reversed(params.exprs): line = self.parseExpression(expr, linked_params) result_reg = self.registers.find_register(line, fatal=True) lines += line lines.append(self.asm.push(result_reg)) lbl_func = self.functionLabel(funcname) if lbl_func is None: # name not found :? perhaps an external declaration lbl_func = "fn_" + funcname self.logger.warning("call to undefined function, assuming label" " '{}'".format(lbl_func), linked_node=linked_node) lines.append(self.asm.branch_op("BRS", lbl_func)) if params: lines.append(self.asm.binary_op("ADD", "SP", len(params.exprs))) # by convention, a function call must return the result in R0, we # also make sure that the PSW flags are set properly lines.append(self.asm.binary_op("LOAD", "R0", "R0")) return lines def parseCast(self, linked_node): #self.logger.warning("Found a type cast, but these are unsupported.", linked_node=linked_node) return self.parseExpression(linked_node.node.expr, linked_node) def parseDoWhile(self, linked_node): node = linked_node.node lbl_do = self.uniqLbl("do") # used for supporting continue/break lbl_cond = self.uniqLbl("doCond") lbl_end = self.uniqLbl("doEnd") linked_node.setBreak(lbl_end) linked_node.setContinue(lbl_doCond) # provides a way to redo the loop lines = [self.asm.noop(lbl_do)] # do { ... }, expect do{/*empty*/} as well lines += self.parseStatement(node.stmt, linked_node) # while (...) # used for supporting continue lines.append(self.asm.noop(lbl_cond)) lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BNE", lbl_do)) # used for supporting break lines.append(self.asm.noop(lbl_end)) return lines def parseWhile(self, linked_node): node = linked_node.node lbl_while = self.uniqLbl("while") lbl_while_end = self.uniqLbl("whileEnd") # used for supporting break/continue linked_node.setBreak(lbl_while_end) linked_node.setContinue(lbl_while) # while ( lines = [self.asm.noop(lbl_while)] # .. lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_while_end)) # ){.. lines += self.parseStatement(node.stmt, linked_node) lines.append(self.asm.branch_op("BRA", lbl_while)) # } lines.append(self.asm.noop(lbl_while_end)) return lines def parseFor(self, linked_node): """Returns ASM for a for loop for (init; cond; next) { body; } is equivalent to: init; while (cond) { body; next; } Note that for (;;){} is allowed too which is equivalent to: while (true) {} """ node = linked_node.node lines = [] lbl_for = self.uniqLbl("for") lbl_for_end = self.uniqLbl("forEnd") # used for supporting break/continue linked_node.setBreak(lbl_for_end) if node.next: lbl_cont = self.uniqLbl("forNext") linked_node.setContinue(lbl_cont) else: linked_node.setContinue(lbl_for) if node.init: # init; if isinstance(node.init, c_ast.DeclList): # loop initial declaration is supported in C99 lines += self.parseDeclList(LinkedNode(node.init, linked_node)) else: lines += self.parseExpression(node.init, linked_node) # while ( lines.append(self.asm.noop(lbl_for)) if node.cond: # if no condition, it's should evaluate to true lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_for_end)) # ){ body; lines += self.parseStatement(node.stmt, linked_node) if node.next: # next; lines.append(self.asm.noop(lbl_cont)) lines += self.parseExpression(node.next, linked_node) lines.append(self.asm.branch_op("BRA", lbl_for)) # } lines.append(self.asm.noop(lbl_for_end)) return lines def parseExprList(self, linked_node): """Parse an expression list. This method should not be used when parsing a list declaration (done by parseDecl) nor function params """ lines = [] for expr in linked_node.node.exprs: lines += self.parseExpression(expr, linked_node) return lines def parseAssignment(self, linked_node): lines = [] node = linked_node.node expr_result = self.parseExpression(node.rvalue, linked_node) lines += expr_result result_reg = self.registers.find_register(expr_result) self.registers.alloc(result_reg) # a register which is available for storing the address of lvalue lvalue_reg = self.registers.alloc() self.registers.free(lvalue_reg) self.registers.free(result_reg) linked_lval = LinkedNode(node.lvalue, linked_node) # this lvalue may be an expression if there is an indirection if (linked_lval.type == "UnaryOp" and node.lvalue.op == "*" or linked_lval.type == "ArrayRef"): if linked_lval.type == "UnaryOp": lvalue = self.parseExpression(linked_lval.node.expr, linked_lval) else: lvalue = self.parseExpression(linked_lval.node, linked_node) lvalue_result_reg = self.registers.find_register(lvalue) for line in lvalue: # if the right value register is modified, we need to pull if self.registers.is_register_changed(line, result_reg): lines.append(self.asm.push(result_reg)) lines += lvalue # if the register of the left value equals the register of # the right value, pull the right value in the register # which was reserved for the left value if lvalue_result_reg == result_reg: result_reg = lvalue_reg lines.append(self.asm.pull(result_reg)) break else: lines += lvalue # update the register for the left value because the expression may # return a different result register lvalue_reg = lvalue_result_reg else: # if not an expression, it must be a single variable name lines += self.parseLValue(linked_lval, register=lvalue_reg) math_and_shift_ops = self.math_ops.copy() math_and_shift_ops.update(self.shift_ops) if node.op == "=": lines.append(self.asm.binary_op("STOR", result_reg, "[" + lvalue_reg + "]")) elif (len(node.op) >= 2 and node.op[-1] == "=" and node.op[0:-1] in math_and_shift_ops): # the operator, like << or + operator = node.op[0:-1] binop = math_and_shift_ops[operator] # abuse lvalue_reg to save a register by storing the value of the # lvalue. This should allow for doing x / y first before assigning # (writing) to y lines.append(self.asm.push(lvalue_reg)) # get the value of lvalue, e.g. the value of x lines.append(self.asm.binary_op("LOAD", lvalue_reg, "["+lvalue_reg+"]")) # perform the operation on the value if operator in self.shift_ops: lines += self.processShift(operator, lvalue_reg, result_reg, linked_node) else: lines.append(self.asm.binary_op(binop, lvalue_reg, result_reg)) # since lvalue_reg is now the integer result, use result_reg as the # register for holding the address of y lines.append(self.asm.pull(result_reg)) # finally store the result from lvalue_reg into result_reg lines.append(self.asm.binary_op("STOR", lvalue_reg, "[" + result_reg + "]")) else: self.logger.error("Unsupported assignment operator: " + node.op, linked_node=linked_node) return lines def parseLValue(self, linked_node, register="R0"): """Returns lines of ASM for a lvalue type. It's assumed that register does not change to another one. If this is changed in the future, please revisit all uses of it. """ lines = [] if linked_node.type == "UnaryOp" and linked_node.node.op == "*": lines += self.parseLValue(LinkedNode(linked_node.node.expr, linked_node), register) lines.append(self.asm.binary_op("LOAD", register, "[" + register + "]")) elif linked_node.type == "ID": try: var_reg, var_disp = linked_node.variables.getAddress(linked_node.node.name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) lines.append(self.asm.binary_op("LOAD", register, var_reg)) lines.append(self.asm.binary_op("ADD", register, var_disp)) else: self.logger.error("Expected a lvalue (pointer or names)", linked_node=linked_node) return lines def parseDecl(self, linked_node): """Parses declarations in the global context and for local variables (not params) """ lines = [] in_function = linked_node.getFunctionNode() is not None size = 1 linked_type = LinkedNode(linked_node.node.type, linked_node) name = linked_node.node.name if linked_type.type == "ArrayDecl": if linked_type.node.dim is not None: linked_dim_node = LinkedNode(linked_type.node.dim, linked_node) try: size = self.determineConstValue(linked_dim_node) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_dim_node) else: size = None node_init = linked_node.node.init # determine the array dimension based on the initializer if node_init and isinstance(node_init, c_ast.ExprList): real_size = len(node_init.exprs) if size is None: size = real_size elif real_size > size: self.logger.warning("excess elements in array initializer", linked_node=linked_node) # it's an error if the array dimension is unavailable, e.g.: # int a[]; if size is None: self.logger.error("Array size missing in '{}'".format(name), linked_node=linked_node) elif linked_type.type == "PtrDecl": # no check whether the pointer is valid or not pass elif linked_type.type == "TypeDecl": if not linked_type.node.type.names[0] in ("int", "void"): self.logger.warning("Only void and int types are supported", linked_node) elif linked_type.type == "FuncDecl": # a function declaration does not accept an initializer, but it's # still special because it has params return self.parseStatement(linked_type.node, linked_node) else: self.logger.error("Unknown declaration type '{}'".format(linked_type.type), linked_node=linked_node) try: # global variables may be declared multiple times, local variables # are not allowed for that if in_function or not linked_node.variables.isDeclared(name): is_static = "static" in linked_node.node.storage linked_node.variables.declName(name, size, is_static=is_static) # address of variable split up in register and displacement var_reg, var_disp = linked_node.variables.getAddress(name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) # if the declaration also has a definition if linked_node.node.init: # global variables needs to be marked as defined if not in_function: try: linked_node.variables.defName(name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) linked_init = LinkedNode(linked_node.node.init, linked_node) if (linked_type.type == "ArrayDecl" and linked_init.type == "ExprList"): # next available register for storing the value reg_value = self.registers.next_free() array_index = 0 for array_elm in linked_init.node.exprs: linked_arrelm = LinkedNode(array_elm, linked_init) try: # only constant expressions are allowed in array init init_val = str(self.determineConstValue(linked_arrelm)) if in_function: lines.append(self.asm.binary_op("LOAD", reg_value, init_val)) else: self.varNames[var_disp][array_index] = init_val except RuntimeError as errmsg: # only functions are allowed to have non-constant # expressions if not in_function: self.logger.error(errmsg, linked_node=linked_arrelm) init_val = self.parseExpression(array_elm, linked_init) # if the result register has changed, update reg_value = self.registers.find_register(init_val, fatal=True) lines += init_val if in_function: assert var_reg == self.registers.BP, ("Expected an" " array definition in a local variable") # an array element at index array_index of variable at # var_disp, relative to the BP. Remember that a local # variable can be accessed with [BP-i] rel_addr = int(var_disp) - array_index lines.append(self.asm.binary_op("STOR", reg_value, "[" + var_reg + "+" + str(rel_addr) + "]")) array_index += 1 # ignore elements which do not fit in the allocated space if array_index >= size: break else: result_reg = self.registers.next_free() try: init_val = str(self.determineConstValue(linked_init)) if in_function: lines.append(self.asm.binary_op("LOAD", result_reg, init_val)) else: self.varNames[var_disp][0] = init_val except RuntimeError as errmsg: # only constant expressions are allowed in global init if not in_function: self.logger.error(errmsg, linked_node=linked_init) init_vals = self.parseExpression(linked_node.node.init, linked_node) result_reg = self.registers.find_register(init_vals) lines += init_vals if in_function: lines.append(self.asm.binary_op("STOR", result_reg, "[" + var_reg + "+" + var_disp + "]")) # if not in a function (in global scope), the only lines that may be # returned are initialization of the array pointer address return lines def parseBreak(self, linked_node): loop_node = linked_node.getBreakNode() if not loop_node: self.logger.error("break not in a loop or switch", linked_node=linked_node) return [self.asm.branch_op("BRA", loop_node.break_label)] def parseContinue(self, linked_node): loop_node = linked_node.getContinueNode() if not loop_node: self.logger.error("continue not in a loop", linked_node=linked_node) return [self.asm.branch_op("BRA", loop_node.continue_label)] def parseEmptyStatement(self, linked_node): """Returns an empty list for an "empty statement" (duh)""" return [] def parseSwitch(self, linked_node): lines = [] lines_cases = [] lines_stmts = [] node = linked_node.node stmt = node.stmt cond = self.parseStatement(node.cond, linked_node) switch_reg = self.registers.find_register(cond, fatal=True) lines += cond if not isinstance(stmt, c_ast.Compound): raise RuntimeError("The switch statement is expected to have a" " compound statement") switch_end = self.uniqLbl("switchEnd") linked_node.setBreak(switch_end) lbl_default = None for cn in stmt.block_items: linked_cn = LinkedNode(cn, linked_node) if linked_cn.type == "Case": lbl_case = self.uniqLbl("case") try: case_val = self.determineConstValue(LinkedNode(cn.expr, linked_cn)) except RuntimeError: e = sys.exc_info()[1] print "case label does not reduce to an integer constant" print e raise lines_cases.append(self.asm.binary_op("CMP", switch_reg, case_val)) lines_cases.append(self.asm.branch_op("BEQ", lbl_case)) lines_stmts.append(self.asm.noop(lbl_case)) lines_stmts += self.parseStatement(cn.stmt, linked_cn) elif linked_cn.type == "Default": lbl_default = self.uniqLbl("default") lines_stmts.append(self.asm.noop(lbl_default)) lines_stmts += self.parseStatement(cn.stmt, linked_cn) else: lines_stmts += self.parseStatement(cn, linked_cn) lines += lines_cases if lbl_default: lines.append(self.asm.branch_op("BRA", lbl_default)) else: lines.append(self.asm.branch_op("BRA", switch_end)) lines += lines_stmts lines.append(self.asm.noop(switch_end)) return lines def parseDeclList(self, linked_node): lines = [] for cn in linked_node.decls: lines += self.parseStatement(cn, linked_node) def parseArrayRef(self, linked_node): lines = [] name_expr = self.parseExpression(linked_node.node.name, linked_node) name_reg = self.registers.find_register(name_expr, fatal=True) lines += name_expr index_expr = self.parseExpression(linked_node.node.subscript, linked_node) index_reg = self.registers.find_register(index_expr, fatal=True) for line in index_expr: if self.registers.is_register_changed(line, name_reg): lines.append(self.asm.push(name_reg)) lines += index_expr self.registers.alloc(index_reg) name_reg = self.registers.alloc() self.registers.free(name_reg) self.registers.free(index_reg) lines.append(self.asm.pull(name_reg)) break else: lines += index_expr if (linked_node.parent and linked_node.parent.type == "Assignment" and linked_node.parent.node.lvalue is linked_node.node): lines.append(self.asm.binary_op("ADD", name_reg, index_reg)) else: lines.append(self.asm.binary_op("LOAD", name_reg, "[" + name_reg + "+" + index_reg + "]")) return lines def parseLabel(self, linked_node): lines = [] try: name = linked_node.node.name label_name = self.uniqLbl(name) linked_node.setLabel(name, label_name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) lines.append(self.asm.noop(label_name)) lines += self.parseStatement(linked_node.node.stmt, linked_node) return lines def parseGoto(self, linked_node): """Provides support for the infinitely abused goto statement. Use it for debugging only! """ try: label_name = linked_node.lookupLabel(linked_node.node.name) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) return [self.asm.branch_op("BRA", label_name)] def parseFuncDecl(self, linked_node): lines = [] node = linked_node.node funcname = node.type.declname if not funcname in self.functions: is_static = "static" in linked_node.parent.node.storage self.functions[funcname] = Function(node, is_static=is_static, uniq_provider=self.uniqLbl) if node.args: argstype = type(node.args).__name__ assert argstype == "ParamList", "Expected function arguments, found '{}' instead".format(argstype) lines += self.parseStatement(node.args, linked_node) return lines def parseParamList(self, linked_node): """Processes function parameters and returns assembly for modifying SP """ lines = [] function = linked_node.getFunctionNode() # function may be None if this is a function declaration without body if function: variables = function.variables params = linked_node.node.params try: for param in params: variables.declName(param.name, is_param=True) except RuntimeError as errmsg: self.logger.error(errmsg, linked_node=linked_node) return lines def parseStatement(self, node, parent_linked_node, level_increment=False): linked_node = LinkedNode(node, parent_linked_node, level_increment=level_increment) if linked_node.needNewScope(): linked_node.setVariablesObj(Variables(linked_node.parent.variables)) self.asm.level = linked_node.level lines = [] if linked_node.isTypeStatement(): #lines.append(self.asm.format_line("; " + linked_node.type + " " + # linked_node.getLocation())) if hasattr(self, "parse" + linked_node.type): lines += getattr(self, "parse" + linked_node.type)(linked_node) # add for support post increment and decrement lines += linked_node.post_lines else: self.logger.error("Not implemented for type " + repr(node), linked_node=linked_node) else: lines += self.parseExpression(node, parent_linked_node, level_increment) return lines # currently, there is no difference between an expression and statement. This might # change in the future def parseExpression(self, node, parent_linked_node, level_increment=False): linked_node = LinkedNode(node, parent_linked_node, level_increment=level_increment) self.asm.level = linked_node.level lines = [] #lines.append(self.asm.format_line("; " + linked_node.type + " " + # linked_node.getLocation())) if linked_node.type in ("ID", "Constant", "UnaryOp", "FuncCall", "Cast", "BinaryOp", "TernaryOp", "Assignment", "ExprList", "ArrayRef"): lines += getattr(self, "parse" + linked_node.type)(linked_node) elif linked_node.isTypeStatement(): self.logger.error("A statement is used in expression context", linked_node=linked_node) elif linked_node.type in ("CompoundLiteral",# won't be supported "IdentifierType", "NamedInitializer",# attributes, remove "StructRef", "Typename"): self.logger.error("Not implemented for expression type " + repr(node), linked_node=linked_node) else: self.logger.error("Not implemented expression and unknown type: " + repr(node), linked_node=linked_node) return lines def determineConstValue(self, linked_node): """Returns the value of the constant expression of linked_node as an integer. An exception is thrown if the expression is not constant """ node = linked_node.node value = None # most significant bit, i.e. the bit that is one if negative msb = pow(2, self.WORDSIZE - 1) if linked_node.type == "Constant": if node.type != "int": raise RuntimeError("Unsupported constant type '{}'".format(node.type)) value = self.convertStrToInteger(self.convertInteger(node.value)) elif linked_node.type == "Cast": #self.logger.warning("Found a type cast, but these are unsupported", # linked_node=linked_node) value = self.determineConstValue(LinkedNode(node.expr, linked_node)) elif linked_node.type == "UnaryOp": op = node.op operators = { "-": "neg", "+": "pos", "!": "not", "~": "invert" } if op not in operators: raise RuntimeError("Operator '{}' is not supported".format(op)) else: operand = self.determineConstValue(LinkedNode(node.expr, linked_node)) if op in ("-", "~"): value = operand ^ (pow(2, self.WORDSIZE) - 1) # negating a value in PP2 is inverting the bits and adding 1 if op == "-": value += 1 else: value = int(getattr(operator, op)(operand)) elif linked_node.type == "BinaryOp": op = node.op operators = { "*": "mul", "/": "div", "%": "mod", "+": "add", "-": "sub", # invert result if the sign bits do not equal "<": "lt", ">": "gt", "<=": "le", ">=": "ge", # end special treatment "==": "eq", "!=": "ne", "&": "and_", "^": "xor", "|": "or_", "<<": "lshift", ">>": "rshift" } if op not in operators: raise RuntimeError("Operator '{}' is not supported".format(op)) else: operand1 = self.determineConstValue(LinkedNode(node.left, linked_node)) operand2 = self.determineConstValue(LinkedNode(node.right, linked_node)) if op == "-": # ensure that the result of substraction is positive by # setting the (WORDSIZE+1)-nth bit on the first operand operand1 |= pow(2, self.WORDSIZE) if op in ("<", ">", "<=", ">="): value = int(getattr(operator, op)(operand1, operand2)) # 0 < 1 and -2 < -1 holds, but -1 < 0 does not. If the sign # bits are not equal, the result needs to be flipped if operand2 & msb != operand2 & msb: value = not value elif op == "/" and operand2 & msb: raise RuntimeError("Division by a negative value is" "undefined", linked_node=linked_node) elif op == "/" and operand2 == 0: raise RuntimeError("Division by zero", linked_node=linked_node) else: value = int(getattr(operator, op)(operand1, operand2)) else: raise RuntimeError("Unsupported node type '{}'".format(linked_node.type)) # The PP2 can only count in 18-bit, therefore AND the result. Note that # overflow may occur for large numbes. This is a non-issue for unary + # - ~ (and ! because it results in 0 or 1). For binary comparison # operators and bitwise operations, it's neither an issue because the # word length does not increase. Right shift works fine, left shift may # change the sign bit (as expected) value &= pow(2, self.WORDSIZE) - 1 return value if __name__ == "__main__": settings = { "input_files": [], "dump_parsed_files": False, "output_filename": "", "cpp_args": [], "use_cpp": True } set_arg = None # arguments processing class ArgumentError(Exception): pass def usage(): print """Usage: pp2cc.py [options] filename.. Multiple input files can be specified, options can be specified before and after filenames. Options: -o filename The name of the output file. It defaults to the first input file from which the extension is removed, and .asm is added. --tree Instead of compiling the file into assembly, show the parse tree. -D name -D name=definition This option is passed to the cpp program, and acts like adding #define name or #define name definition respectively -U name This option is passed to the cpp program and acts like adding #undef name --no-cpp Disable the use of the C Preprocessor """ try: for arg in sys.argv[1:]: if not set_arg is None: assert set_arg in settings, "The command line option cannot be found in the settings" setting = settings[set_arg] if isinstance(setting, str): settings[set_arg] = arg elif isinstance(setting, list): setting.append(arg) elif isinstance(setting, bool): raise NotImplementedError("Booleans cannot be set in options") else: raise ArgumentError("Invalid type for command line setting" " '{}'".format(type(setting).__name__)) set_arg = None elif len(arg): # if the argument is an option if arg[0] == "-": if arg == "-o": set_arg = "output_filename" elif arg == "--tree": settings["dump_parsed_files"] = True elif arg[1] in ("D", "U"): settings["cpp_args"].append(arg) if len(arg) == 2: # Support -Dmacro=defn and -D macro=defn and # -Umacro and -U macro set_arg = "cpp_args" elif arg == "--no-cpp": settings["use_cpp"] = False elif arg == "-h" or arg == "--help" or arg == "--usage": usage() sys.exit(0) else: raise ArgumentError("Unknown commandline argument '{}'".format(arg)) else: # must be a file settings["input_files"].append(arg) if not set_arg is None: raise ArgumentError("Expected a value for option '{}'".format(sys.argv[-1])) if not settings["input_files"]: raise ArgumentError("No input files") except ArgumentError: e = sys.exc_info()[1] print str(e) print "For usage, run: python pp2cc.py --help" sys.exit(255) # end of arguments processing if not settings["output_filename"]: settings["output_filename"] = os.path.splitext(settings["input_files"][0])[0] + ".asm" try: parse = Parse() for filename in settings["input_files"]: parse.loadFile(filename, use_cpp=settings["use_cpp"], cpp_args=settings["cpp_args"]) if settings["dump_parsed_files"]: parse.show() else: parse.compile() if not settings["dump_parsed_files"]: source = parse.getSource() outf = open(settings["output_filename"], "w") outf.write(source) outf.close() print "Compilation succesful" print "The output is written to", settings["output_filename"] except c_parser.ParseError: e = sys.exc_info()[1] print "A fatal (syntax) error was found during parsing:", str(e) sys.exit(1)