#!/usr/bin/env python """Compiles C into assembly for the practicum processor (PP2) All rights reserved, you may not redistribute or use this program without prior permission from Peter Wu or Xander Houtman. Use of this program is entirely your own risk. In no circumstances can the authors of this program be held responsible for any damage including, but not limited to, financial damage or data loss. Modification of this program is not allowed without prior permission. The generated output (assembly and messages) are not subject to this license. """ import sys, re, os from pycparser import c_parser, c_ast __author__ = "Peter Wu" __copyright__ = "Copyright 2011, Peter Wu" __credits__ = ["Peter Wu", "Xander Houtman"] __license__ = "Proprietary" __version__ = "1.0" __maintainer__ = "Peter Wu" __email__ = "uwretep@gmail.com" parser = c_parser.CParser() class Registers(object): """Register related functions Some functions were supposed to optimize things (alloc and free), but it's not implemented """ def __init__(self): # which registers are in use? self.registers = {} for reg in range(0, 8): self.registers[str(reg)] = False def alloc(self, register=None): """Retrieves a register which is marked unused and marks it as in use""" # if a register was explicitly requested if register: if not self.is_register(register): raise RuntimeError("'{}' is not a register".format(register)) register = register[1] if self.registers[register]: raise RuntimeError("Register 'R{}' is already in use".format(register)) for register in range(0, 6): register = str(register) # find a free register if not self.registers[register]: break else: raise RuntimeError("No free registers are available") self.registers[register] = True return "R" + register def free(self, register): """Marks a register as unused Keyword arguments: register -- a register in the format Rn where 0 <= n < 8 """ # remove leading R register = register[1:] if register in self.registers: self.registers[register] = False; else: raise RuntimeError("free() of unused register") def get_register(self, text): """Retrieve the first register from a binary instruction""" text = text.strip() # ignore commented lines if text.startswith(";"): return None # skip labels if any text = text.upper().split(":", 1)[-1] # exclude the first operand, e.g. LOAD text = text.lstrip(" ").split(" ", 1)[-1] # retieve the first element before the next space, e.g. R0 text = text.lstrip(" ").split(" ", 1)[0] # is it a register? if self.is_register(text): return text # out of luck return None def is_register(self, text): """Returns True if text is a register, false otherwise""" return len(text) == 2 and text[0] == "R" and text[1] in self.registers def get_instruction(self, text): """Retrieve the instruction from text skipping labels and comments""" text = text.strip() # ignore commented lines if text.startswith(";"): return None # skip labels if any text = text.upper().split(":", 1)[-1] # the first element is assumed to be an instruction text = text.lstrip(" ").split(" ", 1)[0] # instructions have a length between 2 (OR) and 4 (LOAD) if len(text) >= 2 and len(text) <= 4: return text return None def find_register(self, instructions_list, fatal=False): """Finds the last modified register in a list of instructions""" for line in reversed(instructions_list): reg = self.get_register(line) if reg: return reg else: instruction = self.get_instruction(line) # convention: non-void functions store their result value in R0 if instruction == "BRS": return "R0" if fatal: raise RuntimeError("No register found in instructions list") return None class Asm(object): def __init__(self): self.level = 0 def format_line(self, line, label="", indent_size=-1): """Returns an indented line optionally prefixed with label""" if indent_size < 0: indent_size = 8 + self.level * 4 if label: indent_size -= len(label) + 2 if indent_size >= 0: label += ": " else: label += ":" # whitespace is generated if indent_size > 0 indent = indent_size * " " return indent + label + line def binary_op(self, binop, reg, operand, label=""): # output format: BBBB RR OO... return self.format_line("{:<4} {:<2} {}".format(binop, reg, operand), label) def unary_op(self, unop, operand, label=""): # output format: UUUU OO... return self.format_line("{:<4} {}".format(unop, operand), label) def branch_op(self, brop, branch_label, label=""): # output format: BBB LL... return self.format_line("{:<4} {}".format(brop, branch_label), label) def push(self, register, label=""): return self.format_line("PUSH " + register, label) def pull(self, register, label=""): return self.format_line("PULL " + register, label) def insert_label(self, instruction, label): new_instruction = instruction.lstrip(" ") indent_size = len(instruction) - len(new_instruction) return self.format_line(new_instruction, label, indent_size) def has_label(self, line): """"Returns True if a line appears to contain a label, False otherwise""" return Parse.is_identifier(line.split(":", 1)[0].strip()) def noop(self, label, register="R0"): """Returns a labelled no operation operator Note that the Zero, Negative and oVerflow flags are modified """ return self.binary_op("LOAD", register, register, label) + " ; NOOP" class LinkedNode(object): """Stores nodes with a reference to the parent""" def __init__(self, node, parent=None, level_increment=False): self.node = node self.parent = parent self.function = None self.break_label = None self.continue_label = None self.type = type(node).__name__ self.level = 0 # inherit level from parent if parent: self.level = parent.level if level_increment: self.incrementLevel() def incrementLevel(self): self.level += 1 def getFunctionNode(self): if self.type == "FuncDef": return self if self.parent: assert isinstance(self.parent, LinkedNode), "parent is not a LinkedNode!" return self.parent.getFunctionNode() return None def setFunction(self, function): """Sets the function object containing label information""" self.function = function def getLocation(self): if hasattr(self.node, "coord"): return self.node.coord return "Unknown" def setBreak(self, break_label): """Marks this node as a loop or switch by setting the label for break Keywords list: break_label -- The label to continue when using the break keyword """ self.break_label = break_label def setContinue(self, continue_label): """Marks this node as a loop by setting the label for continue Keywords list: continue_label -- The label to continue when using the continue keyword """ self.continue_label = continue_label def getBreakNode(self): """Returns the label to the end of the nearest switch statement or for loop""" if self.break_label is not None: return self if self.parent: assert isinstance(self.parent, LinkedNode), "parent is not a LinkedNode!" return self.parent.getBreakNode() return None def getContinueNode(self): """Returns the label to the label to continue a loop""" if self.continue_label is not None: return self if self.parent: assert isinstance(self.parent, LinkedNode), "parent is not a LinkedNode!" return self.parent.getContinueNode() return None class Function(object): def __init__(self, node): self.name = node.decl.name self.node = node def labelBegin(self): """Returns a label pointing to the begin of a function""" return "fn_" + self.name def labelEnd(self): """Returns a label pointing to the end of a function""" return "fne_" + self.name class Logger(object): def __init__(self): pass def warning(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="warning") def log(self, message, linked_node=None, type="log"): source = "" if isinstance(linked_node, LinkedNode): source = str(linked_node.getLocation()) + ": " print type + ":" + source + message class Parse(object): re_identifier = re.compile("^[0-9a-zA-Z_][0-9a-zA-Z_]*$") @classmethod def is_identifier(cls, text): return cls.re_identifier.match(text) def __init__(self): self.logger = Logger() self.node = None # word size in bits self.WORDSIZE = 18 # preload operators and their instructions self.binary_ops = { "+": "ADD", "-": "SUB", "*": "MULS", "/": "DIV", "%": "MOD", "&": "AND", "|": "OR", "^": "XOR" } self.comparison_ops = { "<" : "BLT", ">" : "BGT", "<=": "BLE", ">=": "BGE", "==": "BEQ", "!=": "BNE" } self.binary_ops.update(self.comparison_ops) # global variable names to be defined in @DATA self.globalVars = [] self.functions = {} # holds instructions for initializing the globalVars self.globalInit = [] # holds the miscellaneous instructions for @CODE self.codeSegment = [] self.labels = set() # helper for asm code generation self.asm = Asm() # watches which registers are used and which are not self.registers = Registers() def loadFile(self, filename): """Loads a file and parses the contents of it""" file = open(filename, 'r') source = file.read() file.close() self.node = parser.parse(source, filename=filename); def show(self): self.node.show(showcoord=True) def uniqLbl(self, labelname): """Generates an unique label name, save and return it""" lbl = labelname if labelname in self.labels: i = 0 while True: lbl = labelname + "_" + str(i) if not lbl in self.labels: break i += 1 self.addLabel(lbl) return lbl def addLabel(self, labelname): if labelname in self.labels: raise RuntimeError("Redefinition of label '{}'".format(labelname)) self.labels.add(labelname) def compile(self): """Loops through the nodes in an AST syntax tree and generates ASM""" root_node = LinkedNode(self.node) for thing in self.node.ext: if not isinstance(thing, c_ast.Decl) and not isinstance(thing, c_ast.FuncDef): linked_node = LinkedNode(thing, parent=root_node) self.logger.warning("I expected a variable or function" " definition or declaration in the root" " node, but also found " + str(thing), linked_node=linked_node) self.codeSegment += self.parseStatement(thing, root_node) def getSource(self): """Retrieves the ASM source. You need to compile it first""" output = [] output.append("@DATA") for varName in self.globalVars: padding = " " * (16 - len(varName) - 1) output.append(self.nameToVar(varName) + padding + " DW 1") output.append("") output.append("@CODE") # initialization of global variables output += self.globalInit if not "main" in self.functions: self.logger.warning("No main function found with label 'fn_main'") output.append(self.asm.branch_op("BRA", "fn_main")) output.append("") output += self.codeSegment output.append("") output.append("@END") output.append("") return os.linesep.join(output) def nameToVar(self, name): """Returns the variable name which will be used in assembly Keyword list: name -- This name will be returned with the var_ prefix """ return "var_" + name def parseFuncDef(self, linked_node): node = linked_node.node linked_node.incrementLevel() self.asm.level = linked_node.level funcname = node.decl.name if funcname in self.functions: raise RuntimeError("Redefinition of function '{}'".format(funcname)) function = self.functions[funcname] = Function(node) lbl_func = function.labelBegin() lbl_end = function.labelEnd() self.addLabel(lbl_func) self.addLabel(lbl_end) linked_node.setFunction(function) lines = [self.asm.noop(lbl_func)] lines += self.parseStatement(node.body, linked_node) self.asm.level = linked_node.level # functions other than main() are subroutines #if funcname != "main": # since main should never be left, add the instruction anyway. # Otherwise, it'd crash anyway because it loops into the stack (if it # had not crashed before) lines.append(self.asm.format_line("RTS", label=lbl_end)) # add an extra newline lines.append("") return lines def functionLabel(self, name): if name in self.functions: function = self.functions[name] return function.labelBegin() # name not found :? perhaps an external declaration lbl = "fn_" + name self.logger.warning("call to undefined function, assuming label '{}'".format(lbl)) return lbl def parseCompound(self, linked_node): linked_node.incrementLevel() lines = [] if linked_node.node.block_items: for cn in linked_node.node.block_items: lines += self.parseStatement(cn, linked_node) return lines def parseBinaryLogicalOp(self, linked_node): """Returns lines of assembly for a logical OR or AND operation ASM for logical OR: operand1 BEQ fal ; if operand1 is false operand2 BEQ fal ; if operand2 is false tru:LOAD R0 1 BRA end fal:LOAD R0 0 end:NOOP ASM for logical AND: operand1 BNE tru ; if operand1 is true operand2 BNE tru ; if operand2 is true fal:LOAD R0 0 BRA end tru:LOAD R0 1 end:NOOP """ node = linked_node.node assert node.op in ("&&", "||"), "Expected a && or || operator, found '{}' instead".format(node.op) isAnd = node.op == "&&" reg_result = "R0" lbl_false = self.uniqLbl("lOp_false") lbl_true = self.uniqLbl("lOp_true") lbl_end = self.uniqLbl("lOp_end") # branch instruction to be inserted if && yields false or if || is true if isAnd: branch_early = self.asm.branch_op("BEQ", lbl_false) else: branch_early = self.asm.branch_op("BNE", lbl_true) result_true = self.asm.binary_op("LOAD", reg_result, 1, label=lbl_true) result_false = self.asm.binary_op("LOAD", reg_result, 0, label=lbl_false) lines = self.parseExpression(node.left, linked_node, level_increment=True) lines.append(branch_early) lines += self.parseExpression(node.right, linked_node, level_increment=True) lines.append(branch_early) # fix indentation self.asm.level = linked_node.level # if 'true && true' or 'false || false' if isAnd: lines.append(result_true) else: lines.append(result_false) lines.append(self.asm.branch_op("BRA", lbl_end)) # if 'false && x' or 'true && false' or 'x || true' or 'true || x' if isAnd: lines.append(result_false) else: lines.append(result_true) lines.append(self.asm.noop(lbl_end, reg_result)) return lines def parseBinaryOp(self, linked_node): node = linked_node.node lines = [] if node.op in self.binary_ops: op = self.binary_ops[node.op] elif node.op in ("&&", "||"): return self.parseBinaryLogicalOp(linked_node) elif node.op in ("<<", ">>"): op = node.op else: raise RuntimeError("Binary op is not implemented yet: " + node.op) # process the first operand, the result is likely in R0 or R1 ops_first = self.parseExpression(node.left, linked_node, level_increment=True) assert len(ops_first), "Empty operand for binary operation (first part)" lines += ops_first reg_first = self.registers.find_register(ops_first, fatal=True) # fix indentation after running parseStatement self.asm.level = linked_node.level # optimization, but this should probably be done afterwards if isinstance(node.right, c_ast.Constant): operand = self.convertInteger(node=node.right) else: lines.append(self.asm.push(reg_first)) ops_second = self.parseExpression(node.right, linked_node, level_increment=True) assert len(ops_second), "Empty operand for binary operation (second part)" lines += ops_second # the register storing the result of the second operand reg_second = self.registers.find_register(ops_second, fatal=True) operand = reg_second # since we use the stack for calculations, we can use less # registers (2). Use the Registers class for determining the next # free register self.registers.alloc(reg_second) reg_first = self.registers.alloc() self.registers.free(reg_second) self.registers.free(reg_first) # put the data back in the free register lines.append(self.asm.pull(reg_first)) # perform binary operation if node.op in self.comparison_ops: bin_end = self.uniqLbl("cmpEnd") bin_true = self.uniqLbl("cmpTrue") lines.append(self.asm.binary_op("CMP", reg_first, operand)) # a comparison is more likely to be true, so optimize for that: # true will follow the path CMP - - LOAD - NOOP, where false # will follow: CMP - - LOAD - BRA - NOOP. This just happened # to be the easiest to implement, so don't blame me for premature # optimization ;) lines.append(self.asm.branch_op(op, bin_true)) lines.append(self.asm.binary_op("LOAD", reg_first, 0)) lines.append(self.asm.branch_op("BRA", bin_end)) lines.append(self.asm.binary_op("LOAD", reg_first, 1, label=bin_true)) lines.append(self.asm.noop(bin_end, register=reg_first)) elif op in ("<<", ">>"): if isinstance(node.right, c_ast.Constant): shift_bits = self.convertStrToInteger(operand) if shift_bits < 0: self.logger.warning("The number of bits to be shifted are" "negative, no operation will be performed", linked_node=linked_node) lines.append(self.asm.binary_op("LOAD", reg_first, reg_first)) elif shift_bits > 0: # shifting only makes a difference if shifting a number # larger than 0 (e.g. 1 << 1) operand = pow(2, shift_bits) if op == "<<": lines.append(self.asm.binary_op("MULS", reg_first, operand)) else: # because DIV works with signed numbers, right shift is # a bit harder because the sign bit does not vanish lbl_make_positive = self.uniqLbl("shiftPositive") lines.append(self.asm.binary_op("CMP", reg_first, 0)) # if it's positive, skip handling for negative values lines.append(self.asm.branch_op("BPL", lbl_make_positive)) # -1 / 2 = 0, so make sure that the last bit is zero lines.append(self.asm.binary_op("AND", reg_first, "%10")) # shift it by one lines.append(self.asm.binary_op("DIV", reg_first, 1)) # make it positive if not already mask = "%0" + (self.WORDSIZE - 1) * "1" lines.append(self.asm.binary_op("AND", reg_first, mask, label=lbl_make_positive)) # decrease shift count and finish if nothing to shift operand /= 2 if operand > 1: lines.append(self.asm.binary_op("DIV", reg_first, operand)) else: lbl_gen = self.uniqLbl("gen2_loop") lbl_gen_end = self.uniqLbl("gen2_done") # necessary to set the status flags for the branch conditions lines.append(self.asm.binary_op("CMP", reg_second, 0)) # if operand 2 is lower than 1, do not bother shifting operand # 1 and jump to the end lines.append(self.asm.branch_op("BLE", lbl_gen_end)) if op == "<<": lines.append(self.asm.binary_op("MULS", reg_first, 2, label=lbl_gen)) else: # now first check if the first number is negative... lines.append(self.asm.binary_op("CMP", reg_first, 0)) lines.append(self.asm.branch_op("BPL", lbl_gen)) # shift it by one lines.append(self.asm.binary_op("DIV", reg_first, 1)) # make negative values positive mask = "%0" + (self.WORDSIZE - 1) * "1" lines.append(self.asm.binary_op("AND", reg_first, mask)) # decrease shift count and finish if shift count is zero lines.append(self.asm.binary_op("SUB", reg_second, 1)) lines.append(self.asm.branch_op("BEQ", lbl_gen_end)) # continue regular shifting lines.append(self.asm.binary_op("DIV", reg_first, 2, label=lbl_gen)) lines.append(self.asm.binary_op("SUB", reg_second, 1)) # if we came from BLE, x <= 0. BGT only jumps if x > 0, so # we can safe another NOOP by adding the label here lines.append(self.asm.branch_op("BGT", lbl_gen, label=lbl_gen_end)) else: lines.append(self.asm.binary_op(op, reg_first, operand)) return lines def parseUnaryOp(self, linked_node): """Returns lines of assembly for unary operators Supported operators are logical negation, one's complement (bitwise NOT), plus and minus ASM for logical NOT: operand BEQ fal ; if operand is false tru:LOAD R0 1 BRA end fal:LOAD R0 0 end:NOOP """ op = linked_node.node.op # first fetch the operand lines = self.parseExpression(linked_node.node.expr, linked_node) # determine the register in which the result is stored reg = self.registers.find_register(lines, fatal=True) if op == "+": # K&R A7.4.4 "[the unary +] was added for symmetry with unary -" pass elif op == "-": lines.append(self.asm.binary_op("MULS", reg, "-1")) elif op == "~": # Conforming K&R A7.4.6, signed types are treated as unsigned when # applying ~ lines.append(self.asm.binary_op("XOR", reg, "%1")) elif op == "!": lbl_false = self.uniqLbl("not_false") lbl_true = self.uniqLbl("not_true") lbl_end = self.uniqLbl("not_end") # assume that the status words of the registers describes the value # of the expression lines.append(self.asm.branch_op("BEQ", lbl_false)) lines.append(self.asm.binary_op("LOAD", reg, 1, label=lbl_true)) lines.append(self.asm.branch_op("BRA", lbl_end)) lines.append(self.asm.binary_op("LOAD", reg, 0, label=lbl_false)) lines.append(self.asm.noop(lbl_end, register=reg)) elif op == "--": lines.append(self.asm.binary_op("SUB", reg, 1)) elif op == "++": lines.append(self.asm.binary_op("ADD", reg, 1)) elif op == "&": raise RuntimeError("Address operator '&' is not supported.") elif op == "*": # XXX support writing to addresses? (IOAREA) raise RuntimeError("Indirection operator '*' is not supported.") elif op == "sizeof": raise RuntimeError("Sizeof operator 'sizeof' is not supported.") elif op == "p++" or op == "p--": # XXX postfix/prefix raise RuntimeError("Post-inc or post-dec is not supported (yet?).") else: raise RuntimeError("Unknown unary operator '{}'".format(op)) return lines def parseTernaryOp(self, linked_node): """Returns lines of ASM for ternary operation cond?expr_true:expr_false It looks very similar to an If expect that the return value is important """ linked_node.incrementLevel() node = linked_node.node lbl_el = self.uniqLbl("el") # else lbl_end = self.uniqLbl("fi") # endif lines = self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_el)) # if true... then_block = self.parseExpression(node.iftrue, linked_node) # use a single register for the result, using the one of expr_true result_reg = self.registers.find_register(then_block, fatal=True) lines += then_block lines.append(self.asm.branch_op("BRA", lbl_end)) # else... else_block = self.parseExpression(node.iffalse, linked_node) assert len(else_block), "Else block is empty" result_reg_else = self.registers.find_register(else_block, fatal=True) # for consistency with the result of expr_true if result_reg_else != result_reg: lines.append(self.asm.binary_op("LOAD", result_reg, result_reg_else)) # add labels for the else block if self.asm.has_label(else_block[0]): lines.append(self.asm.noop(lbl_el)) else: else_block[0] = self.asm.insert_label(else_block[0], lbl_el) lines += else_block lines.append(self.asm.noop(lbl_end, register=result_reg)) return lines def convertInteger(self, value=None, node=None): if value is None: if isinstance(node, c_ast.Constant) and node.type == "int": value = node.value else: raise RuntimeError("Value is not given and node is not a valid integer constant") value = value.upper() if "LL" in value: # long long integer, ignore value = value.replace("LL", "", 1) elif "L" in value: # long integer, ignore value = value.replace("L", "", 1) if "U" in value: # unsigned integer, ignore for now value = value.replace("U", "", 1) if value.startswith("0X"): value = value[2:] if len(value) == 5: # highest possible value is 3FFFF, so adding a zero would not make sense value = "$" + value else: value = "$0" + value elif value.startswith("0"): # octal numbers are not understood by PP2 assembly, therefore convert it value = str(int(value, 8)) return value def convertStrToInteger(self, str): """Converts a PP2 assembly integer string to an integer""" str = str.upper() if str.startswith("$"): return int(str[1:], 16) elif str.startswith("%"): return int(str[1:], 8) return int(str, 10) def parseConstant(self, linked_node, register="R0"): node = linked_node.node if node.type == "int": value = self.convertInteger(node.value) else: raise RuntimeError("Unsupported constant type: " + node.type) return [self.asm.binary_op("LOAD", register, value)] def parseID(self, linked_node, register="R0"): name = linked_node.node.name if not name in self.globalVars: raise RuntimeError("Use of undefined variable '{}'".format(name)) var_name = self.nameToVar(name) return [self.asm.binary_op("LOAD", register, "[GB+" + var_name + "]")] def parseIf(self, linked_node): linked_node.incrementLevel() node = linked_node.node lines = self.parseExpression(node.cond, linked_node) branch_op = "BEQ" then_block = self.parseStatement(node.iftrue, linked_node) if node.iffalse: else_block = self.parseStatement(node.iffalse, linked_node) # if(x){}else{y} is equivalent to if(!x){} if not then_block: # note: else can still be empty, if(x){}else{}, but that is # handled below with the "if then_block" then_block = else_block else_block = [] # invert result branch_op = "BNE" else: else_block = [] # if(x){}else{} is equivalent to x, which do not need the below if then_block: if else_block: lbl_false = self.uniqLbl("el") # else lbl_end = self.uniqLbl("fi") # endif else: lbl_end = lbl_false = self.uniqLbl("fi") # endif # if cond is satisfied, go to else (BEQ branches if Z=0, BNE # branches if Z!=0). Do not use CMP, rely on the status flags set # set by the condition lines.append(self.asm.branch_op(branch_op, lbl_false)) # if true... lines += then_block # else... if else_block: # this branch is part of the then block, but include it here to # avoid redundancy (BRA lbl, lbl: NOOP) lines.append(self.asm.branch_op("BRA", lbl_end)) # XXX this is optimization for labelling else, do it later? if self.asm.has_label(else_block[0]): lines.append(self.asm.noop(lbl_false)) else: else_block[0] = self.asm.insert_label(else_block[0], lbl_false) lines += else_block lines.append(self.asm.noop(lbl_end)) return lines def parseReturn(self, linked_node): """Return from a function. For non-void functions the result is in R0""" func_node = linked_node.getFunctionNode() if not func_node: raise RuntimeError("return not in function") return_value = linked_node.node.expr if return_value: lines = self.parseExpression(return_value, linked_node) else: lines = [] if return_value: reg = self.registers.find_register(lines, fatal=True) # ensure that the return value is in R0 for consistency if reg != "R0": lines.append(self.asm.binary_op("LOAD", "R0", reg)) self.asm.level = linked_node.level - 1 lines.append(self.asm.branch_op("BRA", func_node.function.labelEnd())) # don't do the below, it may not cleanup properly #lines.append(self.asm.format_line("RTS")) return lines def parseFuncCall(self, linked_node): # XXX function arguments # node.name is a c_ast.ID, the real function name is in name funcname = linked_node.node.name.name return [self.asm.branch_op("BRS", self.functionLabel(funcname))] def parseCast(self, linked_node): self.logger.warning("Found a type cast, but these are unsupported.", linked_node=linked_node) return self.parseExpression(linked_node.node.expr, linked_node) def parseDoWhile(self, linked_node): node = linked_node.node lbl_do = self.uniqLbl("do") # used for supporting continue/break lbl_cond = self.uniqLbl("doCond") lbl_end = self.uniqLbl("doEnd") linked_node.setBreak(lbl_end) linked_node.setContinue(lbl_doCond) # provides a way to redo the loop lines = [self.asm.noop(lbl_do)] # do { ... }, expect do{/*empty*/} as well lines += self.parseStatement(node.stmt, linked_node) # while (...) # used for supporting continue lines.append(self.asm.noop(lbl_cond)) lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BNE", lbl_do)) # used for supporting break lines.append(self.asm.noop(lbl_end)) return lines def parseWhile(self, linked_node): node = linked_node.node lbl_while = self.uniqLbl("while") lbl_while_end = self.uniqLbl("whileEnd") # used for supporting break/continue linked_node.setBreak(lbl_while_end) linked_node.setContinue(lbl_while) # while ( lines = [self.asm.noop(lbl_while)] # .. lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_while_end)) # ){.. lines += self.parseStatement(node.stmt, linked_node) lines.append(self.asm.branch_op("BRA", lbl_while)) # } lines.append(self.asm.noop(lbl_while_end)) return lines def parseFor(self, linked_node): """Returns ASM for a for loop for (init; cond; next) { body; } is equivalent to: init; while (cond) { body; next; } Note that for (;;){} is allowed too which is equivalent to: while (true) {} """ node = linked_node.node lines = [] lbl_for = self.uniqLbl("for") lbl_for_end = self.uniqLbl("forEnd") # used for supporting break/continue linked_node.setBreak(lbl_for_end) if node.next: lbl_cont = self.uniqLbl("forNext") linked_node.setContinue(lbl_cont) else: linked_node.setContinue(lbl_for) if node.init: # init; lines += self.parseExpression(node.init, linked_node) # while ( lines.append(self.asm.noop(lbl_for)) if node.cond: # if no condition, it's should evaluate to true lines += self.parseExpression(node.cond, linked_node) lines.append(self.asm.branch_op("BEQ", lbl_for_end)) # ){ body; lines += self.parseStatement(node.stmt, linked_node) if node.next: # next; lines.append(self.asm.noop(lbl_cont)) lines += self.parseExpression(node.next, linked_node) lines.append(self.asm.branch_op("BRA", lbl_for)) # } lines.append(self.asm.noop(lbl_for_end)) return lines def parseExprList(self, linked_node): lines = [] for expr in linked_node.node.exprs: lines += self.parseExpression(expr, linked_node) return lines def parseAssignment(self, linked_node): lines = [] node = linked_node.node # XXX support other types like pointers if not isinstance(node.lvalue, c_ast.ID): raise RuntimeError("Currently only assignments to symbolic names" "are allowed") name = node.lvalue.name # name of variable as it appears in the ASM code if not name in self.globalVars: raise RuntimeError("Assignment to undefined variable '{}'".format(name)) var_name = self.nameToVar(node.lvalue.name) expr_result = self.parseExpression(node.rvalue, linked_node) lines += expr_result result_reg = self.registers.find_register(expr_result) if node.op == "=": lines.append(self.asm.binary_op("STOR", result_reg, "[GB+" + var_name + "]")) else: raise RuntimeError("Unsupported assignment operator: " + node.op) return lines def parseDecl(self, linked_node): lines = [] varname = linked_node.node.name if varname in self.globalVars: raise RuntimeError("Redefinition of variable '{}'".format(varname)) self.globalVars.append(varname) # if the declaration also has a definition if linked_node.node.init: var_name = self.nameToVar(varname) init_val = self.parseExpression(linked_node.node.init, linked_node) result_reg = self.registers.find_register(init_val) init_val.append(self.asm.binary_op("STOR", result_reg, "[GB+" + var_name + "]")) # if global context (i.e. not in a function) if linked_node.getFunctionNode() is None: self.globalInit += init_val else: lines += init_val return lines def parseBreak(self, linked_node): loop_node = linked_node.getBreakNode() if not loop_node: raise RuntimeError("break not in a loop or switch") return [self.asm.branch_op("BRA", loop_node.break_label)] def parseContinue(self, linked_node): loop_node = linked_node.getContinueNode() if not loop_node: raise RuntimeError("continue not in a loop") return [self.asm.branch_op("BRA", loop_node.continue_label)] def parseEmptyStatement(self, linked_node): """Returns an empty list for an "empty statement" (duh)""" return [] def parseStatement(self, node, parent_linked_node, level_increment=False): linked_node = LinkedNode(node, parent_linked_node, level_increment=level_increment) self.asm.level = linked_node.level lines = [] if linked_node.type in ("Compound", "If", "Return", "DoWhile", "While", "For", "Decl", "FuncDef", "Break", "Continue", "EmptyStatement"): lines += getattr(self, "parse" + linked_node.type)(linked_node) elif linked_node.type in ("FuncDecl", "ArrayDecl", "Case", "DeclList", "Default", "EllipsisParam",# (int a, ...) "Enum", # enum type "Enumerator", # enum value "EnumeratorList", # list of enum values "FuncDecl", "Goto", "Label", "ParamList", "PtrDecl", "Struct", "TypeDecl", "Typedef", "Switch", "Union"): raise RuntimeError("Not implemented for type " + repr(node)) else: lines += self.parseExpression(node, parent_linked_node, level_increment) return lines # currently, there is no difference between an expression and statement. This might # change in the future def parseExpression(self, node, parent_linked_node, level_increment=False): linked_node = LinkedNode(node, parent_linked_node, level_increment=level_increment) self.asm.level = linked_node.level lines = [] if linked_node.type in ("ID", "Constant", "UnaryOp", "FuncCall", "Cast", "BinaryOp", "TernaryOp", "Assignment", "ExprList"): lines += getattr(self, "parse" + linked_node.type)(linked_node) elif linked_node.type in ("Compound", "If", "Return", "DoWhile", "While", "For", "Decl", "FuncDef", "Break", "Continue", "EmptyStatement"): raise RuntimeError("A statement is used in expression context") elif linked_node.type in ("ArrayRef", "CompoundLiteral",# won't be supported "IdentifierType", "NamedInitializer",# attributes, remove "StructRef", "Typename"): raise RuntimeError("Not implemented for expression type " + repr(node)) else: raise RuntimeError("Not implemented expression and unknown type: " + repr(node)) return lines if __name__ == "__main__": settings = { "input_files": [], "dump_parsed_files": False, "output_filename": "" } set_arg = None # arguments processing class ArgumentError(Exception): pass def usage(): print """Usage: python pp2cc.py [options] filename.. Multiple input files can be specified, options can be specified before and after filenames. Options: -o filename The name of the output file. It defaults to the first input file from which the extension is removed, and .asm is added. --tree Instead of compiling the file into assembly, show the parse tree.""" try: for arg in sys.argv[1:]: if not set_arg is None: assert set_arg in settings, "The command line option cannot be found in the settings" setting = settings[set_arg] if isinstance(setting, str): settings[set_arg] = arg elif isinstance(arg, list): settings.append(arg) elif isinstance(arg, bool): raise NotImplementedError("Booleans cannot be set in options") set_arg = None elif len(arg): # if the argument is an option if arg[0] == "-": if arg == "-o": set_arg = "output_filename" elif arg == "--tree": settings["dump_parsed_files"] = True elif arg == "-h" or arg == "--help" or arg == "--usage": usage() sys.exit(0) else: raise ArgumentError("Unknown commandline argument '{}'".format(arg)) else: # must be a file settings["input_files"].append(arg) if not set_arg is None: raise ArgumentError("Expected a value for option '{}'".format(sys.argv[-1])) if not settings["input_files"]: raise ArgumentError("No input files") except ArgumentError: e = sys.exc_info()[1] print str(e) print "For usage, run: python pp2cc.py --help" sys.exit(255) # end of arguments processing if not settings["output_filename"]: settings["output_filename"] = os.path.splitext(settings["input_files"][0])[0] + ".asm" try: parse = Parse() for filename in settings["input_files"]: parse.loadFile(filename) if settings["dump_parsed_files"]: parse.show() else: parse.compile() if not settings["dump_parsed_files"]: source = parse.getSource() outf = open(settings["output_filename"], "w") outf.write(source) outf.close() print "Compilation succesful" print "The output is written to", settings["output_filename"] except c_parser.ParseError: e = sys.exc_info()[1] print "Parse error:", str(e) sys.exit(1)