From 424646a2a6ed13fbefefeb5081f4802e8da55b3e Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Sat, 26 Nov 2011 20:32:14 +0000 Subject: Initial commit of pp2cc --- pp2cc.py | 666 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 666 insertions(+) create mode 100755 pp2cc.py diff --git a/pp2cc.py b/pp2cc.py new file mode 100755 index 0000000..40ef5cf --- /dev/null +++ b/pp2cc.py @@ -0,0 +1,666 @@ +#!/usr/bin/env python +"""Compiles C into assembly for the practicum processor (PP2) + +All rights reserved, you may not redistribute or use this program without prior +permission from Peter Wu or Xander Houtman. Use of this program is entirely +your own risk. In no circumstances can the authors of this program be held +responsible for any damage including, but not limited to, financial damage or +data loss. Modification of this program is not allowed without prior +permission. The generated output (assembly and messages) are not subject to +this license. +""" + +import sys +from pycparser import c_parser, c_ast + +__author__ = "Peter Wu" +__copyright__ = "Copyright 2011, Peter Wu" +__credits__ = ["Peter Wu", "Xander Houtman"] +__license__ = "Proprietary" +__version__ = "1.0" +__maintainer__ = "Peter Wu" +__email__ = "uwretep@gmail.com" + +parser = c_parser.CParser() + +class Registers(object): + """Register related functions + + Some functions were supposed to optimize things (alloc and free), but it's + not implemented + """ + def __init__(self): + # which registers are in use? + self.registers = {} + for reg in range(0, 8): + self.registers[str(reg)] = False + def alloc(self): + """Retrieves a register which is marked unused and marks it as in use""" + for register in range(0, 6): + # find a free register + if not self.registers[register]: + self.registers[register] = True + return "R" + register + raise RuntimeError("No free registers are available") + def free(self, register): + """Marks a register as unused + + Keyword arguments: + register -- a register in the format Rn where 0 <= n < 8 + + """ + # remove leading R + register = register[1:] + if self.registers[register]: + self.registers[register] = False; + else: + raise RuntimeError("free() of unused register") + def get_register(self, text): + """Retrieve the first register from a binary instruction""" + text = text.strip() + # ignore commented lines + if text.startswith(";"): + return None + # skip labels if any + text = text.upper().split(":", 1)[-1] + # exclude the first operand, e.g. LOAD + text = text.lstrip(" ").split(" ", 1)[-1] + # retieve the first element before the next space, e.g. R0 + text = text.lstrip(" ").split(" ", 1)[0] + # is it a register? + if len(text) == 2 and text[0] == "R" and text[1] in self.registers: + return text + # out of luck + return None + def find_register(self, instructions_list, fatal=False): + """Finds the last modified register in a list of instructions""" + for line in reversed(instructions_list): + reg = self.get_register(line) + if reg: + return reg + if fatal: + raise RuntimeError("No register found in instructions list") + return None + +class Asm(object): + def __init__(self): + self.level = 0 + + def format_line(self, line, label="", indent_size=-1): + """Returns an indented line optionally prefixed with label""" + if indent_size < 0: + indent_size = 8 + self.level * 4 + if label: + indent_size -= len(label) + 2 + if indent_size >= 0: + label += ": " + else: + label += ":" + # whitespace is generated if indent_size > 0 + indent = indent_size * " " + return indent + label + line + + def binary_op(self, binop, reg, operand, label=""): + # output format: BBBB RR OO... + return self.format_line("{:<4} {:<2} {}".format(binop, reg, operand), label) + + def unary_op(self, unop, operand, label=""): + # output format: UUUU OO... + return self.format_line("{:<4} {}".format(unop, operand), label) + + def branch_op(self, brop, branch_label, label=""): + # output format: BBB LL... + return self.format_line("{:<4} {}".format(brop, branch_label), label) + + def push(self, register, label=""): + return self.format_line("PUSH " + register, label) + + def pull(self, register): + return self.format_line("PULL " + register) + + def insert_label(self, instruction, label): + new_instruction = instruction.lstrip(" ") + indent_size = len(instruction) - len(new_instruction) + return self.format_line(new_instruction, label, indent_size) + + def noop(self, label, register="R0"): + """Returns a labelled no operation operator + + Note that the Zero, Negative and oVerflow flags are modified + """ + return self.binary_op("LOAD", register, register, label) + +class LinkedNode(object): + """Stores nodes with a reference to the parent""" + def __init__(self, node, parent=None, level_increment=False): + self.node = node + self.parent = parent + self.function = None + self.level = 0 + # inherit level from parent + if parent: + self.level = parent.level + if level_increment: + self.incrementLevel() + def incrementLevel(self): + self.level += 1 + def getFunctionNode(self): + if isinstance(self.node, c_ast.FuncDef): + return self + if self.parent: + assert isinstance(self.parent, LinkedNode), "parent is not a LinkedNode!" + return self.parent.getFunctionNode() + return None + def setFunction(self, function): + """Sets the function object containing label information""" + self.function = function + def getLocation(self): + if hasattr(self.node, "coord"): + return self.node.coord + return "Unknown" + +class Function(object): + def __init__(self, node): + self.name = node.decl.name + self.node = node + def labelBegin(self): + """Returns a label pointing to the begin of a function""" + return "fn_" + self.name + def labelEnd(self): + """Returns a label pointing to the end of a function""" + return "fne_" + self.name + +class Logger(object): + def __init__(self): + pass + def warning(self, message, linked_node=None): + self.log(message, linked_node=linked_node, type="warning") + def log(self, message, linked_node=None, type="log"): + source = "" + if isinstance(linked_node, LinkedNode): + source = str(linked_node.getLocation()) + ": " + print type + ":" + source + message + +class Parse(object): + def __init__(self, filename, source=''): + if source is "": + file = open(filename, 'r') + source = file.read() + file.close() + self.node = parser.parse(source, filename=filename); + self.logger = Logger() + + # word size in bits + self.WORDSIZE = 18 + # preload operators and their instructions + self.binary_ops = { + "+": "ADD", + "-": "SUB", + "*": "MULS", + "/": "DIV", + "%": "MOD", + "&": "AND", + "|": "OR", + "^": "XOR" + } + self.comparison_ops = { + "<" : "BLT", + ">" : "BGT", + "<=": "BLE", + ">=": "BGE", + "==": "BEQ", + "!=": "BNE" + } + self.binary_ops.update(self.comparison_ops) + + self.globalVars = [] + self.functions = {} + self.codeSegment = [] + self.labels = set() + # helper for asm code generation + self.asm = Asm() + # watches which registers are used and which are not + self.registers = Registers() + def show(self): + self.node.show(showcoord=True) + def uniqLbl(self, labelname): + """Generates an unique label name, save and return it""" + lbl = labelname + if labelname in self.labels: + i = 0 + while True: + lbl = labelname + "_" + str(i) + if not lbl in self.labels: + break + i += 1 + self.addLabel(lbl) + return lbl + def addLabel(self, labelname): + if labelname in self.labels: + raise RuntimeError("Redefinition of label '{}'".format(labelname)) + self.labels.add(labelname) + def compile(self): + """Loops through the nodes in an AST syntax tree and generates ASM""" + root_node = LinkedNode(self.node) + for thing in self.node.ext: + linked_node = LinkedNode(thing, root_node) + self.asm.level = linked_node.level + if isinstance(thing, c_ast.Decl): + self.defineGlobalVar(linked_node) + elif isinstance(thing, c_ast.FuncDef): + self.defineFunction(linked_node) + else: + raise RuntimeError("I don't know a node in the global scope: " + str(thing)) + def getSource(self): + """Retrieves the ASM source. You need to compile it first""" + self.addSource("@DATA") + for varName in self.globalVars: + padding = " " * (16 - len(varName) - 1) + self.addSource(varName + padding + " DW 1") + self.addSource() + self.addSource("@CODE") + for line in self.codeSegment: + self.addSource(line) + self.addSource() + self.addSource() + self.addSource("@END") + def addSource(self, line=''): + print line + def defineGlobalVar(self, linked_node): + varname = linked_node.node.name + self.globalVars.append(varname) + def defineFunction(self, linked_node): + node = linked_node.node + linked_node.incrementLevel() + self.asm.level = linked_node.level + + funcname = node.decl.name + if funcname in self.functions: + raise RuntimeError("Redefinition of function '{}'".format(funcname)) + function = self.functions[funcname] = Function(node) + + lbl_func = function.labelBegin() + lbl_end = function.labelEnd() + self.addLabel(lbl_func) + self.addLabel(lbl_end) + linked_node.setFunction(function) + + self.codeSegment.append(self.asm.noop(lbl_func)) + + stmts = self.parseStatement(node.body, linked_node) + self.asm.level = linked_node.level + for stmt in stmts: + self.codeSegment.append(stmt) + + # functions other than main() are subroutines + #if funcname != "main": + # since main should never be left, add the instruction anyway. + # Otherwise, it'd crash anyway because it loops into the stack (if it + # had not crashed before) + self.codeSegment.append(self.asm.format_line("RTS", label=lbl_end)) + # add an extra newline + self.codeSegment.append("") + def functionLabel(self, name): + if name in self.functions: + function = self.functions[name] + return function.labelBegin() + # name not found :? perhaps an external declaration + lbl = "fn_" + name + self.logger.warning("call to undefined function, assuming label '{}'".format(lbl)) + return lbl + + def parseCompound(self, linked_node): + linked_node.incrementLevel() + lines = [] + for cn in linked_node.node.block_items: + lines += self.parseStatement(cn, linked_node) + return lines + + def parseBinaryLogicalOp(self, linked_node): + """Returns lines of assembly for a logical OR or AND operation + + ASM for logical OR: + operand1 + BEQ fal ; if operand1 is false + operand2 + BEQ fal ; if operand2 is false + tru:LOAD R0 1 + BRA end + fal:LOAD R0 0 + end:NOOP + + ASM for logical AND: + operand1 + BNE tru ; if operand1 is true + operand2 + BNE tru ; if operand2 is true + fal:LOAD R0 0 + BRA end + tru:LOAD R0 1 + end:NOOP + """ + node = linked_node.node + assert node.op in ("&&", "||"), "Expected a && or || operator, found '{}' instead".format(node.op) + isAnd = node.op == "&&" + + reg_result = "R0" + lbl_false = self.uniqLbl("lOp_false") + lbl_true = self.uniqLbl("lOp_true") + lbl_end = self.uniqLbl("lOp_end") + + # branch instruction to be inserted if && yields false or if || is true + if isAnd: + branch_early = self.asm.branch_op("BEQ", lbl_false) + else: + branch_early = self.asm.branch_op("BNE", lbl_true) + + result_true = self.asm.binary_op("LOAD", reg_result, 1, label=lbl_true) + result_false = self.asm.binary_op("LOAD", reg_result, 0, label=lbl_false) + + lines = self.parseStatement(node.left, linked_node, level_increment=True) + lines.append(branch_early) + + lines += self.parseStatement(node.right, linked_node, level_increment=True) + lines.append(branch_early) + # fix indentation + self.asm.level = linked_node.level + + # if 'true && true' or 'false || false' + if isAnd: + lines.append(result_true) + else: + lines.append(result_false) + lines.append(self.asm.branch_op("BRA", lbl_end)) + + # if 'false && x' or 'true && false' or 'x || true' or 'true || x' + if isAnd: + lines.append(result_false) + else: + lines.append(result_true) + lines.append(self.asm.noop(lbl_end, reg_result)) + return lines + def parseBinaryOp(self, linked_node): + node = linked_node.node + + lines = [] + if node.op in self.binary_ops: + op = self.binary_ops[node.op] + elif node.op in ("&&", "||"): + return self.parseBinaryLogicalOp(linked_node) + else: + raise RuntimeError("Binary op is not implemented yet: " + node.op) + + # process the first operand, the result is likely in R0 or R1 + ops_first = self.parseStatement(node.left, linked_node, level_increment=True) + assert len(ops_first), "Empty operand for binary operation (first part)" + lines += ops_first + + reg_first = self.registers.find_register(ops_first, fatal=True) + + # fix indentation after running parseStatement + self.asm.level = linked_node.level + + # optimization, but this should probably be done afterwards + if isinstance(node.right, c_ast.Constant): + operand = self.convertInteger(node=node.right) + else: + lines.append(self.asm.push(reg_first)) + + ops_second = self.parseStatement(node.right, linked_node, level_increment=True) + assert len(ops_second), "Empty operand for binary operation (second part)" + lines += ops_second + + # the register storing the result of the second operand + reg_second = self.registers.find_register(ops_second, fatal=True) + operand = reg_second + + # since we use the stack for calculations, we can use less + # registers: R0 and R1 + if reg_second == "R0": + reg_first = "R1" + else: + reg_first = "R0" + # put the data back in the free register + lines.append(self.asm.pull(reg_first)) + + # perform binary operation + if node.op in self.comparison_ops: + bin_end = self.uniqLbl("cmpEnd") + bin_true = self.uniqLbl("cmpTrue") + lines.append(self.asm.binary_op("CMP", reg_first, operand)) + # a comparison is more likely to be true, so optimize for that: + # true will follow the path CMP - - LOAD - NOOP, where false + # will follow: CMP - - LOAD - BRA - NOOP. This just happened + # to be the easiest to implement, so don't blame me for premature + # optimization ;) + lines.append(self.asm.branch_op(op, bin_true)) + lines.append(self.asm.binary_op("LOAD", reg_first, 0)) + lines.append(self.asm.branch_op("BRA", bin_end)) + lines.append(self.asm.binary_op("LOAD", reg_first, 1, label=bin_true)) + lines.append(self.asm.noop(bin_end, register=reg_first)) + else: + lines.append(self.asm.binary_op(op, reg_first, operand)) + return lines + def parseUnaryOp(self, linked_node): + """Returns lines of assembly for unary operators + + Supported operators are logical negation, one's complement (bitwise + NOT), plus and minus + + ASM for logical NOT: + operand + BEQ fal ; if operand is false + tru:LOAD R0 1 + BRA end + fal:LOAD R0 0 + end:NOOP + """ + op = linked_node.node.op + # first fetch the operand + lines = self.parseStatement(linked_node.node.expr, linked_node) + # determine the register in which the result is stored + reg = self.registers.find_register(lines, fatal=True) + + if op == "+": + # K&R A7.4.4 "[the unary +] was added for symmetry with unary -" + pass + elif op == "-": + lines.append(self.asm.binary_op("MULS", reg, "-1")) + elif op == "~": + # XXX unsigned vs signed integers. Assume signed for now + mask = "1" * (self.WORDSIZE - 1) + lines.append(self.asm.binary_op("XOR", reg, "%0" + mask)) + elif op == "!": + lbl_false = self.uniqLbl("not_false") + lbl_true = self.uniqLbl("not_true") + lbl_end = self.uniqLbl("not_end") + # assume that the status words of the registers describes the value + # of the expression + lines.append(self.asm.branch_op("BEQ", lbl_false)) + lines.append(self.asm.binary_op("LOAD", reg, 1, label=lbl_true)) + lines.append(self.asm.branch_op("BRA", lbl_end)) + lines.append(self.asm.binary_op("LOAD", reg, 0, label=lbl_false)) + lines.append(self.asm.noop(lbl_end, register=reg)) + elif op == "--": + lines.append(self.asm.binary_op("SUB", reg, 1)) + elif op == "++": + lines.append(self.asm.binary_op("ADD", reg, 1)) + elif op == "&": + raise RuntimeError("Address operator '&' is not supported.") + elif op == "*": + # XXX support writing to addresses? (IOAREA) + raise RuntimeError("Indirection operator '*' is not supported.") + elif op == "sizeof": + raise RuntimeError("Sizeof operator 'sizeof' is not supported.") + elif op == "p++" or op == "p--": + # XXX postfix/prefix + raise RuntimeError("Post-inc or post-dec is not supported (yet?).") + else: + raise RuntimeError("Unknown unary operator '{}'".format(op)) + return lines + def convertInteger(self, value=None, node=None): + if value is None: + if isinstance(node, c_ast.Constant) and node.type == "int": + value = node.value + else: + raise RuntimeError("Value is not given and node is not a valid integer constant") + + value = value.upper() + if "LL" in value: # long long integer, ignore + value = value.replace("LL", "", 1) + elif "L" in value: # long integer, ignore + value = value.replace("L", "", 1) + if "U" in value: # unsigned integer, ignore for now + value = value.replace("U", "", 1) + if value.startswith("0X"): + value = value[2:] + if len(value) == 5: + # highest possible value is 3FFFF, so adding a zero would not make sense + value = "$" + value + else: + value = "$0" + value + elif value.startswith("0"): + # octal numbers are not understood by PP2 assembly, therefore convert it + value = str(int(value, 8)) + return value + def convertStrToInteger(self, str): + """Converts a PP2 assembly integer string to an integer""" + str = str.upper() + if str.startswith("$"): + return int(str[1:], 16) + elif str.startswith("%"): + return int(str[1:], 8) + return int(str, 10) + def parseConstant(self, linked_node, register="R0"): + node = linked_node.node + if node.type == "int": + value = self.convertInteger(node.value) + else: + raise RuntimeError("Unsupported constant type: " + node.type) + return [self.asm.binary_op("LOAD", register, value)] + def parseID(self, linked_node): + raise RuntimeError("Not implemented yet: symbolic names") + def parseIf(self, linked_node): + linked_node.incrementLevel() + node = linked_node.node + lbl_if = self.uniqLbl("if") # if + lbl_th = self.uniqLbl("th") # then + if node.iffalse: + lbl_end = lbl_el = self.uniqLbl("el") # else + else: + lbl_end = lbl_fi = self.uniqLbl("fi") # endif + + + lines = self.parseStatement(node.cond, linked_node) + # unused label, but insert it for clearness if possible + if len(lines) > 0: + if not self.asm.has_label(lines[0]): + lines[0] = self.asm.insert_label(lines[0], lbl_if) + + # if cond is zero, go to else (BEQ branches if Z=0). Do not use CMP + lines.append(self.asm.branch_op("BEQ", lbl_end)) + + # if true... + then_block = self.parseStatement(node.iftrue, linked_node) + # required label, so insert a NOOP if necessary + if len(then_block) > 0: + if self.asm.has_label(then_block[0]): + lines.append(self.asm.noop(lbl_th)) + else: + then_block[0] = self.asm.insert_label(then_block, lbl_th) + lines += then_block + lines.append(self.asm.branch_op("BRA", lbl_end)) + + # else... + if node.iffalse: + else_block = self.parseStatement(node.iffalse, linked_node) + if len(else_block) > 0: + if self.asm.has_label(else_block[0]): + lines.append(self.asm.noop(lbl_el)) + else: + else_block[0] = self.asm.insert_label(else_block, lbl_el) + lines += else_block + # why branch if already at the end of the block? hmm + #lines.append(self.asm.branch_op("BRA", lbl_end)) + + lines.append(self.asm.noop(lbl_end)) + return lines + def parseReturn(self, linked_node): + """Return from a function. For non-void functions the result is in R0""" + func_node = linked_node.getFunctionNode() + if not func_node: + raise RuntimeError("return not in function") + + return_value = linked_node.node.expr + if return_value: + lines = self.parseStatement(return_value, linked_node) + else: + lines = [] + if return_value: + reg = self.registers.find_register(lines, fatal=True) + # ensure that the return value is in R0 for consistency + if reg != "R0": + lines.append(self.asm.binary_op("LOAD", "R0", reg)) + + self.asm.level = linked_node.level - 1 + lines.append(self.asm.branch_op("BRA", func_node.function.labelEnd())) + # don't do the below, it may not cleanup properly + #lines.append(self.asm.format_line("RTS")) + return lines + def parseFuncCall(self, linked_node): + # XXX function arguments + # node.name is a c_ast.ID, the real function name is in name + funcname = linked_node.node.name.name + return [self.asm.branch_op("BRS", self.functionLabel(funcname))] + def parseCast(self, linked_node): + self.logger.warning("Found a type cast, but these are unsupported.", linked_node=linked_node) + return self.parseStatement(linked_node.node.expr, linked_node) + def parseStatement(self, node, parent_linked_node, level_increment=False): + linked_node = LinkedNode(node, parent_linked_node, level_increment=level_increment) + self.asm.level = linked_node.level + lines = [] + if isinstance(node, c_ast.Assignment): + raise RuntimeError("Not implemented for type " + repr(node)) + elif isinstance(node, c_ast.Decl): + self.defineGlobalVar(linked_node, parent_linked_node) + elif isinstance(node, c_ast.DoWhile): + raise RuntimeError("Not implemented for type " + repr(node)) + elif isinstance(node, c_ast.ExprList): + raise RuntimeError("Not implemented for type " + repr(node)) + elif isinstance(node, c_ast.For): + raise RuntimeError("Not implemented for type " + repr(node)) + elif isinstance(node, c_ast.FuncDecl): + raise RuntimeError("Not implemented for type " + repr(node)) + elif isinstance(node, c_ast.PtrDecl): + raise RuntimeError("Not implemented for type " + repr(node)) + elif isinstance(node, c_ast.TernaryOp): + raise RuntimeError("Not implemented for type " + repr(node)) + elif isinstance(node, c_ast.While): + raise RuntimeError("Not implemented for type " + repr(node)) + else: + for entry in ("Compound", "BinaryOp", "If", "Constant", "ID", + "Return", "FuncCall", "UnaryOp", "Cast"): + if isinstance(node, getattr(c_ast, entry)): + lines += getattr(self, "parse" + entry)(linked_node) + break + else: + raise RuntimeError("Not implemented and unknown: " + repr(node)) + return lines + + # currently, there is no difference between an expression and statement. This might + # change in the future + def parseExpression(self, node, parent): + pass + +try: + filename = sys.argv[1] + parse = Parse(filename) + if len(sys.argv) == 3: + parse.show() + #node.ext[0].ext[0].show() + else: + parse.compile() + parse.getSource() +except c_parser.ParseError: + e = sys.exc_info()[1] + print "Parse error:" + str(e) -- cgit v1.2.1