From a6f9617bd891074bba20cc1fc220e60a226e7fb4 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Fri, 2 Dec 2011 18:43:48 +0000 Subject: WIP for supporting ASM parsing and initialization of variables in @DATA --- Asm.py | 24 ++++++++++--- AsmLine.py | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ AsmParser.py | 88 ++++++++++++++++++++++++++++----------------- NamedId.py | 29 +++++++++++++++ pp2cc.py | 15 ++++++-- 5 files changed, 231 insertions(+), 39 deletions(-) create mode 100644 AsmLine.py create mode 100644 NamedId.py diff --git a/Asm.py b/Asm.py index 56aed45..018b4de 100644 --- a/Asm.py +++ b/Asm.py @@ -21,13 +21,29 @@ __email__ = "uwretep@gmail.com" import re class Asm(object): + id_re = "[a-zA-Z_][0-9a-zA-Z_]*" + re_identifier = re.compile("^" + id_re + "$") + re_label = re.compile("^\s*" + id_re + "\s*:") + # these instructions accept a label name + operators_branch = ("BEQ", "BNE", "BCS", "BCC", "BLS", "BHI", "BVC", "BVS", + "BPL", "BMI", "BLT", "BGE", "BLE", "BGT", "BRA", "BRS") + # these instructions are weird. 
+ operators_unary = ("JMP", "JSR", "CLRI", "SETI", "PSEM", "VSEM") + # these instructions accept a register (arg 1) and addressing mode (arg 2) + operators_binary = ("LOAD", "ADD", "SUB", "CMP", "MULS", "MULL", "CHCK", + "DIV", "MOD", "DVMD", "AND", "OR", "XOR", "STOR") + # these operators accept a register + operators_misc_reg = ("PUSH", "PULL") + # these instructions accept no args + operators_misc_noreg = ("RTS", "RTE") + operators_misc = operators_misc_reg + operators_misc_noreg + # all available operators + operators_all = operators_branch + operators_unary + operators_binary + operators_all += operators_misc def __init__(self): self.level = 0 - re_id = "[0-9a-zA-Z_][0-9a-zA-Z_]*" - self.re_identifier = re.compile("^" + re_id + "$") - self.re_label = re.compile("^\s*" + re_id + "\s*:") def is_identifier(self, text): - return self.re_identifier.match(text) + return self.re_identifier.match(text) is not None def format_line(self, line, label="", indent_size=-1): """Returns an indented line optionally prefixed with label""" if indent_size < 0: diff --git a/AsmLine.py b/AsmLine.py new file mode 100644 index 0000000..4561b30 --- /dev/null +++ b/AsmLine.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +"""Compiles C into assembly for the practicum processor (PP2) + +All rights reserved, you may not redistribute or use this program without prior +permission from Peter Wu or Xander Houtman. Use of this program is entirely +your own risk. In no circumstances can the authors of this program be held +responsible for any damage including, but not limited to, financial damage or +data loss. Modification of this program is not allowed without prior +permission. The generated output (assembly and messages) are not subject to +this license. 
+""" + +__author__ = "Peter Wu" +__copyright__ = "Copyright 2011, Peter Wu" +__credits__ = ["Peter Wu"] +__license__ = "Proprietary" +__version__ = "1.0" +__maintainer__ = "Peter Wu" +__email__ = "uwretep@gmail.com" + +import re +from Asm import Asm +from Registers import Registers +from NamedId import NamedId + +class AsmLine(object): + asm = Asm() + registers = Registers() + re_whitespace = re.compile("\s+") + def __init__(self, line, id_dict=None): + # for storing NamedId objects + self.id_dict = id_dict + self.label = "" + self.register = "" + self.operand = "" + line = line.split(";", 1)[0].strip() + + if ":" in line: + self.label, line = line.split(":", 1) + self.label = self.label.strip() + line = line.strip() + if self.asm.is_identifier(self.label): + # for easier label name manipulation + self.getNamedId(self.label) + else: + self.label = "" + line = label + ":" + line + + parts = self.re_whitespace.split(line, 1) + instruction = parts[0] + + if instruction in Asm.operators_binary: + # a label for sure + reg, operand = self.re_whitespace.split(parts[1], 1) + self.register = reg + self.setOperand(operand) + elif instruction in Asm.operators_branch: + # skip validation of reg for speed + self.register = parts[1] + elif instruction in Asm.operators_unary: + self.setOperand(parts[1]) + elif instruction in Asm.operators_misc_reg: + self.register = parts[1] + elif instruction in Asm.operators_misc_noreg: + # no args + pass + else: + raise RuntimeError("Unknown instruction '{}'".format(instruction)) + def setOperand(self, str): + """Sets the operand for this object either as a string or an identifier + object + """ + if self.id_dict is None: + self.operand = str + elif self.registers.is_register(str): + # register + self.operand = [str] + else: + if str.startswith("["): + # split by [ ] and +, grouping multiple occurences + self.operand = re.split("([\[\]+]+)", str) + else: + # value + self.operand = [str] + for i, part in enumerate(self.operand): + # skip empty 
elements, registers and [ ] + + if (part and not self.registers.is_register(part) and + part[0] not in "[]+"): + # turn named items into an object for easier manipulation of + # the names later + # if performance is an issue, you may want to skip the name + # validity check at the cost of NamedId pollution + if self.asm.is_identifier(part): + self.operand[i] = self.getNamedId(part) + def getNamedId(self, name): + """Returns a NamedId object for keeping track of all names at once + """ + # if there is no dictionary to store names, just return the name + if self.id_dict is None: + return name + if name not in self.id_dict: + self.id_dict[name] = NamedId(name) + return self.id_dict[name] + def __str__(self): + """Returns a line of assembly""" + line = "" + if self.label: + line += str(self.label) + ": " + line += self.instruction + if self.register: + line += " " + self.register + if self.operand: + # join all operand parts together + line += " " + "".join(str(elm) for elm in self.operand) diff --git a/AsmParser.py b/AsmParser.py index 40ad384..4ab25ed 100644 --- a/AsmParser.py +++ b/AsmParser.py @@ -18,7 +18,8 @@ __version__ = "1.0" __maintainer__ = "Peter Wu" __email__ = "uwretep@gmail.com" -from Asm import Asm +from AsmLine import AsmLine +from NamedId import NamedId import re class AsmParser(object): @@ -27,13 +28,14 @@ class AsmParser(object): self.parent = parent self.defined_names = parent.defined_names else: + self.parent = None self.data = [] self.code = [] - # defined labels for the assembly files. 
key: label, value: mapped - # label (to avoid name clashes with the other code) - self.labels = {} - # defined words/storage - self.defined_names = [] + # defined labels for the assembly files + self.labels = [] + # dictionary for holding NamedId objects for defined labels and + # identifiers (variables) + self.defined_names = {} # valid values: None, DATA and CODE self.in_section = None @@ -41,7 +43,6 @@ class AsmParser(object): self.constants = {} self.re_whitespace = re.compile("\s+") - self.asm = Asm() file = open(filename, "rU") while True: @@ -52,6 +53,11 @@ class AsmParser(object): else: break file.close() + # substitute constants and remove the name + for name, value in self.constants.iteritems(): + if name in self.defined_names: + self.defined_names[name].rename(value) + del self.defined_names[name] def parseLine(self, line): """Processes the a line from assembly""" if line.startswith("@"): @@ -80,7 +86,7 @@ class AsmParser(object): raise RuntimeError("Unrecognized command '{}'".format(cmd)) elif self.in_section in ("DATA", "CODE"): match = re.split(self.re_whitespace, line, 2) - if len(match) == 2: + if len(match) == 3: name, what, data = match if what == "EQU": self.setConstant(name, data) @@ -88,14 +94,23 @@ class AsmParser(object): elif what in ("DS", "DW"): if self.in_section == "CODE": raise RuntimeError("DS or DW found in @CODE section") - self.addName(name) - self.addData(line) + # we shouldn't need to change the name for DS/DW stuff + #self.addName(name) + if what == "DS": + # DS initializes names with zero, let's convert it to + # DW to make it compatible with the Parser + self.addData(name, [0 for x in range(0, int(data))]) + else: + self.addData(name, data.split(",")) + line = "" if line: if self.in_section == "DATA": - raise RuntimeError("Found non definition data in @DATA") - label = self.asm.get_label(line) - self.addLabel(label) - self.addCode(line) + raise RuntimeError("Found non-definition data in @DATA, " + "namely: " + line) + lineobj = 
AsmLine(line, id_dict=self.defined_names) + if lineobj.label: + self.labels.append(lineobj.label) + self.addCode(lineobj) else: # ignore other lines pass @@ -111,37 +126,44 @@ class AsmParser(object): self.parent.addCode(line) else: self.code.append(line) - def addData(self, line): - """Add a line to the @DATA section""" + def addData(self, name, words): + """Define a name in the @DATA section initialized with words + + Keyword arguments: + name -- The name as it appears in the @DATA section + words -- A list of words to be initialized + """ if self.parent: - self.parent.addData(line) + self.parent.addData(name, words) + elif name in self.data: + raise RuntimeError("Redefinition of '{}'".format(name)) else: - self.data.append(line) + self.data[name] = words def evaluateConstant(self, expression): """Evaluates a constant expression in an EQU""" if not expression.isdigit(): raise RuntimeError("I am lazy and do not support values other than" " digits in an EQU") return expression - def addLabel(self, label): - """Adds a label to the list of labels""" - if self.parent: - self.parent.addLabel(label) - elif label in self.labels: - raise RuntimeError("Label '{}' is already defined".format(label)) - else: - self.labels[label] = label def addName(self, name): - """Adds a name to the list of define words/ storage""" + """Adds a NamedId object for the name of a label or variable to the + list of defined names + """ if self.parent: self.parent.addName(name) elif name in self.defined_names: raise RuntimeError("Name '{}' is already defined".format(name)) else: - self.defined_names.append(name) - def mapLabel(self, label, new_label): - """Renames a label""" - if not name in self.labels: + self.defined_names[name] = NamedId(name) + def renameId(self, name, new_name): + """Renames an identifier for a label or variable""" + if not name in self.defined_names: raise RuntimeError("Attempt to rename an undefined '{}' to '{}'" - .format(label, new_label)) - self.labels[label] = new_label 
+ .format(name, new_name)) + self.defined_names[name].rename(new_name) + def getCodeLines(self): + """Returns the known assembly lines for the @CODE section as a list""" + if self.parent: + raise RuntimeError("You can only get the lines for @CODE from the" + " root node") + return [elm for elm in self.code] diff --git a/NamedId.py b/NamedId.py new file mode 100644 index 0000000..cb3091b --- /dev/null +++ b/NamedId.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +"""Compiles C into assembly for the practicum processor (PP2) + +All rights reserved, you may not redistribute or use this program without prior +permission from Peter Wu or Xander Houtman. Use of this program is entirely +your own risk. In no circumstances can the authors of this program be held +responsible for any damage including, but not limited to, financial damage or +data loss. Modification of this program is not allowed without prior +permission. The generated output (assembly and messages) are not subject to +this license. +""" + +__author__ = "Peter Wu" +__copyright__ = "Copyright 2011, Peter Wu" +__credits__ = ["Peter Wu"] +__license__ = "Proprietary" +__version__ = "1.0" +__maintainer__ = "Peter Wu" +__email__ = "uwretep@gmail.com" + +class NamedId(object): + def __init__(self, name): + self.name = name + self.display_name = name + def rename(self, name): + """Changes the display value of this object""" + self.display_name = name + def __str__(self): + return self.display_name \ No newline at end of file diff --git a/pp2cc.py b/pp2cc.py index 6370613..ffe9ee4 100755 --- a/pp2cc.py +++ b/pp2cc.py @@ -30,6 +30,8 @@ __email__ = "uwretep@gmail.com" class Logger(object): def __init__(self): pass + def info(self, message, linked_node=None): + self.log(message, linked_node=linked_node, type="info") def warning(self, message, linked_node=None): self.log(message, linked_node=linked_node, type="warning") def error(self, message, linked_node=None): @@ -149,14 +151,23 @@ class Parse(object): self.codeSegment += 
self.parseStatement(thing, root_node) def compileASM(self): """Processes lines of assembly and merge it into the output""" - pass + for label in self.asm_node.labels: + new_label = label + # rename existing names + if label in self.labels: + new_label = self.uniqLbl(label) + self.asm_node.renameId(label, new_label) + self.labels.add(new_label) + self.codeSegment += self.asm_node.getCodeLines() def getSource(self): """Retrieves the ASM source. You need to compile it first""" output = [] output.append("@DATA") for varName, size in self.varNames.iteritems(): padding = " " * (16 - len(varName) - 1) - output.append(varName + padding + " DS " + str(size)) + assert size > 0, "Size of '{}' must be at least 1".format(varName) + initializers = "0,".repeat(size)[0:-1] + output.append(varName + padding + " DW " + initializers) output.append("") output.append("@CODE") # initialization of global variables -- cgit v1.2.1