From 8247c54b7bf96d467a28774a97c963534d140a69 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Thu, 1 Dec 2011 15:47:34 +0000 Subject: Split classes in separate files --- pp2cc.py | 396 +-------------------------------------------------------------- 1 file changed, 5 insertions(+), 391 deletions(-) (limited to 'pp2cc.py') diff --git a/pp2cc.py b/pp2cc.py index c0e763d..b0c9b18 100755 --- a/pp2cc.py +++ b/pp2cc.py @@ -12,6 +12,9 @@ this license. import sys, re, os, operator from pycparser import c_parser, c_ast +from Asm import Asm +from Registers import Registers +from LinkedNode import LinkedNode __author__ = "Peter Wu" __copyright__ = "Copyright 2011, Peter Wu" @@ -21,392 +24,6 @@ __version__ = "1.0" __maintainer__ = "Peter Wu" __email__ = "uwretep@gmail.com" -parser = c_parser.CParser() - -class Registers(object): - """Register related functions - - Some functions were supposed to optimize things (alloc and free), but it's - not implemented - """ - def __init__(self): - # which registers are in use? - self.registers = {} - for reg in range(0, 8): - self.registers[str(reg)] = False - def alloc(self, register=None): - """Retrieves a register which is marked unused, marks it as in use and - return it""" - # if a register was explicitly requested - if register: - if not self.is_register(register): - raise RuntimeError("'{}' is not a register".format(register)) - register = register[1] - if self.registers[register]: - raise RuntimeError("Register 'R{}' is already in use".format(register)) - else: - for register in range(0, 6): - register = str(register) - # find a free register - if not self.registers[register]: - break - else: - raise RuntimeError("No free registers are available") - self.registers[register] = True - return "R" + register - def free(self, register): - """Marks a register as unused - - Keyword arguments: - register -- a register in the format Rn where 0 <= n < 8 - - """ - # remove leading R - register = register[1:] - if register in self.registers: - if self.registers[register]: - self.registers[register] = False; - else: - raise RuntimeError("free() of unused register") - else: - raise RuntimeError("free() of invalid register") - def get_register(self, text): - """Retrieve the first register from a binary instruction""" - text = text.strip() - # ignore commented lines - if text.startswith(";"): - return None - # skip labels if any - text = text.upper().split(":", 1)[-1] - # exclude the first operand, e.g. LOAD - text = text.lstrip(" ").split(" ", 1)[-1] - # retieve the first element before the next space, e.g. R0 - text = text.lstrip(" ").split(" ", 1)[0] - # is it a register? - if self.is_register(text): - return text - # out of luck - return None - def is_register(self, text): - """Returns True if text is a register, false otherwise""" - return len(text) == 2 and text[0] == "R" and text[1] in self.registers - def get_instruction(self, text): - """Retrieve the instruction from text skipping labels and comments""" - text = text.strip() - # ignore commented lines - if text.startswith(";"): - return None - # skip labels if any - text = text.upper().split(":", 1)[-1] - # the first element is assumed to be an instruction - text = text.lstrip(" ").split(" ", 1)[0] - # instructions have a length between 2 (OR) and 4 (LOAD) - if len(text) >= 2 and len(text) <= 4: - return text - return None - def find_register(self, instructions_list, fatal=False): - """Finds the last modified register in a list of instructions""" - for line in reversed(instructions_list): - reg = self.get_register(line) - if reg: - return reg - else: - instruction = self.get_instruction(line) - # convention: non-void functions store their result value in R0 - if instruction == "BRS": - return "R0" - if fatal: - raise RuntimeError("No register found in instructions list") - return None - def is_register_changed(self, line, register): - """Returns True if the register is possibly modified in the line - - Keyword arguments: - line -- The instruction line to be analyzed - register -- The register to be looked for in the line - """ - line = line.split(";", 1)[0].strip().upper() - register = register.upper() - if not self.is_register(register): - raise RuntimeError("Invalid register argument") - # split into at most 3 elements (instruction, register, operand) - matches = re.split("\s+", line, 2) - if len(matches) == 2: - instruction, reg = matches - if (instruction == "PULL" and - self.is_register(reg) and reg == register): - return True - # Assume function calls to be malicious - if instruction == "BRS": - return True - elif len(matches) == 3: - instruction, reg, operand = matches - # remove whitespace from the operand. LF and CR do not occur - operand = operand.translate(None, "\t ") - if (operand.startswith("[--" + register) - or operand.endswith(register + "++]")): - return True - if instruction == "STOR" and operand == register: - return True - # MULL and DVMD modify two registers - if (instruction in ("MULL", "DVMD") and self.is_register(reg) and - int(reg[1]) + 1 == int(register[1])): - return True - if instruction not in ("CMP", "CHCK") and reg == register: - return True - return False - -class Asm(object): - def __init__(self): - self.level = 0 - - def format_line(self, line, label="", indent_size=-1): - """Returns an indented line optionally prefixed with label""" - if indent_size < 0: - indent_size = 8 + self.level * 4 - if label: - indent_size -= len(label) + 2 - if indent_size >= 0: - label += ": " - else: - label += ":" - # whitespace is generated if indent_size > 0 - indent = indent_size * " " - return indent + label + line - - def binary_op(self, binop, reg, operand, label=""): - # output format: BBBB RR OO... - return self.format_line("{:<4} {:<2} {}".format(binop, reg, operand), label) - - def unary_op(self, unop, operand, label=""): - # output format: UUUU OO... - return self.format_line("{:<4} {}".format(unop, operand), label) - - def branch_op(self, brop, branch_label, label=""): - # output format: BBB LL... - return self.format_line("{:<4} {}".format(brop, branch_label), label) - - def push(self, register, label=""): - return self.format_line("PUSH " + register, label) - - def pull(self, register, label=""): - return self.format_line("PULL " + register, label) - - def insert_label(self, instruction, label): - new_instruction = instruction.lstrip(" ") - indent_size = len(instruction) - len(new_instruction) - return self.format_line(new_instruction, label, indent_size) - - def has_label(self, line): - """"Returns True if a line appears to contain a label, False otherwise""" - return Parse.is_identifier(line.split(":", 1)[0].strip()) - - def noop(self, label, register="R0"): - """Returns a labelled no operation operator - - Note that the Zero, Negative and oVerflow flags are modified - """ - return self.binary_op("LOAD", register, register, label) + " ; NOOP" - -class Variables(object): - def __init__(self, defined_names, parent_variables): - """A scope for holding variable names - - Keywords arguments: - defined_names -- A list of defined variables to which additional - variables might be appended - parent_variables -- the parent Variables object. If None, it's a global - variable scope - """ - self.local_vars = {} - self.defined_names = defined_names - self.parent_variables = parent_variables - def uniqName(self, name): - """Returns an unique variable name""" - uniq_name = name - i = 0 - while uniq_name in self.defined_names: - uniq_name = name + "_" + str(i) - i += 1 - return uniq_name - def getName(self, name): - """Gets the name of a declared variable as it appears in the @DATA - section""" - if name in self.local_vars: - return self.local_vars[name] - if self.parent_variables: - return self.parent_variables.getName(name) - raise RuntimeError("Use of undefined variable '{}'".format(name)) - def declName(self, name, size=1, prefix="var"): - """Declares a variable in the nearest scope and returns the label - name""" - if name in self.local_vars: - raise RuntimeError("Redeclaration of variable '{}'".format(name)) - # global variables are prefixed "var_", locals with "varl_" - var_name = prefix + ("l_" if self.parent_variables else "_") + name - var_name = self.uniqName(var_name) - self.local_vars[name] = var_name - self.defined_names[var_name] = size - return var_name - -class LinkedNode(object): - """Stores nodes with a reference to the parent""" - def __init__(self, node, parent=None, level_increment=False, - defined_names=None): - """Holds properties for a node - - Keyword arguments: - node -- a Node object which is an object from the c_ast class - parent -- a parent LinkedNode object - level_increment -- True if the indentation level needs to be - incremented, False otherwise - defined_names -- a list of names which will be used in the @DATA - section. If not set, it'll be looked up in the parent - """ - self.node = node - if parent: - assert isinstance(parent, LinkedNode), "parent is not a LinkedNode!" - self.parent = parent - self.function = None - self.break_label = None - self.continue_label = None - self.type = type(node).__name__ - self.level = 0 - self.variables = None - # for supporting post increment and post decrement - self.post_lines = [] - parent_variables = None - # inherit level and variables from parent - if parent: - self.level = parent.level - self.variables = parent_variables = parent.variables - if not defined_names: - defined_names = parent.variables.defined_names - - # for is added for the init part (C99) - if self.type in ("Compound", "FileAST", "For"): - # the node appears to have an own variable scope - if defined_names is None: - raise RuntimeError("No object found for storing variables") - self.variables = Variables(defined_names, parent_variables) - # Identifiers which are in use (think of variables and labels) - self.defined_names = defined_names - if not self.variables: - raise RuntimeError("No variables object found") - if level_increment: - self.incrementLevel() - # labels are limited to function contexts - if self.type == "FuncDef": - self.goto_labels = {} - def handle_post_lines(self, lines): - """Add post-increment lines to the lines list and clear the queue""" - lines += self.post_lines - self.post_lines = [] - def getScopeNode(self): - """Get the nearest node which introduces a new scope. - - If there is no node found an exception is raised because it expects at - least a global scope""" - if self.local_vars is not None: - return self - if self.parent: - return self.parent.getScopeNode() - raise RuntimeError("No global variable scope was found") - def isTypeStatement(self): - """Returns True if the node is a statement type""" - return self.type in ("Compound", "If", "Return", "DoWhile", "While", - "For", "Decl", "FuncDef", "Break", "Continue", - "EmptyStatement", "Switch", "DeclList", - "FuncDecl", "ArrayDecl", "Case", - "Default", "EllipsisParam",# (int a, ...) - "Enum", # enum type - "Enumerator", # enum value - "EnumeratorList", # list of enum values - "FuncDecl", "Goto", "Label", "ParamList", "PtrDecl", "Struct", - "TypeDecl", "Typedef", "Union") - def getStatementNode(self): - """Returns the nearest LinkedNode which is a statement node type""" - if self.isTypeStatement(): - return self - if self.parent: - return self.parent.getStatementNode() - return None - def incrementLevel(self): - self.level += 1 - def getFunctionNode(self): - """Returns the nearest LinkedNode which is a function definition node - type""" - if self.type == "FuncDef": - return self - if self.parent: - return self.parent.getFunctionNode() - return None - def setFunction(self, function): - """Sets the function object containing label information""" - self.function = function - def getLocation(self): - if hasattr(self.node, "coord"): - return self.node.coord - return "Unknown" - def setBreak(self, break_label): - """Marks this node as a loop or switch by setting the label for break - - Keywords arguments: - break_label -- The label to continue when using the break keyword - """ - self.break_label = break_label - def setContinue(self, continue_label): - """Marks this node as a loop by setting the label for continue - - Keywords arguments: - continue_label -- The label to continue when using the continue keyword - """ - self.continue_label = continue_label - def getBreakNode(self): - """Returns the label to the end of the nearest switch statement or for - loop""" - if self.break_label is not None: - return self - if self.parent: - return self.parent.getBreakNode() - return None - def getContinueNode(self): - """Returns the label to the label to continue a loop""" - if self.continue_label is not None: - return self - if self.parent: - return self.parent.getContinueNode() - return None - def setLabel(self, label_name): - """Sets the label for this node and return the label name as it appears - in assembly - """ - if self.parent: - function = self.parent.getFunctionNode() - if not self.parent or not function: - raise RuntimeError("Labels are only allowed in functions") - if label_name in function.goto_labels: - raise RuntimeError("Duplicate label '{}'".format(label_name)) - - label_asm = "lbl_" + label_name - i = 0 - while label_asm in self.defined_names: - label_asm = "lbl_" + label_name + str(i) - i += 1 - - function.goto_labels[label_name] = label_asm - return label_asm - def lookupLabel(self, label_name): - """Returns the label name as it appears in assembly for label_name""" - # get the nearest function for this node - if self.parent: - function = self.parent.getFunctionNode() - if not self.parent or not function: - raise RuntimeError("Labels are only allowed in functions") - if label_name in function.goto_labels: - return function.goto_labels[label_name] - raise RuntimeError("Label '{}' used but not defined".format(name)) - class Function(object): def __init__(self, node): self.name = node.decl.name @@ -430,11 +47,8 @@ class Logger(object): print type + ":" + source + message class Parse(object): - re_identifier = re.compile("^[0-9a-zA-Z_][0-9a-zA-Z_]*$") - @classmethod - def is_identifier(cls, text): - return cls.re_identifier.match(text) def __init__(self): + self.parser = c_parser.CParser() self.logger = Logger() self.node = None @@ -484,7 +98,7 @@ class Parse(object): file = open(filename, 'r') source = file.read() file.close() - self.node = parser.parse(source, filename=filename); + self.node = self.parser.parse(source, filename=filename); def show(self): self.node.show(showcoord=True) def uniqLbl(self, labelname): -- cgit v1.2.1