def is_register_changed(self, line, register):
    """Returns True if the register is possibly modified in the line

    The check is conservative: it answers True whenever the instruction
    might write to the register, so callers can decide to save it first.

    Keyword arguments:
    line -- The instruction line to be analyzed; anything after the first
            ";" is ignored and the rest is compared case-insensitively
    register -- The register to be looked for in the line

    Raises RuntimeError if register is rejected by self.is_register().
    """
    def register_number(name):
        # Numeric part of a register name such as "R3", or None if the
        # name has no purely numeric suffix.
        # NOTE(review): whether is_register() accepts such non-numbered
        # names is not visible from here -- confirm.
        digits = name[1:]
        return int(digits) if digits.isdigit() else None

    line = line.split(";", 1)[0].strip().upper()
    register = register.upper()
    if not self.is_register(register):
        raise RuntimeError("Invalid register argument")
    # split into at most 3 elements (instruction, register, operand)
    matches = re.split(r"\s+", line, 2)
    if len(matches) == 2:
        instruction, reg = matches
        if instruction == "PULL" and reg == register:
            return True
        # Assume function calls to be malicious
        if instruction == "BRS":
            return True
    elif len(matches) == 3:
        instruction, reg, operand = matches
        # remove spaces and tabs from the operand. LF and CR do not occur.
        # str.replace is used instead of translate(None, ...) because the
        # latter only exists for Python 2 byte strings.
        operand = operand.replace(" ", "").replace("\t", "")
        # pre-decrement / post-increment addressing changes the register
        if (operand.startswith("[--" + register)
                or operand.endswith(register + "++]")):
            return True
        if instruction == "STOR" and operand == register:
            return True
        # MULL and DVMD modify two registers: the named one and the next
        if instruction in ("MULL", "DVMD") and self.is_register(reg):
            first = register_number(reg)
            wanted = register_number(register)
            if (first is not None and wanted is not None
                    and first + 1 == wanted):
                return True
        # every instruction except the comparing ones is assumed to write
        # to its register argument
        if instruction not in ("CMP", "CHCK") and reg == register:
            return True
    return False
register in which the result is stored reg = self.registers.find_register(lines, fatal=True) - if op == "+": + if op == "*": + # load the value Rn at address into Rn + lines.append(self.asm.binary_op("LOAD", reg, "[" + reg + "]")) + elif op == "+": # K&R A7.4.4 "[the unary +] was added for symmetry with unary -" pass elif op == "-": @@ -768,8 +809,6 @@ class Parse(object): lines.append(self.asm.branch_op("BRA", lbl_end)) lines.append(self.asm.binary_op("LOAD", reg, 0, label=lbl_false)) lines.append(self.asm.noop(lbl_end, register=reg)) - elif op == "&": - raise RuntimeError("Address operator '&' is not supported.") elif op == "sizeof": raise RuntimeError("Sizeof operator 'sizeof' is not supported.") else: @@ -1045,13 +1084,37 @@ class Parse(object): expr_result = self.parseExpression(node.rvalue, linked_node) lines += expr_result result_reg = self.registers.find_register(expr_result) - self.registers.alloc(result_reg) + # a register which is available for storing the address of lvalue lvalue_reg = self.registers.alloc() self.registers.free(lvalue_reg) self.registers.free(result_reg) - lines += self.parseLValue(LinkedNode(node.lvalue, linked_node), - register=lvalue_reg) + + linked_lval = LinkedNode(node.lvalue, linked_node) + # this lvalue may be an expression if there is an indirection + if isinstance(node.lvalue, c_ast.UnaryOp) and node.lvalue.op == "*": + lvalue = self.parseExpression(linked_lval.node.expr, linked_lval) + lvalue_result_reg = self.registers.find_register(lvalue) + for line in lvalue: + # if the right value register is modified, we need to pull + if self.registers.is_register_changed(line, result_reg): + lines.append(self.asm.push(result_reg)) + lines += lvalue + # if the register of the left value equals the register of + # the right value, pull the right value in the register + # which was reserved for the left value + if lvalue_result_reg == result_reg: + result_reg = lvalue_reg + lines.append(self.asm.pull(result_reg)) + break + else: + lines 
+= lvalue + # update the register for the left value because the expression may + # return a different result register + lvalue_reg = lvalue_result_reg + else: + # if not an expression, it must be a single variable name + lines += self.parseLValue(linked_lval, register=lvalue_reg) if node.op == "=": lines.append(self.asm.binary_op("STOR", result_reg, -- cgit v1.2.1