From 35a991bdcc806e58627bc676341931aed0a71432 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Sun, 8 May 2016 17:40:25 +0200 Subject: PicoRec: begin implementing parsing Remove keywordRegex, it is not used now. To do: make sure that ID does not match keywords, implement parseExp. --- src/PicoRec.java | 47 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/src/PicoRec.java b/src/PicoRec.java index 191d2f8..1d6e16b 100644 --- a/src/PicoRec.java +++ b/src/PicoRec.java @@ -11,9 +11,6 @@ public class PicoRec { /** Regular expression describing the NAT symbol. */ private final static String natRegex = "0|[1-9][0-9]*"; - /** Regular expression describing keywords (not to be interpreted as ID). */ - private final static String keywordRegex = - literalRegex("begin|declare|end"); /** Regular expression matches all tokens accepted by the language (other * than keywords). */ private final static String tokenRegex = @@ -25,7 +22,7 @@ public class PicoRec { * terminals and non-terminals. */ private final static String END_SYMBOL = "$"; - private final RunAutomaton idR, natR, keywordR, tokenR, layoutR; + private final RunAutomaton idR, natR, tokenR, layoutR; /** The current parsing state. 
*/ private String input; @@ -40,7 +37,6 @@ public class PicoRec { public PicoRec() { idR = new RunAutomaton(new RegExp(idRegex).toAutomaton()); natR = new RunAutomaton(new RegExp(natRegex).toAutomaton()); - keywordR = new RunAutomaton(new RegExp(keywordRegex).toAutomaton()); tokenR = new RunAutomaton(new RegExp(tokenRegex).toAutomaton()); layoutR = new RunAutomaton(new RegExp(layoutRegex).toAutomaton()); } @@ -53,6 +49,36 @@ public class PicoRec { public void parse(String text) { input = text; offset = 0; + match("begin"); + match("declare"); + parseIdList(); + match("|"); + parseStatements(); + match("end"); + match(END_SYMBOL); + } + + private void parseIdList() { + // Parses IDLIST ::= ID "," | + while (idR.run(next())) { + match(next()); // ID + match(","); + } + } + + private void parseStatements() { + // Parses STATEMENTS ::= STATEMENT ";" | + // => STATEMENT ::= ID ":=" EXP ";" | + while (idR.run(next())) { + match(next()); // ID + match(":="); + parseExp(); + match(";"); // statement terminator + } + } + + private void parseExp() { + // TODO implement me } /** Skip layout characters. */ @@ -68,7 +94,7 @@ public class PicoRec { * * @throws ParseError on unrecognized tokens. */ - private String next(RunAutomaton r) { + private String next() { String symbol = END_SYMBOL; // "In context-free syntax, layout is allowed between symbols in the // left-hand side of the productions, by automatically inserting @@ -76,7 +102,7 @@ public class PicoRec { skipLayout(); // match actual symbol. - int length = r.run(input, offset); + int length = tokenR.run(input, offset); if (length >= 0) { symbol = input.substring(offset, offset + length); } @@ -97,7 +123,7 @@ public class PicoRec { } } else { if (!input.startsWith(symbol, offset)) { - throw new ParseError("Cannot match symbol!"); + throw new ParseError("Cannot match symbol: " + symbol); } offset += symbol.length(); } @@ -146,14 +172,15 @@ public class PicoRec { // minimal program satisfying the language, without whitespace. 
rec.assertOk("begindeclare|end"); // simple expression - rec.assertOk("begin declare a | a := 1; end"); + rec.assertOk("begin declare a, | a := 1; end"); // Failure cases rec.assertFail(""); rec.assertFail("begin"); rec.assertFail("begin declare"); rec.assertFail("begin declare |"); - rec.assertFail("begin declare a | a := 1 end"); + rec.assertFail("begin declare a | a := 1; end"); + rec.assertFail("begin declare a, | a := 1 end"); rec.assertFail("begin declare | end end"); rec.assertFail("begin declare end"); rec.assertFail("begin end"); -- cgit v1.2.1