diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/PicoRec.java | 101 |
1 files changed, 96 insertions, 5 deletions
diff --git a/src/PicoRec.java b/src/PicoRec.java index eec2f38..f601ce3 100644 --- a/src/PicoRec.java +++ b/src/PicoRec.java @@ -11,11 +11,38 @@ public class PicoRec { /** Regular expression describing the NAT symbol. */ private final static String natRegex = "0|[1-9][0-9]*"; - private final RunAutomaton idR, natR; + /** Regular expression describing keywords (not to be interpreted as ID). */ + private final static String keywordRegex = + literalRegex("begin|declare|end"); + /** Regular expression matches all tokens accepted by the language (other + * than keywords). */ + private final static String tokenRegex = + literalRegex("[\\|,;\\+\\*\\-]|:=|" + idRegex + "|" + natRegex); + /** Regular expression matching layout tokens (to be ignored). */ + private final static String layoutRegex = "[ \n]*"; + + /** Indicator for the end of input. This symbol does not occur in the + * terminals and non-terminals. */ + private final static String END_SYMBOL = "$"; + + private final RunAutomaton idR, natR, keywordR, tokenR, layoutR; + + /** The current parsing state. */ + private String input; + private int offset; + + /** Auxilliary function to produce a regular expression taking into account + * layout considerations for symbols. */ + private static String literalRegex(String regex) { + return layoutRegex + "(" + regex + ")" + layoutRegex; + } public PicoRec() { idR = new RunAutomaton(new RegExp(idRegex).toAutomaton()); natR = new RunAutomaton(new RegExp(natRegex).toAutomaton()); + keywordR = new RunAutomaton(new RegExp(keywordRegex).toAutomaton()); + tokenR = new RunAutomaton(new RegExp(tokenRegex).toAutomaton()); + layoutR = new RunAutomaton(new RegExp(layoutRegex).toAutomaton()); } /** @@ -24,8 +51,71 @@ public class PicoRec { * @return true if the language is recognized, false otherwise. */ public boolean parse(String text) { - // TODO implement this - return false; + input = text; + offset = 0; + try { + // TODO implement this + return true; + } catch (ParseError ex) { + System.err.println("Parsing failed due to: " + ex); + return false; + } + } + + /** Skip layout characters. */ + private void skipLayout() { + int layoutLength = layoutR.run(input, offset); + assert(layoutLength >= 0); + offset += layoutLength; + } + + /** Returns the next token as recognized by the scanner or END_SYMBOL if the + * end of input is reached. Subsequent calls will always return the same + * value when match() is not called. + * + * @throws ParseError on unrecognized tokens. + */ + private String next(RunAutomaton r) { + String symbol = END_SYMBOL; + // "In context-free syntax, layout is allowed between symbols in the + // left-hand side of the productions, by automatically inserting + // optional layout (e.g. LAYOUT?) between them" + skipLayout(); + + // match actual symbol. + int length = r.run(input, offset); + if (length >= 0) { + symbol = input.substring(offset, offset + length); + } + + // if no symbol matched and this is not the end of string, fail + if (length < 0 && offset < input.length()) { + throw new ParseError("Unrecognized non-token"); + } + return symbol; + } + + /** Consumes the symbol and advanced the input pointer. */ + private void match(String symbol) { + skipLayout(); + if (END_SYMBOL.equals(symbol)) { + if (offset != input.length()) { + throw new ParseError("Cannot match end!"); + } + } else { + if (!input.startsWith(symbol, offset)) { + throw new ParseError("Cannot match symbol!"); + } + offset += symbol.length(); + } + } + + /** Exception that is raised for parser errors. */ + private static class ParseError extends RuntimeException { + private static final long serialVersionUID = 1L; + ParseError(String message) { + super(message); + } } @@ -57,13 +147,14 @@ public class PicoRec { // minimal program satisfying the language, without whitespace. rec.assertOk("begindeclare|end"); // simple expression - rec.assertOk("begin declare a | a := 1 end"); + rec.assertOk("begin declare a | a := 1; end"); - // Trivial failures + // Failure cases rec.assertFail(""); rec.assertFail("begin"); rec.assertFail("begin declare"); rec.assertFail("begin declare |"); + rec.assertFail("begin declare a | a := 1 end"); rec.assertFail("begin declare | end end"); rec.assertFail("begin declare end"); rec.assertFail("begin end"); |