/** * Recognizer for the language described by grammar-ll1.txt. */ import dk.brics.automaton.RegExp; import dk.brics.automaton.RunAutomaton; public class PicoRec { /** Regular expression describing the ID symbol. */ private final static String idRegex = "[a-z][a-z0-9]*"; /** Regular expression describing the NAT symbol. */ private final static String natRegex = "0|[1-9][0-9]*"; /** Regular expression describing keywords (not to be interpreted as ID). */ private final static String keywordRegex = literalRegex("begin|declare|end"); /** Regular expression matches all tokens accepted by the language (other * than keywords). */ private final static String tokenRegex = literalRegex("[\\|,;\\+\\*\\-]|:=|" + idRegex + "|" + natRegex); /** Regular expression matching layout tokens (to be ignored). */ private final static String layoutRegex = "[ \n]*"; /** Indicator for the end of input. This symbol does not occur in the * terminals and non-terminals. */ private final static String END_SYMBOL = "$"; private final RunAutomaton idR, natR, keywordR, tokenR, layoutR; /** The current parsing state. */ private String input; private int offset; /** Auxilliary function to produce a regular expression taking into account * layout considerations for symbols. */ private static String literalRegex(String regex) { return layoutRegex + "(" + regex + ")" + layoutRegex; } public PicoRec() { idR = new RunAutomaton(new RegExp(idRegex).toAutomaton()); natR = new RunAutomaton(new RegExp(natRegex).toAutomaton()); keywordR = new RunAutomaton(new RegExp(keywordRegex).toAutomaton()); tokenR = new RunAutomaton(new RegExp(tokenRegex).toAutomaton()); layoutR = new RunAutomaton(new RegExp(layoutRegex).toAutomaton()); } /** * Try to parse the input. * * @return true if the language is recognized, false otherwise. */ public boolean parse(String text) { input = text; offset = 0; try { // TODO implement this return true; } catch (ParseError ex) { System.err.println("Parsing failed due to: " + ex); return false; } } /** Skip layout characters. */ private void skipLayout() { int layoutLength = layoutR.run(input, offset); assert(layoutLength >= 0); offset += layoutLength; } /** Returns the next token as recognized by the scanner or END_SYMBOL if the * end of input is reached. Subsequent calls will always return the same * value when match() is not called. * * @throws ParseError on unrecognized tokens. */ private String next(RunAutomaton r) { String symbol = END_SYMBOL; // "In context-free syntax, layout is allowed between symbols in the // left-hand side of the productions, by automatically inserting // optional layout (e.g. LAYOUT?) between them" skipLayout(); // match actual symbol. int length = r.run(input, offset); if (length >= 0) { symbol = input.substring(offset, offset + length); } // if no symbol matched and this is not the end of string, fail if (length < 0 && offset < input.length()) { throw new ParseError("Unrecognized non-token"); } return symbol; } /** Consumes the symbol and advanced the input pointer. */ private void match(String symbol) { skipLayout(); if (END_SYMBOL.equals(symbol)) { if (offset != input.length()) { throw new ParseError("Cannot match end!"); } } else { if (!input.startsWith(symbol, offset)) { throw new ParseError("Cannot match symbol!"); } offset += symbol.length(); } } /** Exception that is raised for parser errors. */ private static class ParseError extends RuntimeException { private static final long serialVersionUID = 1L; ParseError(String message) { super(message); } } /* Convenience functions for testing. */ private void check(String input, boolean expectOk) { boolean accepted = parse(input); System.out.println("Testing input: " + input); if (expectOk && !accepted || !expectOk && accepted) { throw new RuntimeException("Unexpected result for " + input); } } private void check(String[] input, boolean expectOk) { check(String.join("\n", input), expectOk); } private void assertOk(String... input) { check(input, true); } private void assertFail(String... input) { check(input, false); } public static void main(String[] args) { PicoRec rec = new PicoRec(); // minimal program satisfying the language, without whitespace. rec.assertOk("begindeclare|end"); // simple expression rec.assertOk("begin declare a | a := 1; end"); // Failure cases rec.assertFail(""); rec.assertFail("begin"); rec.assertFail("begin declare"); rec.assertFail("begin declare |"); rec.assertFail("begin declare a | a := 1 end"); rec.assertFail("begin declare | end end"); rec.assertFail("begin declare end"); rec.assertFail("begin end"); rec.assertFail("end"); } }