/**
 * Recognizer for the language described by grammar-ll1.txt.
 */
import dk.brics.automaton.RegExp;
import dk.brics.automaton.RunAutomaton;

public class PicoRec {

    /** Regular expression describing the ID symbol. */
    private final static String idRegex = "[a-z][a-z0-9]*";
    /** Regular expression describing the NAT symbol. */
    private final static String natRegex = "0|[1-9][0-9]*";

    /** Regular expression describing keywords (not to be interpreted as ID). */
    private final static String keywordRegex =
        literalRegex("begin|declare|end");
    /** Regular expression matches all tokens accepted by the language (other
     * than keywords). */
    private final static String tokenRegex =
        literalRegex("[\\|,;\\+\\*\\-]|:=|" + idRegex + "|" + natRegex);
    /** Regular expression matching layout tokens (to be ignored). */
    private final static String layoutRegex = "[ \n]*";

    /** Indicator for the end of input. This symbol does not occur in the
     * terminals and non-terminals. */
    private final static String END_SYMBOL = "$";

    private final RunAutomaton idR, natR, keywordR, tokenR, layoutR;

    /** The current parsing state. */
    private String input;
    private int offset;

    /** Auxilliary function to produce a regular expression taking into account
     * layout considerations for symbols. */
    private static String literalRegex(String regex) {
        return layoutRegex + "(" + regex + ")" + layoutRegex;
    }

    public PicoRec() {
        idR = new RunAutomaton(new RegExp(idRegex).toAutomaton());
        natR = new RunAutomaton(new RegExp(natRegex).toAutomaton());
        keywordR = new RunAutomaton(new RegExp(keywordRegex).toAutomaton());
        tokenR = new RunAutomaton(new RegExp(tokenRegex).toAutomaton());
        layoutR = new RunAutomaton(new RegExp(layoutRegex).toAutomaton());
    }

    /**
     * Try to parse the input.
     *
     * @throws ParseError if the language is not recognized.
     */
    public void parse(String text) {
        input = text;
        offset = 0;
    }

    /** Skip layout characters. */
    private void skipLayout() {
        int layoutLength = layoutR.run(input, offset);
        assert(layoutLength >= 0);
        offset += layoutLength;
    }

    /** Returns the next token as recognized by the scanner or END_SYMBOL if the
     * end of input is reached. Subsequent calls will always return the same
     * value when match() is not called.
     *
     * @throws ParseError on unrecognized tokens.
     */
    private String next(RunAutomaton r) {
        String symbol = END_SYMBOL;
        // "In context-free syntax, layout is allowed between symbols in the
        // left-hand side of the productions, by automatically inserting
        // optional layout (e.g. LAYOUT?) between them"
        skipLayout();

        // match actual symbol.
        int length = r.run(input, offset);
        if (length >= 0) {
            symbol = input.substring(offset, offset + length);
        }

        // if no symbol matched and this is not the end of string, fail
        if (length < 0 && offset < input.length()) {
            throw new ParseError("Unrecognized non-token");
        }
        return symbol;
    }

    /** Consumes the symbol and advanced the input pointer. */
    private void match(String symbol) {
        skipLayout();
        if (END_SYMBOL.equals(symbol)) {
            if (offset != input.length()) {
                throw new ParseError("Cannot match end!");
            }
        } else {
            if (!input.startsWith(symbol, offset)) {
                throw new ParseError("Cannot match symbol!");
            }
            offset += symbol.length();
        }
    }

    /** Exception that is raised for parser errors. */
    public static class ParseError extends RuntimeException {
        private static final long serialVersionUID = 1L;
        private ParseError(String message) {
            super(message);
        }
    }


    /* Convenience functions for testing. */

    private void check(String input, boolean expectOk) {
        System.out.println("Testing input: " + input);
        try {
            parse(input);
            if (!expectOk) {
                throw new RuntimeException("Unexpected pass for " + input);
            }
        } catch (ParseError ex) {
            if (expectOk) {
                throw new RuntimeException(ex);
            }
        }
    }

    private void check(String[] input, boolean expectOk) {
        check(String.join("\n", input), expectOk);
    }

    private void assertOk(String... input) {
        check(input, true);
    }

    private void assertFail(String... input) {
        check(input, false);
    }

    public static void main(String[] args) {
        PicoRec rec = new PicoRec();

        // minimal program satisfying the language, without whitespace.
        rec.assertOk("begindeclare|end");
        // simple expression
        rec.assertOk("begin declare a | a := 1; end");

        // Failure cases
        rec.assertFail("");
        rec.assertFail("begin");
        rec.assertFail("begin declare");
        rec.assertFail("begin declare |");
        rec.assertFail("begin declare a | a := 1 end");
        rec.assertFail("begin declare | end end");
        rec.assertFail("begin declare end");
        rec.assertFail("begin end");
        rec.assertFail("end");
    }
}