/**
 * Recognizer for the language described by grammar-ll1.txt.
 */
import dk.brics.automaton.RegExp;
import dk.brics.automaton.RunAutomaton;

public class PicoRec {

    /** Regular expression describing the ID symbol. */
    private final static String idRegex = "[a-z][a-z0-9]*";
    /** Regular expression describing the NAT symbol. */
    private final static String natRegex = "0|[1-9][0-9]*";

    /** Regular expression matches all tokens accepted by the language. */
    private final static String tokenRegex =
        "[\\|,;\\+\\*\\-\\(\\)]|:=|" + idRegex + "|" + natRegex;
    /** Regular expression matching layout tokens (to be ignored). */
    private final static String layoutRegex = "[ \n]*";

    /** Indicator for the end of input. This symbol does not occur in the
     * terminals and non-terminals. */
    private final static String END_SYMBOL = "$";

    private final RunAutomaton idR, natR, tokenR, layoutR;

    /** The current parsing state. */
    private String input;
    private int offset;

    public PicoRec() {
        idR = new RunAutomaton(new RegExp(idRegex).toAutomaton());
        natR = new RunAutomaton(new RegExp(natRegex).toAutomaton());
        tokenR = new RunAutomaton(new RegExp(tokenRegex).toAutomaton());
        layoutR = new RunAutomaton(new RegExp(layoutRegex).toAutomaton());
    }

    /**
     * Try to parse the input.
     *
     * @throws ParseError if the language is not recognized.
     */
    public void parse(String text) {
        input = text;
        offset = 0;
        match("begin");
        match("declare");
        parseIdList();
        match("|");
        parseStatements();
        match("end");
        match(END_SYMBOL);
    }

    /** Returns true if the next token is a non-keyword identifier. */
    private boolean isId(String token) {
        boolean result = !("begin".equals(token) || "declare".equals(token) ||
                "end".equals(token)) && idR.run(token);
        //System.err.println("Testing for ID: " + token + ": " + result);
        return result;
    }

    private void parseIdList() {
        // Parses IDLIST ::= ID "," |
        while (isId(next())) {
            match(next()); // ID
            match(",");
        }
    }

    private void parseStatements() {
        // Parses STATEMENTS ::= STATEMENT ";" |
        // => STATEMENTS ::= ID ":=" EXP ";" |
        while (isId(next())) {
            match(next()); // ID
            match(":=");
            parseExp();
            match(";");
        }
    }

    private void parseExp() {
        // Parses EXP ::= PROD-EXP PLUS-EXP'
        parseProdExp();
        parsePlusExpPrefixed();
    }

    private void parsePlusExp() {
        // Parses PLUS-EXP ::= PROD-EXP PLUS-EXP'
        parseProdExp();
        parsePlusExpPrefixed();
    }

    private void parsePlusExpPrefixed() {
        // Parses PLUS-EXP' ::= "+" PROD-EXP PLUS-EXP' |
        while ("+".equals(next())) {
            match("+");
            parseProdExp();
            parsePlusExpPrefixed();
        }
    }

    private void parseProdExp() {
        // Parses PROD-EXP ::= SINGLE-EXP PROD-EXP'
        parseSingleExp();
        parseProdExpPrefixed();
    }

    private void parseProdExpPrefixed() {
        // Parses PROD-EXP' ::= "*" SINGLE-EXP PROD-EXP' |
        while ("*".equals(next())) {
            match("*");
            parseSingleExp();
            parseProdExpPrefixed();
        }
    }

    private void parseSingleExp() {
        // Parses SINGLE-EXP ::= "-" SINGLE-EXP | ID | NAT | "(" EXP ")"
        String token = next();
        if ("-".equals(token)) {
            match("-");
            parseSingleExp();
        } else if (isId(token)) {
            match(token);
        } else if (natR.run(token)) {
            match(token);
        } else if ("(".equals(token)) {
            match("(");
            parseExp();
            match(")");
        } else {
            throw new ParseError("Invalid expression");
        }
    }

    /** Skip layout characters. */
    private void skipLayout() {
        int layoutLength = layoutR.run(input, offset);
        assert(layoutLength >= 0);
        offset += layoutLength;
    }

    /** Returns the next token as recognized by the scanner or END_SYMBOL if the
     * end of input is reached. Subsequent calls will always return the same
     * value when match() is not called.
     *
     * @throws ParseError on unrecognized tokens.
     */
    private String next() {
        String symbol = END_SYMBOL;
        // "In context-free syntax, layout is allowed between symbols in the
        // left-hand side of the productions, by automatically inserting
        // optional layout (e.g. LAYOUT?) between them"
        skipLayout();

        // match actual symbol.
        int length = tokenR.run(input, offset);
        if (length >= 0) {
            symbol = input.substring(offset, offset + length);
        }

        // if no symbol matched and this is not the end of string, fail
        if (length < 0 && offset < input.length()) {
            throw new ParseError("Unrecognized non-token: " + input.substring(offset));
        }
        return symbol;
    }

    /** Consumes the symbol and advanced the input pointer. */
    private void match(String symbol) {
        skipLayout();
        if (END_SYMBOL.equals(symbol)) {
            if (offset != input.length()) {
                throw new ParseError("Cannot match end!");
            }
        } else {
            if (!input.startsWith(symbol, offset)) {
                //System.err.println("Current input: " + input.substring(offset));
                throw new ParseError("Cannot match symbol: " + symbol);
            }
            offset += symbol.length();
        }
    }

    /** Exception that is raised for parser errors. */
    public static class ParseError extends RuntimeException {
        private static final long serialVersionUID = 1L;
        private ParseError(String message) {
            super(message);
        }
    }


    /* Convenience functions for testing. */

    private void check(String input, boolean expectOk) {
        System.out.println("Testing input: " + input);
        try {
            parse(input);
            if (!expectOk) {
                throw new RuntimeException("Unexpected pass for " + input);
            }
        } catch (ParseError ex) {
            if (expectOk) {
                throw new RuntimeException(ex);
            }
        }
    }

    private void assertOk(String input) {
        check(input, true);
    }

    private void assertFail(String input) {
        check(input, false);
    }

    public static void main(String[] args) {
        PicoRec rec = new PicoRec();

        // minimal program satisfying the language, without whitespace.
        rec.assertOk("begindeclare|end");
        // simple expression
        rec.assertOk("begin declare a, | a := 1; end");
        // other tests
        rec.assertOk("begindeclarea,|a:=1;end");
        rec.assertOk("  begin  declare a , |  a  :=  1 ;  end \n");
        // substrings of keywords in IDs should be accepted
        rec.assertOk("begin declare a, | a := endname; end");
        rec.assertOk("begin declare declarative, | a := 1; end");
        rec.assertOk("begin declare abc, | abegin := 1; end");
        // basic cases
        rec.assertOk("begin declare a, | end");
        rec.assertOk("begin declare | a := 1; end");
        rec.assertOk("begin declare a, | a := -a; end");
        rec.assertOk("begin declare a, | a := abc; end");
        rec.assertOk("begin declare a, | a := 123; end");
        rec.assertOk("begin declare a, | a := (a); end");
        // more complicated expressions
        rec.assertOk("begin declare a, | a := 2 * -3 + abc + def; end");
        rec.assertOk("begin declare a,b,c, | a := b + -(c*c*c) + -c; end");
        rec.assertOk("begin declare a,b, | a := -a; b := 1+23+456; end");
        rec.assertOk("begin declare a, | a := ---((a)); end");

        // Failure cases
        rec.assertFail("");
        rec.assertFail("begin");
        rec.assertFail("begin declare");
        rec.assertFail("begin declare |");
        rec.assertFail("begin declare a, | a := ; end");
        rec.assertFail("begin declare a | a := 1; end");
        rec.assertFail("begin declare a, | a := 1 end");
        rec.assertFail("begin declare a,b | a := 1; end");
        rec.assertFail("begin declare a, | a := 1;; end");
        rec.assertFail("begin declare | end end");
        rec.assertFail("begin declare | endx");
        rec.assertFail("begin declare end");
        rec.assertFail("begin end");
        rec.assertFail("end");
        // Failures in expression
        rec.assertFail("begin declare a, | a := -; end");
        rec.assertFail("begin declare a, | a := -+1; end");
        rec.assertFail("begin declare a, | a := ; end");
        rec.assertFail("begin declare a, | a := (a; end");
        rec.assertFail("begin declare a, | a := (a)); end");
        rec.assertFail("begin declare a, | a := end; end");
        rec.assertFail("begin declare a, | a := end; end");
        // Keywords should not be used as identifiers
        rec.assertFail("begin declare begin, | a := 1; end");
        rec.assertFail("begin declare a, | declare := 1; end");
        rec.assertFail("begin declare a, | a := end; end");
        // Whitespace is cannot split identifiers
        rec.assertFail("begin dec lare abc, | end");
        rec.assertFail("begin declare a bc, | end");
        // Garbage input
        rec.assertFail("begin declare A, | end");
        rec.assertFail("begin declare a, | end,");
        rec.assertFail("begin declare a, | end#");
        rec.assertFail("begin declare a, | end$");
    }
}