summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/PicoRec.java101
1 files changed, 96 insertions, 5 deletions
diff --git a/src/PicoRec.java b/src/PicoRec.java
index eec2f38..f601ce3 100644
--- a/src/PicoRec.java
+++ b/src/PicoRec.java
@@ -11,11 +11,38 @@ public class PicoRec {
/** Regular expression describing the NAT symbol. */
private final static String natRegex = "0|[1-9][0-9]*";
- private final RunAutomaton idR, natR;
+ /** Regular expression describing keywords (not to be interpreted as ID). */
+ private final static String keywordRegex =
+ literalRegex("begin|declare|end");
+ /** Regular expression matches all tokens accepted by the language (other
+ * than keywords). */
+ private final static String tokenRegex =
+ literalRegex("[\\|,;\\+\\*\\-]|:=|" + idRegex + "|" + natRegex);
+ /** Regular expression matching layout tokens (to be ignored). */
+ private final static String layoutRegex = "[ \n]*";
+
+ /** Indicator for the end of input. This symbol does not occur in the
+ * terminals and non-terminals. */
+ private final static String END_SYMBOL = "$";
+
+ private final RunAutomaton idR, natR, keywordR, tokenR, layoutR;
+
+ /** The current parsing state. */
+ private String input;
+ private int offset;
+
+ /** Auxilliary function to produce a regular expression taking into account
+ * layout considerations for symbols. */
+ private static String literalRegex(String regex) {
+ return layoutRegex + "(" + regex + ")" + layoutRegex;
+ }
public PicoRec() {
idR = new RunAutomaton(new RegExp(idRegex).toAutomaton());
natR = new RunAutomaton(new RegExp(natRegex).toAutomaton());
+ keywordR = new RunAutomaton(new RegExp(keywordRegex).toAutomaton());
+ tokenR = new RunAutomaton(new RegExp(tokenRegex).toAutomaton());
+ layoutR = new RunAutomaton(new RegExp(layoutRegex).toAutomaton());
}
/**
@@ -24,8 +51,71 @@ public class PicoRec {
* @return true if the language is recognized, false otherwise.
*/
public boolean parse(String text) {
- // TODO implement this
- return false;
+ input = text;
+ offset = 0;
+ try {
+ // TODO implement this
+ return true;
+ } catch (ParseError ex) {
+ System.err.println("Parsing failed due to: " + ex);
+ return false;
+ }
+ }
+
+ /** Skip layout characters. */
+ private void skipLayout() {
+ int layoutLength = layoutR.run(input, offset);
+ assert(layoutLength >= 0);
+ offset += layoutLength;
+ }
+
+ /** Returns the next token as recognized by the scanner or END_SYMBOL if the
+ * end of input is reached. Subsequent calls will always return the same
+ * value when match() is not called.
+ *
+ * @throws ParseError on unrecognized tokens.
+ */
+ private String next(RunAutomaton r) {
+ String symbol = END_SYMBOL;
+ // "In context-free syntax, layout is allowed between symbols in the
+ // left-hand side of the productions, by automatically inserting
+ // optional layout (e.g. LAYOUT?) between them"
+ skipLayout();
+
+ // match actual symbol.
+ int length = r.run(input, offset);
+ if (length >= 0) {
+ symbol = input.substring(offset, offset + length);
+ }
+
+ // if no symbol matched and this is not the end of string, fail
+ if (length < 0 && offset < input.length()) {
+ throw new ParseError("Unrecognized non-token");
+ }
+ return symbol;
+ }
+
+ /** Consumes the symbol and advanced the input pointer. */
+ private void match(String symbol) {
+ skipLayout();
+ if (END_SYMBOL.equals(symbol)) {
+ if (offset != input.length()) {
+ throw new ParseError("Cannot match end!");
+ }
+ } else {
+ if (!input.startsWith(symbol, offset)) {
+ throw new ParseError("Cannot match symbol!");
+ }
+ offset += symbol.length();
+ }
+ }
+
+ /** Exception that is raised for parser errors. */
+ private static class ParseError extends RuntimeException {
+ private static final long serialVersionUID = 1L;
+ ParseError(String message) {
+ super(message);
+ }
}
@@ -57,13 +147,14 @@ public class PicoRec {
// minimal program satisfying the language, without whitespace.
rec.assertOk("begindeclare|end");
// simple expression
- rec.assertOk("begin declare a | a := 1 end");
+ rec.assertOk("begin declare a | a := 1; end");
- // Trivial failures
+ // Failure cases
rec.assertFail("");
rec.assertFail("begin");
rec.assertFail("begin declare");
rec.assertFail("begin declare |");
+ rec.assertFail("begin declare a | a := 1 end");
rec.assertFail("begin declare | end end");
rec.assertFail("begin declare end");
rec.assertFail("begin end");