From 35a991bdcc806e58627bc676341931aed0a71432 Mon Sep 17 00:00:00 2001
From: Peter Wu <peter@lekensteyn.nl>
Date: Sun, 8 May 2016 17:40:25 +0200
Subject: PicoRec: begin implementing parsing

Remove keywordRegex, as it is currently unused. To do: make sure that ID
does not match keywords, and implement parseExp.
---
 src/PicoRec.java | 47 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/src/PicoRec.java b/src/PicoRec.java
index 191d2f8..1d6e16b 100644
--- a/src/PicoRec.java
+++ b/src/PicoRec.java
@@ -11,9 +11,6 @@ public class PicoRec {
     /** Regular expression describing the NAT symbol. */
     private final static String natRegex = "0|[1-9][0-9]*";
 
-    /** Regular expression describing keywords (not to be interpreted as ID). */
-    private final static String keywordRegex =
-        literalRegex("begin|declare|end");
     /** Regular expression matches all tokens accepted by the language (other
      * than keywords). */
     private final static String tokenRegex =
@@ -25,7 +22,7 @@ public class PicoRec {
      * terminals and non-terminals. */
     private final static String END_SYMBOL = "$";
 
-    private final RunAutomaton idR, natR, keywordR, tokenR, layoutR;
+    private final RunAutomaton idR, natR, tokenR, layoutR;
 
     /** The current parsing state. */
     private String input;
@@ -40,7 +37,6 @@ public class PicoRec {
     public PicoRec() {
         idR = new RunAutomaton(new RegExp(idRegex).toAutomaton());
         natR = new RunAutomaton(new RegExp(natRegex).toAutomaton());
-        keywordR = new RunAutomaton(new RegExp(keywordRegex).toAutomaton());
         tokenR = new RunAutomaton(new RegExp(tokenRegex).toAutomaton());
         layoutR = new RunAutomaton(new RegExp(layoutRegex).toAutomaton());
     }
@@ -53,6 +49,36 @@ public class PicoRec {
     public void parse(String text) {
         input = text;
         offset = 0;
+        match("begin");
+        match("declare");
+        parseIdList();
+        match("|");
+        parseStatements();
+        match("end");
+        match(END_SYMBOL);
+    }
+
+    private void parseIdList() {
+        // Parses IDLIST: zero or more repetitions of ID "," (may be empty).
+        while (idR.run(next())) {
+            match(next()); // ID
+            match(",");
+        }
+    }
+
+    private void parseStatements() {
+        // Parses STATEMENTS: zero or more repetitions of STATEMENT, where
+        // STATEMENT ::= ID ":=" EXP ";" (every statement ends with ";").
+        while (idR.run(next())) {
+            match(next()); // ID
+            match(":=");
+            parseExp();
+            match(";"); // statement terminator (was "," by mistake)
+        }
+    }
+
+    private void parseExp() {
+        // TODO: implement EXP parsing; this stub currently consumes no input.
     }
 
     /** Skip layout characters. */
@@ -68,7 +94,7 @@ public class PicoRec {
      *
      * @throws ParseError on unrecognized tokens.
      */
-    private String next(RunAutomaton r) {
+    private String next() {
         String symbol = END_SYMBOL;
         // "In context-free syntax, layout is allowed between symbols in the
         // left-hand side of the productions, by automatically inserting
@@ -76,7 +102,7 @@ public class PicoRec {
         skipLayout();
 
         // match actual symbol.
-        int length = r.run(input, offset);
+        int length = tokenR.run(input, offset);
         if (length >= 0) {
             symbol = input.substring(offset, offset + length);
         }
@@ -97,7 +123,7 @@ public class PicoRec {
             }
         } else {
             if (!input.startsWith(symbol, offset)) {
-                throw new ParseError("Cannot match symbol!");
+                throw new ParseError("Cannot match symbol: " + symbol);
             }
             offset += symbol.length();
         }
@@ -146,14 +172,15 @@ public class PicoRec {
         // minimal program satisfying the language, without whitespace.
         rec.assertOk("begindeclare|end");
         // simple expression
-        rec.assertOk("begin declare a | a := 1; end");
+        rec.assertOk("begin declare a, | a := 1; end");
 
         // Failure cases
         rec.assertFail("");
         rec.assertFail("begin");
         rec.assertFail("begin declare");
         rec.assertFail("begin declare |");
-        rec.assertFail("begin declare a | a := 1 end");
+        rec.assertFail("begin declare a | a := 1; end");
+        rec.assertFail("begin declare a, | a := 1 end");
         rec.assertFail("begin declare | end end");
         rec.assertFail("begin declare end");
         rec.assertFail("begin end");
-- 
cgit v1.2.1