From e1c97dd440827a9b6815fb337cbf8629e5c92156 Mon Sep 17 00:00:00 2001
From: Roland Knall <roland.knall@br-automation.com>
Date: Mon, 4 Jan 2016 14:19:55 +0100
Subject: extcap: Rewrite the tokenizer to use regexps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the hand-written tokenizer with two regular expressions,
which makes parsing the sentence strings a lot safer and faster.

Change-Id: I444adb8db10b689dd387c0caa951981ba28be917
Reviewed-on: https://code.wireshark.org/review/13040
Reviewed-by: Roland Knall <rknall@gmail.com>
Reviewed-by: Stig Bjørlykke <stig@bjorlykke.org>
---
 extcap_parser.c | 164 ++++++++++++++++++++++++--------------------------------
 1 file changed, 71 insertions(+), 93 deletions(-)

(limited to 'extcap_parser.c')

diff --git a/extcap_parser.c b/extcap_parser.c
index d884298a3a..ac75b50ab3 100644
--- a/extcap_parser.c
+++ b/extcap_parser.c
@@ -270,9 +270,10 @@ void extcap_free_tokenized_sentence_list(extcap_token_sentence *f) {
 }
 
 extcap_token_sentence *extcap_tokenize_sentence(const gchar *s) {
-    gchar *b, *e, *eq;
-
     extcap_token_param *tv = NULL;
+    GRegex * regex = NULL;
+    GMatchInfo * match_info = NULL;
+    GError * error = NULL;
 
     extcap_token_sentence *rs = g_new(extcap_token_sentence, 1);
 
@@ -280,106 +281,83 @@ extcap_token_sentence *extcap_tokenize_sentence(const gchar *s) {
     rs->next_sentence = NULL;
     rs->param_list = NULL;
 
-    if ((b = g_strstr_len(s, -1, " ")) == NULL) {
-        extcap_free_tokenized_sentence(rs);
-        return NULL ;
-    }
+    /* Regex for catching just the allowed values for sentences */
+    if ( ( regex = g_regex_new ( "^[\\t| ]*(arg|value|interface|dlt)(?=[\\t| ]+\\{)",
+            (GRegexCompileFlags) G_REGEX_CASELESS, (GRegexMatchFlags) 0, NULL ) ) != NULL ) {
+        g_regex_match ( regex, s, (GRegexMatchFlags) 0, &match_info );
 
-    rs->sentence = g_strndup(s, b - s);
+        if ( g_match_info_matches ( match_info ) )
+            rs->sentence = g_match_info_fetch(match_info, 0);
 
-    if ((b = g_strstr_len(s, -1, "{")) == NULL) {
-        /* printf("debug - tokenizer - sentence with no values\n"); */
+        g_match_info_free ( match_info );
+        g_regex_unref ( regex );
+    }
+    /* No valid sentence found, exiting here */
+    if ( rs->sentence == NULL ) {
         extcap_free_tokenized_sentence(rs);
-        return NULL ;
+        return NULL;
     }
 
-    while (b != NULL ) {
-        if ((e = g_strstr_len(b, -1, "}")) == NULL) {
-            /* printf("debug - tokenizer - invalid, missing }\n"); */
-            extcap_free_tokenized_sentence(rs);
-            return NULL ;
-        }
-
-        /* caught a regex quantifier end bracket and not the end of the line.
-         * let's find the correct end bracket */
-        if ( *(e+1) != '{' && strlen ( e ) > 1 ) {
-            gchar *f = (e + 1);
-
-            while ( ( f = g_strstr_len(f, -1, "}") ) != NULL) {
-                if ( strlen ( f ) <= 1 || *(f+1) == '{' )
-                    break;
-                f++;
+    /* Capture the argument and the value of each {arg=value} pair. This
+     * ensures that regex patterns given to {validation=} are parsed
+     * correctly, as long as }{ does not occur within the pattern */
+    regex = g_regex_new ( "\\{([a-zA-Z_-]*?)\\=(.*?)\\}(?=\\{|$|\\s)",
+            (GRegexCompileFlags) G_REGEX_CASELESS, (GRegexMatchFlags) 0, NULL );
+    if ( regex != NULL ) {
+        g_regex_match_full(regex, s, -1, 0, (GRegexMatchFlags) 0, &match_info, &error );
+        while(g_match_info_matches(match_info)) {
+            gchar * arg = g_match_info_fetch ( match_info, 1 );
+
+            if ( arg == NULL )
+                break;
+
+            tv = g_new(extcap_token_param, 1);
+            tv->arg = arg;
+            tv->value = g_match_info_fetch ( match_info, 2 );
+
+            if (g_ascii_strcasecmp(tv->arg, "number") == 0) {
+                tv->param_type = EXTCAP_PARAM_ARGNUM;
+            } else if (g_ascii_strcasecmp(tv->arg, "call") == 0) {
+                tv->param_type = EXTCAP_PARAM_CALL;
+            } else if (g_ascii_strcasecmp(tv->arg, "display") == 0) {
+                tv->param_type = EXTCAP_PARAM_DISPLAY;
+            } else if (g_ascii_strcasecmp(tv->arg, "type") == 0) {
+                tv->param_type = EXTCAP_PARAM_TYPE;
+            } else if (g_ascii_strcasecmp(tv->arg, "arg") == 0) {
+                tv->param_type = EXTCAP_PARAM_ARG;
+            } else if (g_ascii_strcasecmp(tv->arg, "default") == 0) {
+                tv->param_type = EXTCAP_PARAM_DEFAULT;
+            } else if (g_ascii_strcasecmp(tv->arg, "value") == 0) {
+                tv->param_type = EXTCAP_PARAM_VALUE;
+            } else if (g_ascii_strcasecmp(tv->arg, "range") == 0) {
+                tv->param_type = EXTCAP_PARAM_RANGE;
+            } else if (g_ascii_strcasecmp(tv->arg, "tooltip") == 0) {
+                tv->param_type = EXTCAP_PARAM_TOOLTIP;
+            } else if (g_ascii_strcasecmp(tv->arg, "mustexist") == 0) {
+                tv->param_type = EXTCAP_PARAM_FILE_MUSTEXIST;
+            } else if (g_ascii_strcasecmp(tv->arg, "fileext") == 0) {
+                tv->param_type = EXTCAP_PARAM_FILE_EXTENSION;
+            } else if (g_ascii_strcasecmp(tv->arg, "name") == 0) {
+                tv->param_type = EXTCAP_PARAM_NAME;
+            } else if (g_ascii_strcasecmp(tv->arg, "enabled") == 0) {
+                tv->param_type = EXTCAP_PARAM_ENABLED;
+            } else if (g_ascii_strcasecmp(tv->arg, "parent") == 0) {
+                tv->param_type = EXTCAP_PARAM_PARENT;
+            } else if (g_ascii_strcasecmp(tv->arg, "required") == 0) {
+                tv->param_type = EXTCAP_PARAM_REQUIRED;
+            } else if (g_ascii_strcasecmp(tv->arg, "validation") == 0) {
+                tv->param_type = EXTCAP_PARAM_VALIDATION;
+            } else {
+                tv->param_type = EXTCAP_PARAM_UNKNOWN;
             }
 
-            if ( f != NULL )
-                e = f;
-        }
+            tv->next_token = rs->param_list;
+            rs->param_list = tv;
 
-        if ((eq = g_strstr_len(b, -1, "=")) == NULL) {
-            /* printf("debug - tokenizer - invalid, missing =\n"); */
-            extcap_free_tokenized_sentence(rs);
-            return NULL ;
+            g_match_info_next(match_info, &error);
         }
-
-        b++;
-        e--;
-
-        if (b >= eq || e <= eq) {
-            /* printf("debug - tokenizer - invalid, missing arg or value in {}\n"); */
-            extcap_free_tokenized_sentence(rs);
-            return NULL ;
-        }
-
-        tv = g_new(extcap_token_param, 1);
-        tv->arg = g_strndup(b, eq - b);
-        tv->value = g_strndup(eq + 1, e - eq);
-
-        if (g_ascii_strcasecmp(tv->arg, "number") == 0) {
-            tv->param_type = EXTCAP_PARAM_ARGNUM;
-        } else if (g_ascii_strcasecmp(tv->arg, "call") == 0) {
-            tv->param_type = EXTCAP_PARAM_CALL;
-        } else if (g_ascii_strcasecmp(tv->arg, "display") == 0) {
-            tv->param_type = EXTCAP_PARAM_DISPLAY;
-        } else if (g_ascii_strcasecmp(tv->arg, "type") == 0) {
-            tv->param_type = EXTCAP_PARAM_TYPE;
-        } else if (g_ascii_strcasecmp(tv->arg, "arg") == 0) {
-            tv->param_type = EXTCAP_PARAM_ARG;
-        } else if (g_ascii_strcasecmp(tv->arg, "default") == 0) {
-            tv->param_type = EXTCAP_PARAM_DEFAULT;
-        } else if (g_ascii_strcasecmp(tv->arg, "value") == 0) {
-            tv->param_type = EXTCAP_PARAM_VALUE;
-        } else if (g_ascii_strcasecmp(tv->arg, "range") == 0) {
-            tv->param_type = EXTCAP_PARAM_RANGE;
-        } else if (g_ascii_strcasecmp(tv->arg, "tooltip") == 0) {
-            tv->param_type = EXTCAP_PARAM_TOOLTIP;
-        } else if (g_ascii_strcasecmp(tv->arg, "mustexist") == 0) {
-            tv->param_type = EXTCAP_PARAM_FILE_MUSTEXIST;
-        } else if (g_ascii_strcasecmp(tv->arg, "fileext") == 0) {
-            tv->param_type = EXTCAP_PARAM_FILE_EXTENSION;
-        } else if (g_ascii_strcasecmp(tv->arg, "name") == 0) {
-            tv->param_type = EXTCAP_PARAM_NAME;
-        } else if (g_ascii_strcasecmp(tv->arg, "enabled") == 0) {
-            tv->param_type = EXTCAP_PARAM_ENABLED;
-        } else if (g_ascii_strcasecmp(tv->arg, "parent") == 0) {
-            tv->param_type = EXTCAP_PARAM_PARENT;
-        } else if (g_ascii_strcasecmp(tv->arg, "required") == 0) {
-            tv->param_type = EXTCAP_PARAM_REQUIRED;
-        } else if (g_ascii_strcasecmp(tv->arg, "validation") == 0) {
-            tv->param_type = EXTCAP_PARAM_VALIDATION;
-        } else {
-            tv->param_type = EXTCAP_PARAM_UNKNOWN;
-        }
-
-        tv->next_token = rs->param_list;
-        rs->param_list = tv;
-
-        /* printf("debug - tokenizer - got '%s' = '%s'\n", tv->arg, tv->value); */
-
-        b = e + 1;
-        if ((size_t) (b - s) > strlen(s))
-            break;
-
-        b = g_strstr_len(b, -1, "{");
+        g_match_info_free(match_info);
+        g_regex_unref(regex);
     }
 
     return rs;
-- 
cgit v1.2.1