Snort: use GRegex to find pcre matches

Change-Id: Ie08bc1f3139ebe5564365e662f89257ad8d5b129 Reviewed-on: https://code.wireshark.org/review/20177 Petri-Dish: Martin Mathieson <martin.r.mathieson@googlemail.com> Tested-by: Petri Dish Buildbot <buildbot-no-reply@wireshark.org> Reviewed-by: Martin Mathieson <martin.r.mathieson@googlemail.com>
author: Martin Mathieson <martin.r.mathieson@googlemail.com> 2017-02-19 02:32:04 -0800
committer: Martin Mathieson <martin.r.mathieson@googlemail.com> 2017-02-21 20:03:45 +0000
commit: b9851c740836760ab36c4862f82f3f8cc83fde27 (patch)
tree: b07689270ab5202b657d3bdfd1a4de6c443377d7
parent: 012a179785abada629fa324652755c6acb51be74 (diff)
download: wireshark-b9851c740836760ab36c4862f82f3f8cc83fde27.tar.gz
3 files changed, 135 insertions, 10 deletions
diff --git a/epan/dissectors/packet-snort.c b/epan/dissectors/packet-snort.c
index 567c9d0f5c..991895c6cb 100644
--- a/epan/dissectors/packet-snort.c
+++ b/epan/dissectors/packet-snort.c
@@ -289,6 +289,57 @@ static gboolean content_compare_case_insensitive(const guint8* memory, const cha
 }
 
 
+/* Look for the first match of this content's PCRE in the captured bytes of
+ * tvb from start_offset onwards.  On success, sets *match_offset (relative to
+ * the start of tvb) and *match_length and returns TRUE.  Returns FALSE if the
+ * pcre can't be converted or compiled, or if there is no match.
+ */
+static gboolean look_for_pcre(content_t *content, tvbuff_t *tvb, guint start_offset, guint *match_offset, guint *match_length)
+{
+    GRegex *regex;
+    GMatchInfo *match_info = NULL;
+    gboolean match_found = FALSE;
+
+    /* Make sure pcre string is ready for regex library. */
+    if (!content_convert_pcre_for_regex(content)) {
+        return FALSE;
+    }
+
+    /* Compile first, so that nothing has been allocated if the pattern is rejected.
+       translated_str already has / /[modifiers] removed. */
+    regex = g_regex_new((const gchar*)content->translated_str,
+                        content->pcre_case_insensitive ? G_REGEX_CASELESS : (GRegexCompileFlags)0,
+                        (GRegexMatchFlags)0, NULL);
+    if (regex == NULL) {
+        /* Not a pattern that GRegex accepts - can't verify this match. */
+        return FALSE;
+    }
+
+    /* Copy remaining bytes into NULL-terminated string. */
+    int length_remaining = tvb_captured_length_remaining(tvb, start_offset);
+    gchar *string = (gchar*)g_malloc(length_remaining + 1);
+    tvb_memcpy(tvb, (void*)string, start_offset, length_remaining);
+    string[length_remaining] = '\0';
+
+    /* Only the first match is needed. */
+    /* TODO: need to restart at any NULL before the final end? */
+    if (g_regex_match(regex, string, (GRegexMatchFlags)0, &match_info)) {
+        gint start_pos, end_pos;
+
+        /* Find out where the match is (match_num 0 is the whole match) */
+        g_match_info_fetch_pos(match_info, 0, &start_pos, &end_pos);
+        *match_offset = start_offset + start_pos;
+        *match_length = end_pos - start_pos;
+        match_found = TRUE;
+    }
+
+    g_match_info_free(match_info);
+    g_regex_unref(regex);
+    g_free(string);
+
+    return match_found;
+}
+
 /* Move through the bytes of the tvbuff, looking for a match against the expanded
    binary contents of this content object.
  */
@@ -303,14 +354,14 @@ static gboolean look_for_content(content_t *content, tvbuff_t *tvb, guint start_
     for (guint m=start_offset; m <= (tvb_len-converted_content_length); m++) {
         const guint8 *ptr = tvb_get_ptr(tvb, m, converted_content_length);
         if (content->nocase) {
-            if (content_compare_case_insensitive(ptr, content->binary_str, content->translated_length)) {
+            if (content_compare_case_insensitive(ptr, content->translated_str, content->translated_length)) {
                 *match_offset = m;
                 *match_length = content->translated_length;
                 return TRUE;
             }
         }
         else {
-            if (content_compare_case_sensitive(ptr, content->binary_str, content->translated_length)) {
+            if (content_compare_case_sensitive(ptr, content->translated_str, content->translated_length)) {
                 *match_offset = m;
                 *match_length = content->translated_length;
                 return TRUE;
@@ -342,7 +393,12 @@ static gboolean get_content_match(Alert_t *alert, guint content_idx,
     content = &(rule->contents[content_idx]);
 
     /* Look for content match in the packet */
-    return look_for_content(content, tvb, content_start_match, match_offset, match_length);
+    if (content->content_type == Pcre) {
+        return look_for_pcre(content, tvb, content_start_match, match_offset, match_length);
+    }
+    else {
+        return look_for_content(content, tvb, content_start_match, match_offset, match_length);
+    }
 }
 
 
@@ -843,7 +899,7 @@ static void snort_show_alert(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo
                 case Pcre:
                     content_hf_item = hf_snort_pcre;
                     content_text_template = "Pcre: \"%s\"";
-                    attempt_match = FALSE;
+                    attempt_match = TRUE;
                     break;
                 default:
                     continue;
@@ -864,6 +920,7 @@ static void snort_show_alert(proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo
                     offset_to_add = (content_last_match_end-content_start_match) + rule->contents[n].distance;
                 }
 
+
                 /* Now actually look for match from calculated position */
                 /* TODO: could take 'depth' and 'within' into account to limit extent of search,
                    but OK if just trying to verify what Snort already found. */
diff --git a/epan/dissectors/snort-config.c b/epan/dissectors/snort-config.c
index 1d39d8899f..24d9a99e72 100644
--- a/epan/dissectors/snort-config.c
+++ b/epan/dissectors/snort-config.c
@@ -652,7 +652,17 @@ static void process_rule_option(Rule_t *rule, char *options, int option_start_of
         rule_set_http_uri(rule);
     }
     else if (strcmp(name, "pcre") == 0) {
-        rule_add_pcre(rule, value);
+        int value_start = 0;
+
+        /* Need at least opening and closing / */
+        if (value_length < 3) {
+            return;
+        }
+
+        /* Not expecting negation (!)... */
+
+        value[options_end_offset-colon_offset-2] = '\0';
+        rule_add_pcre(rule, value+value_start+1);
     }
     else if (strcmp(name, "nocase") == 0) {
         rule_set_content_nocase(rule);
@@ -792,7 +802,7 @@ static gboolean delete_rule(gpointer  key _U_,
 
     for (n=0; n < rule->number_contents; n++) {
         g_free(rule->contents[n].str);
-        g_free(rule->contents[n].binary_str);
+        g_free(rule->contents[n].translated_str);
     }
 
     for (n=0; n < rule->number_references; n++) {
@@ -1081,14 +1091,67 @@ guint content_convert_to_binary(content_t *content)
     }
 
     /* Store result for next time. */
-    content->binary_str = (guchar*)g_malloc(output_idx+1);
-    memcpy(content->binary_str, binary_str, output_idx+1);
+    content->translated_str = (guchar*)g_malloc(output_idx+1);
+    memcpy(content->translated_str, binary_str, output_idx+1);
     content->translated = TRUE;
     content->translated_length = output_idx;
 
     return output_idx;
 }
 
+/* In order to use glib's regex library, need to trim '/' delimiters and any
+   modifiers from content->str (expected form /regex/[modifiers]); the result is
+   cached in content->translated_str.  Returns FALSE if the form is not as expected. */
+gboolean content_convert_pcre_for_regex(content_t *content)
+{
+    guint pcre_length, i, end_delimiter_offset = 0;
+    gboolean case_insensitive = FALSE;
+
+    /* Return if already converted */
+    if (content->translated_str) {
+        return TRUE;
+    }
+
+    pcre_length = (guint)strlen(content->str);
+
+    /* Shortest valid form is /x/, and the string must start with / */
+    if ((pcre_length < 3) || (content->str[0] != '/')) {
+        return FALSE;
+    }
+
+    /* Next, look for closing / working back from the end of the string.
+       Index 2 is the earliest position that leaves a non-empty regex. */
+    for (i=pcre_length-1; i >= 2; i--) {
+        if (content->str[i] == '/') {
+            end_delimiter_offset = i;
+            break;
+        }
+        else {
+            if (content->str[i] == 'i') {
+                case_insensitive = TRUE;
+            }
+            /* TODO: note/handle other common modifiers (s/m/?) */
+        }
+    }
+    if (end_delimiter_offset == 0) {
+        /* Didn't find it */
+        return FALSE;
+    }
+
+    /* Only trust modifiers once we know they followed a closing delimiter. */
+    if (case_insensitive) {
+        content->pcre_case_insensitive = TRUE;
+    }
+
+    /* Store result for next time. */
+    content->translated_str = (guchar*)g_malloc(end_delimiter_offset);
+    memcpy(content->translated_str, content->str+1, end_delimiter_offset - 1);
+    content->translated_str[end_delimiter_offset-1] = '\0';
+    content->translated = TRUE;
+    content->translated_length = end_delimiter_offset - 1;
+    return TRUE;
+}
+
 /*
  * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
  *
diff --git a/epan/dissectors/snort-config.h b/epan/dissectors/snort-config.h
index ec0c23c761..acbd1fda7b 100644
--- a/epan/dissectors/snort-config.h
+++ b/epan/dissectors/snort-config.h
@@ -66,10 +66,13 @@ typedef struct content_t {
     gboolean http_client_body;
     gboolean http_cookie;
 
-    /* Pattern converted into bytes for matching against packet */
-    guchar   *binary_str;
+    /* Pattern converted into bytes for matching against packet.
+       Used for regular patterns and PCREs alike. */
+    guchar   *translated_str;
     gboolean translated;
     guint    translated_length;
+
+    gboolean pcre_case_insensitive;
 } content_t;
 
 /* This is to keep track of a variable referenced by a rule */
@@ -178,6 +181,8 @@ void reset_global_rule_stats(SnortConfig_t *snort_config);
 /* Expanding a content field string to the expected binary bytes */
 guint content_convert_to_binary(content_t *content);
 
+gboolean content_convert_pcre_for_regex(content_t *content);
+
 #endif
 
 /*
author	Martin Mathieson <martin.r.mathieson@googlemail.com>	2017-02-19 02:32:04 -0800
committer	Martin Mathieson <martin.r.mathieson@googlemail.com>	2017-02-21 20:03:45 +0000
commit	b9851c740836760ab36c4862f82f3f8cc83fde27 (patch)
tree	b07689270ab5202b657d3bdfd1a4de6c443377d7
parent	012a179785abada629fa324652755c6acb51be74 (diff)
download	wireshark-b9851c740836760ab36c4862f82f3f8cc83fde27.tar.gz