summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jakub Zawadzki <darkjames-ws@darkjames.pl> 2014-01-07 22:17:32 +0000
committer: Jakub Zawadzki <darkjames-ws@darkjames.pl> 2014-01-07 22:17:32 +0000
commit: abda30e9e6d8fd9aa28edc4677796e61a9c88997 (patch)
tree: ab90b4994f29142a1554c36b6160033b3d499405
parent: d1dcee936b2a0ed257c526889b664e2b314d3eb0 (diff)
download: wireshark-abda30e9e6d8fd9aa28edc4677796e61a9c88997.tar.gz
Fix bug #9618: Invalid utf8 causes JSON dissector assertion failure "g_utf8_validate"
Validate JSON UTF-8 characters, replace with '?' when invalid.
svn path=/trunk/; revision=54633
-rw-r--r-- epan/dissectors/packet-json.c | 41
-rw-r--r-- wsutil/CMakeLists.txt | 2
-rw-r--r-- wsutil/Makefile.am | 2
-rw-r--r-- wsutil/Makefile.common | 6
-rw-r--r-- wsutil/Makefile.nmake | 1
-rw-r--r-- wsutil/unicode-utils.c | 21
-rw-r--r-- wsutil/unicode-utils.h | 18
7 files changed, 71 insertions, 20 deletions
diff --git a/epan/dissectors/packet-json.c b/epan/dissectors/packet-json.c
index dc339b5e61..3c9f09193d 100644
--- a/epan/dissectors/packet-json.c
+++ b/epan/dissectors/packet-json.c
@@ -249,6 +249,30 @@ static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbpar
wmem_stack_pop(data->stack);
}
+static int
+json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max)
+{
+ int len = ws_utf8_char_len((guint8) *buf);
+
+ /* XXX, before moving to core API check if it's off-by-one safe.
+ * For JSON analyzer it's not a problem
+ * (string always terminated by ", which is not valid UTF-8 continuation character) */
+ if (len == -1 || ((guint) (offset + len)) >= (guint) offset_max) {
+ *buf = '?';
+ return 1;
+ }
+
+ /* assume it's valid UTF-8 */
+ tvb_memcpy(tvb, buf + 1, offset + 1, len - 1);
+
+ if (!g_utf8_validate(buf, len, NULL)) {
+ *buf = '?';
+ return 1;
+ }
+
+ return len;
+}
+
static char *json_string_unescape(tvbparse_elem_t *tok)
{
char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1);
@@ -266,7 +290,6 @@ static char *json_string_unescape(tvbparse_elem_t *tok)
case '\"':
case '\\':
case '/':
- default:
str[j++] = ch;
break;
@@ -361,10 +384,22 @@ static char *json_string_unescape(tvbparse_elem_t *tok)
str[j++] = '?';
break;
}
+
+ default:
+ /* not valid by JSON grammar (also tvbparse rules should not allow it) */
+ DISSECTOR_ASSERT_NOT_REACHED();
+ break;
}
- } else
- str[j++] = ch;
+ } else {
+ int utf_len;
+
+ str[j] = ch;
+ /* XXX if it's not valid UTF-8 character, add some expert info? (it violates JSON grammar) */
+ utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, i, tok->len);
+ j += utf_len;
+ i += (utf_len - 1);
+ }
}
str[j] = '\0';
diff --git a/wsutil/CMakeLists.txt b/wsutil/CMakeLists.txt
index b14c236238..c4251ea0bb 100644
--- a/wsutil/CMakeLists.txt
+++ b/wsutil/CMakeLists.txt
@@ -30,7 +30,6 @@ IF(WIN32)
inet_ntop.c
inet_pton.c
strptime.c
- unicode-utils.c
wsgetopt.c
)
ENDIF(WIN32)
@@ -69,6 +68,7 @@ set(WSUTIL_FILES
tempfile.c
type_util.c
u3.c
+ unicode-utils.c
${WSUTIL_PLATFORM_FILES}
)
diff --git a/wsutil/Makefile.am b/wsutil/Makefile.am
index 6ead86a92f..0b1256c7e8 100644
--- a/wsutil/Makefile.am
+++ b/wsutil/Makefile.am
@@ -99,8 +99,6 @@ EXTRA_DIST = \
Makefile.nmake \
file_util.c \
file_util.h \
- unicode-utils.c \
- unicode-utils.h \
wsgcrypt.h
CLEANFILES = \
diff --git a/wsutil/Makefile.common b/wsutil/Makefile.common
index 96b5a7cfa3..0efc26fb1a 100644
--- a/wsutil/Makefile.common
+++ b/wsutil/Makefile.common
@@ -61,7 +61,8 @@ LIBWSUTIL_SRC = \
report_err.c \
tempfile.c \
type_util.c \
- u3.c
+ u3.c \
+ unicode-utils.c
# Header files that are not generated from other files
LIBWSUTIL_INCLUDES = \
@@ -100,4 +101,5 @@ LIBWSUTIL_INCLUDES = \
report_err.h \
tempfile.h \
type_util.h \
- u3.h
+ u3.h \
+ unicode-utils.h
diff --git a/wsutil/Makefile.nmake b/wsutil/Makefile.nmake
index 673c6e4817..4da5224075 100644
--- a/wsutil/Makefile.nmake
+++ b/wsutil/Makefile.nmake
@@ -28,7 +28,6 @@ OBJECTS = file_util.obj \
inet_pton.obj \
$(LIBWSUTIL_SRC:.c=.obj) \
strptime.obj \
- unicode-utils.obj \
wsgetopt.obj
# For use when making libwsutil.dll
diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c
index 8935e46f38..21cc489df7 100644
--- a/wsutil/unicode-utils.c
+++ b/wsutil/unicode-utils.c
@@ -22,12 +22,23 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
-#ifndef _WIN32
-#error "This is only for Windows"
-#endif
-
#include "unicode-utils.h"
+int
+ws_utf8_char_len(guint8 ch)
+{
+ if (ch >= 0xfe) return -1;
+ if (ch >= 0xfc) return 6;
+ if (ch >= 0xf8) return 5;
+ if (ch >= 0xf0) return 4;
+ if (ch >= 0xe0) return 3;
+ if (ch >= 0xc0) return 2;
+ else return 1;
+}
+
+
+#ifdef _WIN32
+
#include <shellapi.h>
/** @file
@@ -157,3 +168,5 @@ arg_list_utf_16to8(int argc, char *argv[]) {
}
} /* XXX else bail because something is horribly, horribly wrong? */
}
+
+#endif
diff --git a/wsutil/unicode-utils.h b/wsutil/unicode-utils.h
index a9d5318f6b..7b29d8460a 100644
--- a/wsutil/unicode-utils.h
+++ b/wsutil/unicode-utils.h
@@ -25,21 +25,25 @@
#ifndef __UNICODEUTIL_H__
#define __UNICODEUTIL_H__
-#include "ws_symbol_export.h"
-
-#ifdef _WIN32
-
#include "config.h"
+#include "ws_symbol_export.h"
+
#include <glib.h>
-#include <windows.h>
-#include <tchar.h>
-#include <wchar.h>
/**
* @file Unicode convenience routines.
*/
+WS_DLL_PUBLIC
+int ws_utf8_char_len(guint8 ch);
+
+#ifdef _WIN32
+
+#include <windows.h>
+#include <tchar.h>
+#include <wchar.h>
+
/** Given a UTF-8 string, convert it to UTF-16. This is meant to be used
* to convert between GTK+ 2.x (UTF-8) to Windows (UTF-16).
*