diff options
Diffstat (limited to 'wsutil')
-rw-r--r-- | wsutil/ws_mempbrk.c | 60 | ||||
-rw-r--r-- | wsutil/ws_mempbrk.h | 27 | ||||
-rw-r--r-- | wsutil/ws_mempbrk_sse42.c | 123 |
3 files changed, 94 insertions, 116 deletions
diff --git a/wsutil/ws_mempbrk.c b/wsutil/ws_mempbrk.c index 8ad1a17e78..6ed9ebcd9e 100644 --- a/wsutil/ws_mempbrk.c +++ b/wsutil/ws_mempbrk.c @@ -36,50 +36,54 @@ #include <glib.h> #include "ws_symbol_export.h" +#include "ws_mempbrk.h" + + +void +tvb_pbrk_compile(tvb_pbrk_pattern* pattern, const gchar *needles) +{ + const gchar *n = needles; + while (*n) { + pattern->patt[(int)*n] = 1; + n++; + } + #ifdef HAVE_SSE4_2 -#include "ws_cpuid.h" + ws_mempbrk_sse42_compile(pattern, needles); #endif -#include "ws_mempbrk.h" +} + const guint8 * -_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles) +ws_mempbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle) { - gchar tmp[256] = { 0 }; - const guint8 *haystack_end; + const guint8 *haystack_end = haystack + haystacklen; - while (*needles) - tmp[*needles++] = 1; + while (haystack < haystack_end) { + if (pattern->patt[*haystack]) { + if (found_needle) + *found_needle = *haystack; + return haystack; + } + haystack++; + } - haystack_end = haystack + haystacklen; - while (haystack < haystack_end) { - if (tmp[*haystack]) - return haystack; - haystack++; - } - - return NULL; + return NULL; } + WS_DLL_PUBLIC const guint8 * -ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles) +tvb_pbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle) { #ifdef HAVE_SSE4_2 - static int have_sse42 = -1; + if (haystacklen >= 16 && pattern->use_sse42) + return ws_mempbrk_sse42_exec(haystack, haystacklen, pattern, found_needle); #endif - if (*needles == 0) - return NULL; - -#ifdef HAVE_SSE4_2 - if G_UNLIKELY(have_sse42 < 0) - have_sse42 = ws_cpuid_sse42(); - if (haystacklen >= 16 && have_sse42) - return _ws_mempbrk_sse42(haystack, haystacklen, needles); -#endif - - return _ws_mempbrk(haystack, haystacklen, needles); + return ws_mempbrk_exec(haystack, haystacklen, pattern, found_needle); } + /* * Editor modelines - http://www.wireshark.org/tools/modelines.html * diff --git a/wsutil/ws_mempbrk.h b/wsutil/ws_mempbrk.h index 72f37d574a..708d53cfff 100644 --- a/wsutil/ws_mempbrk.h +++ b/wsutil/ws_mempbrk.h @@ -24,13 +24,30 @@ #include "ws_symbol_export.h" -WS_DLL_PUBLIC const guint8 *ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles); +/** The pattern object used for tvb_pbrk_pattern_guint8(). + */ +typedef struct { + gchar patt[256]; + gboolean use_sse42; + void *mask; +} tvb_pbrk_pattern; + +/** The value to use when initializing a tvb_pbrk_pattern variable. + * For example: + * static tvb_pbrk_pattern pbrk_mypattern = INIT_PBRK_PATTERN; + */ +#define INIT_PBRK_PATTERN { { 0 }, FALSE, NULL } + +/** Compile the pattern for the needles to find using tvb_pbrk_pattern_guint8(). + */ +WS_DLL_PUBLIC void tvb_pbrk_compile(tvb_pbrk_pattern* pattern, const gchar *needles); + +WS_DLL_PUBLIC const guint8 *tvb_pbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle); -#ifdef HAVE_SSE4_2 -const char *_ws_mempbrk_sse42(const char* haystack, size_t haystacklen, const char *needles); -#endif +void ws_mempbrk_sse42_compile(tvb_pbrk_pattern* pattern, const gchar *needles); +const char *ws_mempbrk_sse42_exec(const char* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle); -const guint8 *_ws_mempbrk(const guint8* haystack, size_t haystacklen, const guint8 *needles); +const guint8 *ws_mempbrk_exec(const guint8* haystack, size_t haystacklen, const tvb_pbrk_pattern* pattern, guchar *found_needle); #endif /* __WS_MEMPBRK_H__ */ diff --git a/wsutil/ws_mempbrk_sse42.c b/wsutil/ws_mempbrk_sse42.c index 2805c0240e..da37a665aa 100644 --- a/wsutil/ws_mempbrk_sse42.c +++ b/wsutil/ws_mempbrk_sse42.c @@ -23,7 +23,7 @@ #ifdef HAVE_SSE4_2 #include <glib.h> - +#include "ws_cpuid.h" #ifdef WIN32 #include <tmmintrin.h> @@ -59,6 +59,23 @@ __m128i_shift_right (__m128i value, unsigned long int offset) _mm_loadu_si128 (cast_128aligned__m128i(___m128i_shift_right + offset))); } + +void +ws_mempbrk_sse42_compile(tvb_pbrk_pattern* pattern, const gchar *needles) +{ + size_t length = strlen(needles); + + pattern->use_sse42 = ws_cpuid_sse42() && (length <= 16); + + if (pattern->use_sse42) { + __m128i *pmask = NULL; + pattern->mask = g_malloc(sizeof(__m128i)); + pmask = (__m128i *) pattern->mask; + *pmask = _mm_setzero_si128(); + memcpy(pmask, needles, length); + } +} + /* We use 0x2: _SIDD_SBYTE_OPS | _SIDD_CMP_EQUAL_ANY @@ -92,81 +109,12 @@ __m128i_shift_right (__m128i value, unsigned long int offset) X for case 1. */ const char * -_ws_mempbrk_sse42(const char *s, size_t slen, const char *a) +ws_mempbrk_sse42_exec(const char *s, size_t slen, const tvb_pbrk_pattern* pattern, guchar *found_needle) { const char *aligned; - __m128i mask; + __m128i *pmask = (__m128i *) pattern->mask; int offset; -#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) - { - /* As 'a' is not guarantueed to have a size of at least 16 bytes, and is not - * aligned, _mm_load_si128() cannot be used when ASAN is enabled. That - * triggers a buffer overflow which is harmless as 'a' is guaranteed to be - * '\0' terminated, and the PCMISTRI instruction always ignored everything - * starting from EOS ('\0'). A false positive indeed. */ - size_t length; - - length = strlen(a); - /* Don't use SSE4.2 if the length of A > 16. */ - if (length > 16) - return _ws_mempbrk(s, slen, a); - - mask = _mm_setzero_si128(); - memcpy(&mask, a, length); - } -#else /* else if ASAN is disabled */ - offset = (int) ((size_t) a & 15); - aligned = (const char *) ((size_t) a & -16L); - if (offset != 0) - { - int length; - - /* Load masks. */ - /* cast safe - _mm_load_si128() it's 16B aligned */ - mask = __m128i_shift_right(_mm_load_si128 (cast_128aligned__m128i(aligned)), offset); - - /* Find where the NULL terminator is. */ - length = _mm_cmpistri (mask, mask, 0x3a); - if (length == 16 - offset) - { - /* There is no NULL terminator. */ - __m128i mask1 = _mm_load_si128 (cast_128aligned__m128i(aligned + 16)); - int idx = _mm_cmpistri (mask1, mask1, 0x3a); - length += idx; - - /* Don't use SSE4.2 if the length of A > 16. */ - if (length > 16) - return _ws_mempbrk(s, slen, a); - - if (idx != 0) - { - /* Combine mask0 and mask1. We could play games with - palignr, but frankly this data should be in L1 now - so do the merge via an unaligned load. */ - mask = _mm_loadu_si128 (cast_128aligned__m128i(a)); - } - } - } - else - { - int length; - - /* A is aligned. (cast safe) */ - mask = _mm_load_si128 (cast_128aligned__m128i(a)); - - /* Find where the NULL terminator is. */ - length = _mm_cmpistri (mask, mask, 0x3a); - if (length == 16) - { - /* There is no NULL terminator. Don't use SSE4.2 if the length - of A > 16. */ - if (a[16] != 0) - return _ws_mempbrk(s, slen, a); - } - } -#endif /* ASAN disabled */ - offset = (int) ((size_t) s & 15); aligned = (const char *) ((size_t) s & -16L); if (offset != 0) @@ -174,18 +122,23 @@ _ws_mempbrk_sse42(const char *s, size_t slen, const char *a) /* Check partial string. cast safe it's 16B aligned */ __m128i value = __m128i_shift_right (_mm_load_si128 (cast_128aligned__m128i(aligned)), offset); - int length = _mm_cmpistri (mask, value, 0x2); + int length = _mm_cmpistri (*pmask, value, 0x2); /* No need to check ZFlag since ZFlag is always 1. */ - int cflag = _mm_cmpistrc (mask, value, 0x2); + int cflag = _mm_cmpistrc (*pmask, value, 0x2); + /* XXX: why does this compare value with value? */ int idx = _mm_cmpistri (value, value, 0x3a); - if (cflag) + if (cflag) { + if (found_needle) + *found_needle = *(s + length); return s + length; + } + /* Find where the NULL terminator is. */ if (idx < 16 - offset) { - /* fond NUL @ 'idx', need to switch to slower mempbrk */ - return _ws_mempbrk(s + idx + 1, slen - idx - 1, a); /* slen is bigger than 16 & idx < 16 so no undeflow here */ + /* found NUL @ 'idx', need to switch to slower mempbrk */ + return ws_mempbrk_exec(s + idx + 1, slen - idx - 1, pattern, found_needle); /* slen is bigger than 16 & idx < 16 so no undeflow here */ } aligned += 16; slen -= (16 - offset); @@ -196,23 +149,27 @@ _ws_mempbrk_sse42(const char *s, size_t slen, const char *a) while (slen >= 16) { __m128i value = _mm_load_si128 (cast_128aligned__m128i(aligned)); - int idx = _mm_cmpistri (mask, value, 0x2); - int cflag = _mm_cmpistrc (mask, value, 0x2); - int zflag = _mm_cmpistrz (mask, value, 0x2); + int idx = _mm_cmpistri (*pmask, value, 0x2); + int cflag = _mm_cmpistrc (*pmask, value, 0x2); + int zflag = _mm_cmpistrz (*pmask, value, 0x2); - if (cflag) + if (cflag) { + if (found_needle) + *found_needle = *(aligned + idx); return aligned + idx; + } + if (zflag) { /* found NUL, need to switch to slower mempbrk */ - return _ws_mempbrk(aligned, slen, a); + return ws_mempbrk_exec(aligned, slen, pattern, found_needle); } aligned += 16; slen -= 16; } /* XXX, use mempbrk_slow here? */ - return _ws_mempbrk(aligned, slen, a); + return ws_mempbrk_exec(aligned, slen, pattern, found_needle); } #endif /* HAVE_SSE4_2 */ |