summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--AUTHORS3
-rw-r--r--wiretap/file_access.c11
-rw-r--r--wiretap/file_wrappers.c333
-rw-r--r--wiretap/file_wrappers.h2
-rw-r--r--wiretap/wtap-int.h1
-rw-r--r--wiretap/wtap.c10
6 files changed, 304 insertions, 56 deletions
diff --git a/AUTHORS b/AUTHORS
index a035533377..04eab8e711 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -3246,7 +3246,8 @@ Martin Kaiser <martin [AT] kaiser.cx> {
Jakub Zawadzki <darkjames [AT] darkjames.ath.cx> {
JSON dissector
- Wiretap cleanup
+ Wiretap cleanup and support for fast random access to gzipped
+ files
}
Roland Knall <rknall [AT] gmail.com> {
diff --git a/wiretap/file_access.c b/wiretap/file_access.c
index c50e4b30a6..f9ff384ca8 100644
--- a/wiretap/file_access.c
+++ b/wiretap/file_access.c
@@ -333,6 +333,12 @@ wtap* wtap_open_offline(const char *filename, int *err, char **err_info,
wth->priv = NULL;
init_open_routines();
+ if (wth->random_fh) {
+ wth->fast_seek = g_ptr_array_new();
+
+ file_set_random_access(wth->fh, FALSE, wth->fast_seek);
+ file_set_random_access(wth->random_fh, TRUE, wth->fast_seek);
+ }
/* Try all file types */
for (i = 0; i < open_routines_arr->len; i++) {
@@ -374,10 +380,7 @@ wtap* wtap_open_offline(const char *filename, int *err, char **err_info,
}
/* Well, it's not one of the types of file we know about. */
- if (wth->random_fh != NULL)
- file_close(wth->random_fh);
- file_close(wth->fh);
- g_free(wth);
+ wtap_close(wth);
*err = WTAP_ERR_FILE_UNKNOWN_FORMAT;
return NULL;
diff --git a/wiretap/file_wrappers.c b/wiretap/file_wrappers.c
index 713142e0ae..633204af75 100644
--- a/wiretap/file_wrappers.c
+++ b/wiretap/file_wrappers.c
@@ -74,6 +74,7 @@
struct wtap_reader {
int fd; /* file descriptor */
+ gint64 raw_pos; /* current position in file (just to not call lseek()) */
gint64 pos; /* current position in uncompressed data */
unsigned size; /* buffer size */
unsigned char *in; /* input buffer */
@@ -97,6 +98,9 @@ struct wtap_reader {
/* zlib inflate stream */
z_stream strm; /* stream structure in-place (not a pointer) */
#endif
+ /* fast seeking */
+ GPtrArray *fast_seek;
+ void *fast_seek_cur;
};
/* values for gz_state compression */
@@ -104,6 +108,7 @@ struct wtap_reader {
#define UNCOMPRESSED 1 /* copy input directly */
#ifdef HAVE_LIBZ
#define ZLIB 2 /* decompress a zlib stream */
+#define GZIP_AFTER_HEADER 3
#endif
static int /* gz_load */
@@ -117,6 +122,7 @@ raw_read(FILE_T state, unsigned char *buf, unsigned int count, unsigned *have)
if (ret <= 0)
break;
*have += ret;
+ state->raw_pos += ret;
} while (*have < count);
if (ret < 0) {
state->err = errno;
@@ -140,6 +146,88 @@ fill_in_buffer(FILE_T state)
return 0;
}
+#define ZLIB_WINSIZE 32768
+
+struct fast_seek_point { /* one resumable position: a pair of (input offset, uncompressed offset) plus the state needed to restart reading there */
+ gint64 out; /* corresponding offset in uncompressed data */
+ gint64 in; /* offset in input file of first full byte */
+
+ int compression; /* how data at this point is read: UNCOMPRESSED, ZLIB or GZIP_AFTER_HEADER */
+ union {
+ struct {
+ int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
+ unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
+
+ /* Running check value and output length saved with the point (8 more bytes) so the gzip trailer checks at Z_STREAM_END can still pass after a fast seek; the alternative would be to disable those checks. */
+ guint32 adler; /* copy of strm.adler at this point */
+ guint32 total_out; /* copy of strm.total_out at this point, stored as 32 bits */
+ } zlib; /* filled in by zlib_fast_seek_add() for ZLIB points only */
+ } data;
+};
+
+struct zlib_cur_seek_point { /* rolling copy of the most recent inflate output for the stream currently being read */
+ unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
+ unsigned int pos; /* next write index into window; writes wrap around to the start */
+ unsigned int have; /* number of bytes collected so far, capped at ZLIB_WINSIZE */
+};
+
+#define SPAN G_GINT64_CONSTANT(1048576)
+static struct fast_seek_point * /* returns the point with the largest 'out' that is <= pos, or NULL if there is none */
+fast_seek_find(FILE_T file, gint64 pos)
+{
+ struct fast_seek_point *smallest = NULL; /* best candidate so far: last probed point with out < pos */
+ struct fast_seek_point *item;
+ guint low, i, max;
+
+ if (!file->fast_seek) /* no seek-point array attached to this stream */
+ return NULL;
+
+ for (low = 0, max = file->fast_seek->len; low < max; ) { /* binary search over [low, max); assumes entries are ordered by ascending 'out' */
+ i = (low + max) / 2;
+ item = file->fast_seek->pdata[i];
+
+ if (pos < item->out)
+ max = i;
+ else if (pos > item->out) {
+ smallest = item; /* usable point; keep looking for a closer one further right */
+ low = i + 1;
+ } else {
+ return item; /* exact match */
+ }
+ }
+ return smallest;
+}
+
+static void /* record the start of a stream (gzip body or raw data) as a seek point, unless the last point is already at or beyond out_pos */
+fast_seek_header(FILE_T file, gint64 in_pos, gint64 out_pos, int compression)
+{
+ struct fast_seek_point *item = NULL;
+
+ if (file->fast_seek->len != 0) /* file->fast_seek is dereferenced unconditionally; the visible callers check it first */
+ item = file->fast_seek->pdata[file->fast_seek->len - 1]; /* most recently appended point */
+
+ if (!item || item->out < out_pos) { /* append only, which keeps the array ordered by 'out' */
+ struct fast_seek_point *val = g_malloc(sizeof(struct fast_seek_point)); /* NOTE(review): only in/out/compression are set below; data.zlib stays uninitialized */
+ val->in = in_pos;
+ val->out = out_pos;
+ val->compression = compression;
+
+ g_ptr_array_add(file->fast_seek, val);
+ }
+}
+
+static void /* discard the collected inflate window; file_seek() calls this after repositioning the descriptor */
+fast_seek_reset(FILE_T state)
+{
+#ifdef HAVE_LIBZ
+ if (state->compression == ZLIB && state->fast_seek_cur) {
+ struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
+
+ cur->have = 0; /* zlib_read() adds points only once have reaches ZLIB_WINSIZE again; cur->pos is left unchanged */
+ }
+#endif /* without HAVE_LIBZ this function does nothing */
+}
+
#ifdef HAVE_LIBZ
/* Get next byte from input, or -1 if end or error. */
@@ -166,13 +254,47 @@ gz_next4(FILE_T state, guint32 *ret)
return 0;
}
-static int /* gz_decomp */
+static void /* append a ZLIB seek point (window, leftover bit count, check value, output length) if it is far enough past the last point */
+zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, gint64 in_pos, gint64 out_pos)
+{
+ /* presumably always reached after gz_head() has recorded a header point, so file->fast_seek->len != 0 */
+ struct fast_seek_point *item = file->fast_seek->pdata[file->fast_seek->len - 1];;
+
+ /* GLib has balanced binary trees (GTree), but no obvious way to quickly look up the nearest smaller-or-equal key, which is what fast_seek_find() does.
+ * Inserting a value into the middle of a sorted array is expensive, so points are only appended at the end.
+ * That is not a big deal, because the first sequential read does not usually involve seeking.
+ */
+ if (item->out + SPAN < out_pos) { /* keep points more than SPAN uncompressed bytes apart */
+ struct fast_seek_point *val = g_malloc(sizeof(struct fast_seek_point));
+ val->in = in_pos;
+ val->out = out_pos;
+ val->compression = ZLIB;
+
+ val->data.zlib.bits = bits;
+ if (point->pos != 0) { /* window has wrapped: oldest data starts at point->pos, so copy that tail first and the head after it */
+ unsigned int left = ZLIB_WINSIZE - point->pos;
+
+ memcpy(val->data.zlib.window, point->window + point->pos, left);
+ memcpy(val->data.zlib.window + left, point->window, point->pos);
+ } else
+ memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE);
+
+ val->data.zlib.adler = file->strm.adler; /* restored into strm->adler by file_seek() */
+ val->data.zlib.total_out = file->strm.total_out; /* NOTE(review): stored in a guint32 field */
+ g_ptr_array_add(file->fast_seek, val);
+ }
+}
+
+static void /* gz_decomp */
zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
{
- int ret;
+ int ret = 0; /* XXX */
guint32 crc, len;
z_streamp strm = &(state->strm);
+ unsigned char *buf2 = buf;
+ unsigned int count2 = count;
+
strm->avail_out = count;
strm->next_out = buf;
@@ -180,56 +302,86 @@ zlib_read(FILE_T state, unsigned char *buf, unsigned int count)
do {
/* get more input for inflate() */
if (state->avail_in == 0 && fill_in_buffer(state) == -1)
- return -1;
+ break;
if (state->avail_in == 0) {
state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
- return -1;
+ break;
}
strm->avail_in = state->avail_in;
strm->next_in = state->next_in;
/* decompress and handle errors */
- ret = inflate(strm, Z_NO_FLUSH);
+ /* ret = inflate(strm, Z_NO_FLUSH); */
+ ret = inflate(strm, Z_BLOCK);
state->avail_in = strm->avail_in;
state->next_in = strm->next_in;
if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
state->err = WTAP_ERR_ZLIB + Z_STREAM_ERROR;
- return -1;
+ break;
}
if (ret == Z_MEM_ERROR) {
state->err = WTAP_ERR_ZLIB + Z_MEM_ERROR; /* ENOMEM? */
- return -1;
+ break;
}
if (ret == Z_DATA_ERROR) { /* deflate stream invalid */
state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
- return -1;
+ break;
+ }
+
+ strm->adler = crc32(strm->adler, buf2, count2 - strm->avail_out);
+ if (state->fast_seek_cur) {
+ struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
+ unsigned int ready = count2 - strm->avail_out;
+
+ if (ready < ZLIB_WINSIZE) {
+ unsigned left = ZLIB_WINSIZE - cur->pos;
+
+ if (ready >= left) {
+ memcpy(cur->window + cur->pos, buf2, left);
+ if (ready != left)
+ memcpy(cur->window, buf2 + left, ready - left);
+
+ cur->pos = ready - left;
+ cur->have += ready;
+ } else {
+ memcpy(cur->window + cur->pos, buf2, ready);
+ cur->pos += ready;
+ cur->have += ready;
+ }
+
+ if (cur->have >= ZLIB_WINSIZE)
+ cur->have = ZLIB_WINSIZE;
+
+ } else {
+ memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
+ cur->pos = 0;
+ cur->have = ZLIB_WINSIZE;
+ }
+
+ if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
+ zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
}
+ buf2 = (buf2 + count2 - strm->avail_out);
+ count2 = strm->avail_out;
+
} while (strm->avail_out && ret != Z_STREAM_END);
/* update available output and crc check value */
state->next = buf;
state->have = count - strm->avail_out;
- strm->adler = crc32(strm->adler, state->next, state->have);
/* check gzip trailer if at end of deflate stream */
if (ret == Z_STREAM_END) {
- if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) {
+ if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1)
state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
- return -1;
- }
- if (crc != strm->adler) {
+ if (crc != strm->adler)
state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
- return -1;
- }
- if (len != (strm->total_out & 0xffffffffL)) {
+ if (len != (strm->total_out & 0xffffffffL))
state->err = WTAP_ERR_ZLIB + Z_DATA_ERROR;
- return -1;
- }
state->compression = UNKNOWN; /* ready for next stream, once have is 0 */
+ g_free(state->fast_seek_cur);
+ state->fast_seek_cur = NULL;
}
-
- /* good decompression */
- return 0;
}
#endif
@@ -299,6 +451,15 @@ gz_head(FILE_T state)
inflateReset(&(state->strm));
state->strm.adler = crc32(0L, Z_NULL, 0);
state->compression = ZLIB;
+
+ if (state->fast_seek) {
+ struct zlib_cur_seek_point *cur = g_malloc(sizeof(struct zlib_cur_seek_point));
+
+ cur->pos = cur->have = 0;
+ g_free(state->fast_seek_cur);
+ state->fast_seek_cur = cur;
+ fast_seek_header(state, state->raw_pos - state->avail_in, state->pos, GZIP_AFTER_HEADER);
+ }
return 0;
}
else {
@@ -312,6 +473,8 @@ gz_head(FILE_T state)
/* { 0xFD, '7', 'z', 'X', 'Z', 0x00 } */
/* FD 37 7A 58 5A 00 */
#endif
+ if (state->fast_seek)
+ fast_seek_header(state, state->raw_pos - state->avail_in - state->have, state->pos, UNCOMPRESSED);
/* doing raw i/o, save start of raw data for seeking, copy any leftover
input to output -- this assumes that the output buffer is larger than
@@ -343,8 +506,7 @@ fill_out_buffer(FILE_T state)
}
#ifdef HAVE_LIBZ
else if (state->compression == ZLIB) { /* decompress */
- if (zlib_read(state, state->out, state->size << 1) == -1)
- return -1;
+ zlib_read(state, state->out, state->size << 1);
}
#endif
return 0;
@@ -366,6 +528,10 @@ gz_skip(FILE_T state, gint64 len)
len -= n;
}
+ /* delayed error reporting */
+ else if (state->err)
+ return -1;
+
/* output buffer empty -- return if we're at the end of the input */
else if (state->eof && state->avail_in == 0)
break;
@@ -404,17 +570,21 @@ filed_open(int fd)
if (fd == -1)
return NULL;
- /* allocate gzFile structure to return */
+ /* allocate FILE_T structure to return */
state = g_try_malloc(sizeof *state);
if (state == NULL)
return NULL;
+ state->fast_seek_cur = NULL;
+ state->fast_seek = NULL;
+
/* open the file with the appropriate mode (or just use fd) */
state->fd = fd;
/* save the current position for rewinding (only if reading) */
state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
if (state->start == -1) state->start = 0;
+ state->raw_pos = state->start;
/* initialize stream */
gz_reset(state);
@@ -453,8 +623,6 @@ filed_open(int fd)
return NULL;
}
#endif
- gz_head(state); /* read first chunk */
-
/* return stream */
return state;
}
@@ -486,17 +654,18 @@ file_open(const char *path)
return ft;
}
+void /* attach a caller-supplied seek-point array to a stream; wtap_open_offline() passes the same array to both of its streams */
+file_set_random_access(FILE_T stream, gboolean random _U_, GPtrArray *seek)
+{
+ stream->fast_seek = seek; /* pointer is stored as-is; 'random' is marked _U_ and not used here */
+}
+
gint64
file_seek(FILE_T file, gint64 offset, int whence, int *err)
{
+ struct fast_seek_point *here;
unsigned n;
- /* check that there's no error */
- if (file->err) {
- *err = file->err;
- return -1;
- }
-
/* can only seek from start or relative to current position */
if (whence != SEEK_SET && whence != SEEK_CUR) {
g_assert_not_reached();
@@ -513,12 +682,85 @@ file_seek(FILE_T file, gint64 offset, int whence, int *err)
offset += file->skip;
file->seek = 0;
+ /* XXX, profile */
+ if ((here = fast_seek_find(file, file->pos + offset)) && (offset < 0 || offset > SPAN || here->compression == UNCOMPRESSED)) {
+ gint64 off, off2;
+
+#ifdef HAVE_LIBZ
+ if (here->compression == ZLIB) {
+ off = here->in - (here->data.zlib.bits ? 1 : 0);
+ off2 = here->out;
+ } else if (here->compression == GZIP_AFTER_HEADER) {
+ off = here->in;
+ off2 = here->out;
+ } else
+#endif
+ {
+ off2 = (file->pos + offset);
+ off = here->in + (off2 - here->out);
+ }
+
+ if (ws_lseek64(file->fd, off, SEEK_SET) == -1) {
+ *err = errno;
+ return -1;
+ }
+ fast_seek_reset(file);
+
+ file->raw_pos = off;
+ file->have = 0;
+ file->eof = 0;
+ file->seek = 0;
+ file->err = 0;
+ file->avail_in = 0;
+
+#ifdef HAVE_LIBZ
+ if (here->compression == ZLIB) {
+ z_stream *strm = &file->strm;
+ FILE_T state = file;
+
+ inflateReset(strm);
+ strm->adler = here->data.zlib.adler;
+ strm->total_out = here->data.zlib.total_out;
+ if (here->data.zlib.bits) {
+ int ret = NEXT();
+
+ if (ret == -1) {
+ /* *err = ???; */
+ return -1;
+ }
+
+ (void)inflatePrime(strm, here->data.zlib.bits, ret >> (8 - here->data.zlib.bits));
+ }
+ (void)inflateSetDictionary(strm, here->data.zlib.window, ZLIB_WINSIZE);
+ file->compression = ZLIB;
+ } else if (here->compression == GZIP_AFTER_HEADER) {
+ z_stream *strm = &file->strm;
+
+ inflateReset(strm);
+ strm->adler = crc32(0L, Z_NULL, 0);
+ file->compression = ZLIB;
+ } else
+#endif
+ file->compression = here->compression;
+
+ offset = (file->pos + offset) - off2;
+ file->pos = off2;
+ /* g_print("OK! %ld\n", offset); */
+
+ if (offset) {
+ file->seek = 1;
+ file->skip = offset;
+ }
+ return file->pos + offset;
+ }
+
/* if within raw area while reading, just go there */
if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw) {
if (ws_lseek64(file->fd, offset - file->have, SEEK_CUR) == -1) {
*err = errno;
return -1;
}
+ file->raw_pos += (offset - file->have);
file->have = 0;
file->eof = 0;
file->seek = 0;
@@ -542,6 +784,8 @@ file_seek(FILE_T file, gint64 offset, int whence, int *err)
*err = errno;
return -1;
}
+ fast_seek_reset(file);
+ file->raw_pos = file->start;
gz_reset(file);
}
@@ -572,10 +816,6 @@ file_read(void *buf, unsigned int len, FILE_T file)
{
unsigned got, n;
- /* check that we're reading and that there's no error */
- if (file->err)
- return -1;
-
/* if len is zero, avoid unnecessary operations */
if (len == 0)
return 0;
@@ -597,31 +837,21 @@ file_read(void *buf, unsigned int len, FILE_T file)
file->next += n;
file->have -= n;
}
+ /* delayed error reporting (for zlib) */
+ else if (file->err)
+ return -1;
/* output buffer empty -- return if we're at the end of the input */
else if (file->eof && file->avail_in == 0)
break;
- /* need output data -- for small len or new stream load up our output buffer */
- else if (file->compression == UNKNOWN || len < (file->size << 1)) {
+ /* need output data */
+ else {
/* get more output, looking for header if required */
if (fill_out_buffer(file) == -1)
return -1;
continue; /* no progress yet -- go back to memcpy() above */
-
- } else if (file->compression == UNCOMPRESSED) { /* large len -- read directly into user buffer */
- if (raw_read(file, buf, len, &n) == -1)
- return -1;
- }
-#ifdef HAVE_LIBZ
- /* large len -- decompress directly into user buffer */
- else { /* file->compression == ZLIB */
- if (zlib_read(file, buf, len) == -1)
- return -1;
- n = file->have;
- file->have = 0;
}
-#endif
/* update progress */
len -= n;
buf = (char *)buf + n;
@@ -751,6 +981,7 @@ file_close(FILE_T file)
g_free(file->out);
g_free(file->in);
}
+ g_free(file->fast_seek_cur);
file->err = 0;
g_free(file);
return close(fd);
diff --git a/wiretap/file_wrappers.h b/wiretap/file_wrappers.h
index 51d655a25a..8f8b68f144 100644
--- a/wiretap/file_wrappers.h
+++ b/wiretap/file_wrappers.h
@@ -37,6 +37,8 @@ extern char *file_gets(char *buf, int len, FILE_T stream);
extern int file_eof(FILE_T stream);
extern void file_clearerr(FILE_T stream);
+extern void file_set_random_access(FILE_T stream, gboolean random, GPtrArray *seek);
+
#ifdef HAVE_LIBZ
typedef struct wtap_writer *GZWFILE_T;
diff --git a/wiretap/wtap-int.h b/wiretap/wtap-int.h
index a9f0758663..aff5fde02f 100644
--- a/wiretap/wtap-int.h
+++ b/wiretap/wtap-int.h
@@ -72,6 +72,7 @@ struct wtap {
* e.g. WTAP_FILE_TSPREC_USEC */
wtap_new_ipv4_callback_t add_new_ipv4;
wtap_new_ipv6_callback_t add_new_ipv6;
+ GPtrArray *fast_seek;
};
struct wtap_dumper;
diff --git a/wiretap/wtap.c b/wiretap/wtap.c
index 084d3db4a0..9b44e3a61b 100644
--- a/wiretap/wtap.c
+++ b/wiretap/wtap.c
@@ -655,6 +655,12 @@ wtap_sequential_close(wtap *wth)
return ret;
}
+static void /* g_ptr_array_foreach() callback: wtap_close() uses it to free each entry of wth->fast_seek */
+g_fast_seek_item_free(gpointer data, gpointer user_data _U_)
+{
+ g_free(data);
+}
+
int
wtap_close(wtap *wth)
{
@@ -679,6 +685,10 @@ wtap_close(wtap *wth)
if (wth->priv != NULL)
g_free(wth->priv);
+ if (wth->fast_seek != NULL) {
+ g_ptr_array_foreach(wth->fast_seek, g_fast_seek_item_free, NULL);
+ g_ptr_array_free(wth->fast_seek, TRUE);
+ }
g_free(wth);
return ret;