/*
 * Find a byte sequence in a file.
 *
 * Author: Peter Wu
 * Date: 2012-11-01
 *
 * Introduced by commit 3ea14aaa16290458b0d0d54c3c922096ac58d514
 * ("find-bytes: search for a byte sequence in a file",
 * Thu, 1 Nov 2012 21:47:07 +0100).
 *
 * Usage: find-bytes FILE < KEY
 *
 * The search key is read verbatim from stdin (a trailing newline is part of
 * the key).  The offset of every non-overlapping occurrence of the key in
 * FILE is printed on its own line, in decimal, or in hexadecimal when the
 * environment variable HEX is set (to any value).
 */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE /* for memmem() */
#endif
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifndef SEARCH_BUF_SIZE
# define SEARCH_BUF_SIZE (1024 * 1024)
#endif

/* Initial capacity of the key buffer; it doubles whenever it fills up. */
enum { KEY_CHUNK_SIZE = 512 };

/* Report the failed call named by `what` through perror() and abort. */
static void fatal(const char *what) {
    perror(what);
    abort();
}

/*
 * Read up to `want` bytes from `fd` into `dst`, retrying on short reads and
 * on EINTR.  Returns the number of bytes stored; a value smaller than `want`
 * means end of file was reached.  Aborts on any other read error.
 */
static size_t read_full(int fd, char *dst, size_t want) {
    size_t got = 0;

    while (got < want) {
        ssize_t n = read(fd, dst + got, want - got);

        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            fatal("read");
        }
        if (n == 0) {
            break; /* end of file */
        }
        got += (size_t)n;
    }
    return got;
}

/*
 * Read the whole of stdin into a freshly allocated buffer.
 *
 * On success *dst points to the key and its length (> 0) is returned; the
 * caller owns the buffer and must free() it.  When stdin is empty, *dst is
 * set to NULL and 0 is returned.  Allocation and read failures abort.
 */
size_t read_key(char **dst) {
    char *key = NULL;
    size_t len = 0;
    size_t cap = 0;

    *dst = NULL;
    for (;;) {
        /* Invariant: len == cap here, so the buffer has to grow. */
        size_t new_cap = cap ? cap * 2 : (size_t)KEY_CHUNK_SIZE;
        char *grown;
        size_t want, got;

        if (new_cap <= cap) { /* doubling wrapped around */
            errno = ENOMEM;
            fatal("realloc");
        }
        grown = realloc(key, new_cap);
        if (!grown) {
            fatal("realloc");
        }
        key = grown;
        cap = new_cap;

        want = cap - len;
        got = read_full(STDIN_FILENO, key + len, want);
        len += got;
        if (got < want) {
            break; /* end of input */
        }
    }

    if (!len) {
        free(key);
        return 0;
    }

    /* Give back the unused tail; failing to shrink is harmless. */
    char *shrunk = realloc(key, len);
    if (shrunk) {
        key = shrunk;
    }
    *dst = key;
    return len;
}

/*
 * Print the file offset of every non-overlapping occurrence of `key`
 * (`key_len` bytes) in `fp`, one per line on stdout.
 *
 * The file is read through its descriptor from the current position, so
 * `fp` must not have been read through stdio beforehand.  Offsets are
 * relative to that position.  Decimal by default, hexadecimal when the
 * environment variable HEX is set.  Allocation and read failures abort.
 */
void find_key(FILE *fp, char *key, size_t key_len) {
    if (!key || !key_len) {
        return; /* an empty key would "match" at every offset */
    }

    int fd = fileno(fp);
    int as_hex = getenv("HEX") != NULL;
    /* A match may straddle two chunks by at most key_len - 1 bytes. */
    size_t overlap = key_len - 1;
    size_t buf_size = (SEARCH_BUF_SIZE);

    if (key_len > buf_size / 8) {
        if (key_len > (size_t)-1 / 8) {
            errno = ENOMEM;
            fatal("malloc");
        }
        buf_size = key_len * 8;
    }

    char *buf = malloc(buf_size);
    if (!buf) {
        fatal("malloc");
    }

    size_t carried = 0; /* bytes at the front of buf kept from the last chunk */
    size_t base = 0;    /* file offset corresponding to buf[0] */
    size_t resume = 0;  /* index in buf where searching may continue */

    for (;;) {
        /*----------------+------------------------------*
         * carried bytes  | new data                     *
         * <= key_len - 1 | up to buf_size - carried     *
         *----------------+------------------------------*/
        size_t want = buf_size - carried;
        size_t got = read_full(fd, buf + carried, want);
        size_t have = carried + got;
        size_t pos = resume;

        while (have >= key_len && pos <= have - key_len) {
            char *hit = memmem(buf + pos, have - pos, key, key_len);

            if (!hit) {
                break;
            }
            size_t idx = (size_t)(hit - buf);
            if (as_hex) {
                printf("%zx\n", base + idx);
            } else {
                printf("%zu\n", base + idx);
            }
            pos = idx + key_len; /* matches are non-overlapping */
        }

        if (got < want) {
            break; /* end of file */
        }

        /* Keep the tail so a match spanning the chunk boundary is found. */
        size_t tail_start = have - overlap;
        memmove(buf, buf + tail_start, overlap);
        base += tail_start;
        /* Do not rescan bytes already consumed by the last match. */
        resume = pos > tail_start ? pos - tail_start : 0;
        carried = overlap;
    }
    free(buf);
}

/*
 * Entry point: read the key from stdin and search argv[1] for it.
 * Returns 0 on success, 1 on usage error, empty key or unopenable file.
 */
int main(int argc, char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s file\nBinary search key should go "
                "into stdin, newline is not eaten.\nOutputs "
                "file offset, for hex format set env HEX.\n",
                argc > 0 && argv[0] ? argv[0] : "find-bytes");
        return 1;
    }

    char *key;
    size_t len = read_key(&key);
    if (!len) {
        fprintf(stderr, "Search key must not be empty\n");
        return 1;
    }

    FILE *fp = fopen(argv[1], "rb");
    if (!fp) {
        perror("fopen");
        free(key);
        return 1;
    }

    find_key(fp, key, len);
    free(key);
    fclose(fp);
    return 0;
}