summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Wu <lekensteyn@gmail.com>2012-11-01 21:47:07 +0100
committerPeter Wu <lekensteyn@gmail.com>2012-11-01 21:47:07 +0100
commit3ea14aaa16290458b0d0d54c3c922096ac58d514 (patch)
tree26478815dd191ddc37e2375ad93a17433728c996
parent473822539e549cae1874d6ead6d04839ffb33316 (diff)
downloadc-files-3ea14aaa16290458b0d0d54c3c922096ac58d514.tar.gz
find-bytes: search for a byte sequence in a file
-rw-r--r--find-bytes.c113
1 files changed, 113 insertions, 0 deletions
diff --git a/find-bytes.c b/find-bytes.c
new file mode 100644
index 0000000..b81aebc
--- /dev/null
+++ b/find-bytes.c
@@ -0,0 +1,113 @@
+/*
+ * Find a byte sequence in a file.
+ *
+ * Author: Peter Wu <lekensteyn@gmail.com>
+ * Date: 2012-11-01
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+/*
+ * Minimum size in bytes of the buffer used while scanning the file.
+ * May be overridden at compile time (e.g. -DSEARCH_BUF_SIZE=4096).
+ * The default is parenthesized so the macro expands safely inside larger
+ * expressions such as "x / SEARCH_BUF_SIZE".
+ */
+#ifndef SEARCH_BUF_SIZE
+# define SEARCH_BUF_SIZE (1024 * 1024)
+#endif
+
+/*
+ * Read the complete search key from standard input.
+ *
+ * Input is consumed until end-of-file, so keys that arrive in several
+ * short reads (pipes, terminals) are not cut off at the first short read.
+ *
+ * dst: receives a malloc'ed buffer holding exactly the returned number of
+ *      bytes (not NUL-terminated); the caller must free() it. It is set
+ *      to NULL when no input was read.
+ *
+ * Returns the key length in bytes, or 0 if stdin was empty.
+ * Aborts the program if memory cannot be allocated or reading fails.
+ */
+size_t read_key(char **dst) {
+    enum { CHUNK_SIZE = 512 };
+    char *key = NULL;
+    size_t len = 0;
+    ssize_t read_bytes;
+
+    *dst = NULL;
+    do {
+        /* always keep room for one more full chunk behind the data */
+        char *grown = realloc(key, len + CHUNK_SIZE);
+        if (!grown) {
+            perror("realloc");
+            abort();
+        }
+        key = grown;
+        read_bytes = read(STDIN_FILENO, key + len, CHUNK_SIZE);
+        if (read_bytes < 0) {
+            /* a partially read key would give misleading results */
+            perror("read");
+            abort();
+        }
+        len += (size_t)read_bytes;
+    } while (read_bytes > 0);
+
+    if (!len) {
+        free(key);
+        return 0;
+    }
+    /* shrink to the total key length, not to the size of the last read */
+    char *exact = realloc(key, len);
+    if (!exact) {
+        perror("adjusting size, realloc");
+        abort();
+    }
+    *dst = exact;
+    return len;
+}
+
+/*
+ * Print the file offset of every non-overlapping occurrence of a byte
+ * sequence in a file, one offset per line on stdout.
+ *
+ * fp:      stream to search. Data is read through its file descriptor
+ *          with read(), starting at the descriptor's current position, so
+ *          the stream should not have been read through stdio beforehand.
+ * key:     bytes to search for (need not be NUL-terminated).
+ * key_len: number of bytes in key; nothing is searched when it is 0.
+ *
+ * Offsets are printed in decimal, or in hexadecimal when the environment
+ * variable HEX is set. Aborts if the work buffer cannot be allocated; a
+ * read error is reported with perror() and ends the scan.
+ */
+void find_key(FILE *fp, char *key, size_t key_len) {
+    if (!key_len) {
+        /* an empty key would match everywhere and never advance */
+        return;
+    }
+    int fd = fileno(fp);
+    size_t buf_size = key_len * 8;
+    if (buf_size < (SEARCH_BUF_SIZE)) {
+        buf_size = (SEARCH_BUF_SIZE);
+    }
+    char *buf = malloc(buf_size);
+    if (!buf) {
+        perror("malloc");
+        abort();
+    }
+    /* a match crossing a chunk boundary needs at most key_len - 1 old bytes */
+    const size_t carry = key_len - 1;
+    int as_hex = !!getenv("HEX");
+    size_t held = 0; /* unsearched bytes kept at the start of buf */
+    size_t base = 0; /* file offset that corresponds to buf[0] */
+
+    for (;;) {
+        /*----------------+------------------------------*
+         * carried bytes  | new data                     *
+         * held           | up to buf_size - held        *
+         *----------------+------------------------------*/
+        ssize_t got = read(fd, buf + held, buf_size - held);
+        if (got < 0) {
+            perror("read");
+            break;
+        }
+        if (got == 0) {
+            break; /* end of file; short reads are not treated as EOF */
+        }
+        size_t avail = held + (size_t)got;
+        size_t scanned = 0; /* everything before this index is done */
+        while (avail - scanned >= key_len) {
+            char *hit = memmem(buf + scanned, avail - scanned, key, key_len);
+            if (!hit) {
+                break;
+            }
+            size_t index = (size_t)(hit - buf);
+            printf(as_hex ? "%zx\n" : "%zu\n", base + index);
+            scanned = index + key_len;
+        }
+        /*
+         * Keep the last key_len - 1 bytes for the next round, but never
+         * bytes that already belong to a reported match.
+         */
+        size_t keep_from = avail > carry ? avail - carry : 0;
+        if (keep_from < scanned) {
+            keep_from = scanned;
+        }
+        held = avail - keep_from;
+        memmove(buf, buf + keep_from, held);
+        base += keep_from;
+    }
+    free(buf);
+}
+
+/*
+ * Entry point: find-bytes FILE
+ *
+ * Reads the search key from stdin, then prints the offset of each
+ * occurrence of that key in FILE. Returns 0 after a completed search and
+ * 1 on a usage error, an empty key, or when FILE cannot be opened.
+ */
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s file\nBinary search key should go "
+                "into stdin, newline is not eaten.\nOutputs "
+                "file offset, for hex format set env HEX.\n",
+                *argv);
+        return 1;
+    }
+    char *key;
+    size_t len = read_key(&key);
+    if (!len) {
+        fprintf(stderr, "Search key must not be empty\n");
+        return 1;
+    }
+    /* the file is searched byte-for-byte, so open it in binary mode */
+    FILE *fp = fopen(argv[1], "rb");
+    if (!fp) {
+        perror("fopen");
+        free(key);
+        return 1;
+    }
+    find_key(fp, key, len);
+    free(key);
+    fclose(fp);
+    return 0;
+}