/*
 * Find a byte sequence in a file.
 *
 * The search key is read verbatim from standard input (up to end-of-file);
 * the file to search is named on the command line. The file offset of every
 * non-overlapping occurrence of the key is printed, one per line.
 *
 * Author: Peter Wu
 * Date: 2012-11-01
 */
#define _GNU_SOURCE /* for memmem() */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifndef SEARCH_BUF_SIZE
# define SEARCH_BUF_SIZE (1024 * 1024)
#endif

/* Minimum free space guaranteed before each read() while collecting the key. */
enum { KEY_READ_CHUNK = 512 };

/*
 * Read the whole of standard input into a freshly allocated buffer.
 *
 * dst: receives the buffer holding the key bytes (not NUL-terminated), or
 *      NULL when 0 is returned. The caller owns the buffer and must free() it.
 *
 * Returns the number of key bytes read. Returns 0 when stdin is empty or when
 * read() fails (the error is reported with perror()); a partially read key is
 * discarded rather than returned truncated. Aborts if memory cannot be
 * allocated.
 */
size_t read_key(char **dst)
{
    char *key = NULL;
    size_t len = 0;
    size_t cap = 0;

    *dst = NULL;

    /* A short read does not mean end of input (pipes deliver data in pieces),
     * so keep reading until read() reports EOF by returning 0. */
    for (;;) {
        if (cap - len < KEY_READ_CHUNK) {
            size_t new_cap = cap ? cap * 2 : KEY_READ_CHUNK;
            char *grown = realloc(key, new_cap);
            if (!grown) {
                perror("realloc");
                abort();
            }
            key = grown;
            cap = new_cap;
        }

        ssize_t got = read(STDIN_FILENO, key + len, cap - len);
        if (got < 0) {
            perror("read");
            free(key);
            return 0;
        }
        if (got == 0) {
            break;
        }
        len += (size_t) got;
    }

    if (len == 0) {
        free(key);
        return 0;
    }

    /* Trim the buffer to the key length. If shrinking fails the original
     * (larger) buffer is still valid, so just keep using it. */
    char *fit = realloc(key, len);
    if (fit) {
        key = fit;
    }

    *dst = key;
    return len;
}

/*
 * Print the file offset of every non-overlapping occurrence of a key.
 *
 * fp:      stream to search; data is read directly from its file descriptor
 *          with read(), starting at the descriptor's current position.
 * key:     bytes to look for (need not be NUL-terminated).
 * key_len: number of bytes in key; nothing is done when it is 0.
 *
 * Offsets are counted from where reading started and are written to stdout,
 * one per line, in decimal, or in hexadecimal when the environment variable
 * HEX is set. A read error is reported with perror() and ends the search.
 * Aborts if the search buffer cannot be allocated.
 */
void find_key(FILE *fp, char *key, size_t key_len)
{
    if (key_len == 0) {
        return;
    }

    int fd = fileno(fp);

    size_t buf_size = key_len * 8;
    if (buf_size < (SEARCH_BUF_SIZE)) {
        buf_size = (SEARCH_BUF_SIZE);
    }

    char *buf = malloc(buf_size);
    if (!buf) {
        perror("malloc");
        abort();
    }

    int as_hex = getenv("HEX") != NULL;

    /*----------------+------------------------------*
     * prev data size | new data                     *
     * key_len - 1    | size is max fill_size        *
     *----------------+------------------------------*/
    size_t fill_size = buf_size - key_len + 1;
    char *new_data_p = buf + key_len - 1;

    /* First valid byte in the buffer. Initially nothing is carried over, so
     * valid data begins where the first read() stores its bytes. */
    char *window_start = new_data_p;

    /* Number of bytes read before the current chunk, i.e. the file offset
     * that corresponds to new_data_p. */
    size_t position = 0;

    /* File offset of the first byte that may begin the next match (the end of
     * the previous match). Carrying this across reads keeps the reported
     * matches non-overlapping regardless of where chunk boundaries fall. */
    size_t resume_off = 0;

    for (;;) {
        ssize_t got = read(fd, new_data_p, fill_size);
        if (got < 0) {
            perror("read");
            break;
        }
        if (got == 0) {
            break; /* end of file */
        }

        char *end_p = new_data_p + got;

        /* File offset of window_start; the carried tail precedes position. */
        size_t window_off = position - (size_t) (new_data_p - window_start);

        /* Skip carried bytes that were already consumed by the last match. */
        char *scan = window_start;
        if (resume_off > window_off) {
            scan += resume_off - window_off;
        }

        while ((size_t) (end_p - scan) >= key_len) {
            char *hit = memmem(scan, (size_t) (end_p - scan), key, key_len);
            if (!hit) {
                break;
            }

            /* A hit that begins in the carried tail lies before position. */
            size_t offset = window_off + (size_t) (hit - window_start);
            if (as_hex) {
                printf("%zx\n", offset);
            } else {
                printf("%zu\n", offset);
            }

            scan = hit + key_len;
            resume_off = offset + key_len;
        }

        /* Move the last key_len - 1 valid bytes (fewer if not that many exist
         * yet) to just before new_data_p so that a match straddling two reads
         * is still found. The tail is taken from the whole valid window, not
         * only from the newest bytes, so short reads do not lose data. */
        size_t valid = (size_t) (end_p - window_start);
        size_t keep = key_len - 1;
        if (keep > valid) {
            keep = valid;
        }
        window_start = new_data_p - keep;
        memmove(window_start, end_p - keep, keep);

        position += (size_t) got;
    }

    free(buf);
}

/*
 * Entry point: read the key from stdin and search the file named by argv[1].
 *
 * Returns 0 after the search ran, or 1 on a usage error, an empty key, or
 * when the file cannot be opened.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr,
                "Usage: %s file\nBinary search key should go "
                "into stdin, newline is not eaten.\nOutputs "
                "file offset, for hex format set env HEX.\n",
                *argv);
        return 1;
    }

    char *key;
    size_t len = read_key(&key);
    if (!len) {
        fprintf(stderr, "Search key must not be empty\n");
        return 1;
    }

    /* The file is searched as raw bytes, so open it in binary mode. */
    FILE *fp = fopen(argv[1], "rb");
    if (!fp) {
        perror("fopen");
        free(key);
        return 1;
    }

    find_key(fp, key, len);

    free(key);
    fclose(fp);
    return 0;
}