/*
 * Find a byte sequence in a file.
 *
 * Author: Peter Wu
 * Date: 2012-11-01
 */

#define _GNU_SOURCE /* for memmem() */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#ifndef SEARCH_BUF_SIZE
# define SEARCH_BUF_SIZE (1024 * 1024)
#endif

/* Initial capacity, in bytes, of the buffer that collects the key from stdin. */
#define KEY_CHUNK_SIZE 512

/* read(2) wrapper that restarts the call when it is interrupted by a signal. */
static ssize_t read_retry(int fd, void *buf, size_t count)
{
	ssize_t got;

	do {
		got = read(fd, buf, count);
	} while (got < 0 && errno == EINTR);

	return got;
}

/*
 * Read the complete search key from standard input.
 *
 * Every byte up to end-of-file is part of the key; nothing (not even a
 * trailing newline) is stripped.
 *
 * dst: receives a malloc'ed buffer holding the key, which the caller must
 *      free(). It is set to NULL when 0 is returned.
 *
 * Returns the key length in bytes, or 0 if stdin was empty or could not be
 * read. Aborts if memory cannot be allocated.
 */
size_t read_key(char **dst)
{
	char *key = NULL;
	size_t len = 0;
	size_t cap = 0;

	*dst = NULL;

	for (;;) {
		if (len == cap) {
			size_t new_cap = cap ? cap * 2 : KEY_CHUNK_SIZE;
			char *grown = realloc(key, new_cap);

			if (!grown) {
				perror("realloc");
				abort();
			}
			key = grown;
			cap = new_cap;
		}

		/*
		 * Keep reading until EOF: a short read (common on pipes and
		 * terminals) does not mean the input is exhausted.
		 */
		ssize_t got = read_retry(STDIN_FILENO, key + len, cap - len);
		if (got < 0) {
			perror("read");
			free(key);
			return 0;
		}
		if (got == 0)
			break;
		len += (size_t)got;
	}

	if (!len) {
		free(key);
		return 0;
	}

	/* Trim to the exact size; if shrinking fails the larger buffer is fine. */
	char *exact = realloc(key, len);
	if (exact)
		key = exact;

	*dst = key;
	return len;
}

/*
 * Print the file offset of every non-overlapping occurrence of key in fp.
 *
 * The file is read in chunks through the descriptor underlying fp. The last
 * key_len - 1 bytes of each chunk are carried over to the front of the buffer
 * so that occurrences straddling a chunk boundary are still found.
 *
 * fp:      stream to search; it must not have been read through stdio yet,
 *          because the data is fetched with read(2) on fileno(fp).
 * key:     bytes to search for.
 * key_len: number of bytes in key; nothing is searched when it is 0.
 *
 * One offset is written per line to stdout, in decimal, or in hexadecimal
 * when the environment variable HEX is set. A read error is reported with
 * perror() and ends the search. Aborts if memory cannot be allocated.
 */
void find_key(FILE *fp, char *key, size_t key_len)
{
	if (key_len == 0)
		return;

	if (key_len > (size_t)-1 / 8) {
		fprintf(stderr, "Search key is too large\n");
		abort();
	}

	size_t buf_size = SEARCH_BUF_SIZE;
	if (buf_size < key_len * 8)
		buf_size = key_len * 8;

	char *buf = malloc(buf_size);
	if (!buf) {
		perror("malloc");
		abort();
	}

	const int fd = fileno(fp);
	const int as_hex = getenv("HEX") != NULL;
	const size_t overlap = key_len - 1;

	size_t carried = 0;  /* bytes at the start of buf kept from the last chunk */
	size_t skip = 0;     /* carried bytes already consumed by a reported match */
	size_t position = 0; /* file offset of buf[carried] */

	for (;;) {
		/*-----------------+-------------------------------*
		 * carried bytes   | newly read data               *
		 * (< key_len)     | at most buf_size - carried    *
		 *-----------------+-------------------------------*/
		ssize_t got = read_retry(fd, buf + carried, buf_size - carried);
		if (got < 0) {
			perror("read");
			break;
		}
		if (got == 0)
			break;

		const size_t valid = carried + (size_t)got;
		const size_t base = position - carried; /* file offset of buf[0] */
		const char *end = buf + valid;
		char *cursor = buf + skip;

		while ((size_t)(end - cursor) >= key_len) {
			char *hit = memmem(cursor, (size_t)(end - cursor),
					key, key_len);
			if (!hit)
				break;

			size_t offset = base + (size_t)(hit - buf);
			printf(as_hex ? "%zx\n" : "%zu\n", offset);
			cursor = hit + key_len;
		}

		/*
		 * Move the tail to the front for the next round. Fewer than
		 * key_len bytes are kept, so no match can be reported twice.
		 */
		const size_t keep = valid < overlap ? valid : overlap;
		const size_t dropped = valid - keep;
		const size_t used = (size_t)(cursor - buf);

		memmove(buf, buf + dropped, keep);
		skip = used > dropped ? used - dropped : 0;
		carried = keep;
		position += (size_t)got;
	}

	free(buf);
}

/*
 * Usage: PROGRAM file
 *
 * Reads the search key from stdin and prints the offsets at which it occurs
 * in the named file. Returns 0 on success and 1 on a usage error, an empty
 * key, or a file that cannot be opened.
 */
int main(int argc, char **argv)
{
	if (argc < 2) {
		const char *prog = argc > 0 ? argv[0] : "find";

		fprintf(stderr, "Usage: %s file\nBinary search key should go "
			"into stdin, newline is not eaten.\nOutputs "
			"file offset, for hex format set env HEX.\n", prog);
		return 1;
	}

	char *key;
	size_t len = read_key(&key);
	if (!len) {
		fprintf(stderr, "Search key must not be empty\n");
		return 1;
	}

	/* "b": the file is searched as raw bytes. */
	FILE *fp = fopen(argv[1], "rb");
	if (!fp) {
		perror("fopen");
		free(key);
		return 1;
	}

	find_key(fp, key, len);

	free(key);
	fclose(fp);
	return 0;
}