summaryrefslogtreecommitdiff
path: root/find-bytes.c
blob: 8415d4d8a86aa5d64573d32f751af80b8d16946c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * Find a byte sequence in a file.
 *
 * Author: Peter Wu <lekensteyn@gmail.com>
 * Date: 2012-11-01
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Minimum size in bytes of the buffer used to scan the file (a larger one
 * is used for long keys). May be overridden at compile time, e.g. with
 * -DSEARCH_BUF_SIZE=65536. The default is parenthesized so that it expands
 * safely inside any expression.
 */
#ifndef SEARCH_BUF_SIZE
#	define SEARCH_BUF_SIZE (1024 * 1024)
#endif

/*
 * Read the complete search key from standard input.
 *
 * Every byte up to end-of-file is taken verbatim: nothing is stripped and
 * NUL bytes are allowed.
 *
 * dst: receives a malloc'ed buffer holding exactly the returned number of
 *      bytes; the caller must free() it. Set to NULL when no data was read.
 *
 * Returns the key length in bytes, 0 if stdin was empty.
 * A read error or an allocation failure is reported with perror() and
 * aborts the process.
 */
size_t read_key(char **dst) {
	size_t len = 0;
	ssize_t read_bytes;
	char buff[512];
	char *key = NULL;

	*dst = NULL;
	/* A short read does not mean end-of-file (pipes and terminals deliver
	 * data piecemeal), so keep going until read() returns 0. */
	while ((read_bytes = read(STDIN_FILENO, buff, sizeof(buff))) > 0) {
		/* grow to the exact size needed; keep the old pointer until the
		 * reallocation is known to have succeeded */
		char *grown = realloc(key, len + (size_t) read_bytes);
		if (!grown) {
			perror("realloc");
			abort();
		}
		key = grown;
		memcpy(key + len, buff, (size_t) read_bytes);
		len += (size_t) read_bytes;
	}
	if (read_bytes < 0) {
		/* searching for a truncated key would give wrong results */
		perror("read");
		abort();
	}
	*dst = key;
	return len;
}

/*
 * Print the offset of every occurrence of a byte sequence in a file.
 *
 * fp:      stream for the file to search. Data is fetched with read(2) on
 *          the stream's descriptor, bypassing stdio buffering.
 * key:     the bytes to look for; arbitrary binary data is fine.
 * key_len: number of bytes in key. Nothing is searched when it is 0.
 *
 * Offsets are relative to where reading starts and are written to stdout,
 * one per line, in decimal, or in hexadecimal when the environment variable
 * HEX is set. The search resumes right behind each match, so an occurrence
 * overlapping an already reported match is not reported; this holds
 * regardless of where the chunk boundaries fall.
 *
 * A read error is reported with perror() and ends the search early; failure
 * to allocate the scan buffer aborts the process.
 */
void find_key(FILE *fp, char *key, size_t key_len) {
	if (key_len == 0) {
		/* the buffer layout below needs at least one key byte */
		return;
	}
	int fd = fileno(fp);
	size_t buf_size = key_len * 8;
	if (buf_size < (SEARCH_BUF_SIZE)) buf_size = (SEARCH_BUF_SIZE);
	char *buf = malloc(buf_size);
	if (!buf) {
		perror("malloc");
		abort();
	}
	size_t fill_size = buf_size - key_len + 1;
	/* bytes read so far, i.e. the file offset of the next new_data_p[0] */
	size_t position = 0;
	int as_hex = !!getenv("HEX");
	/*----------------+------------------------------*
	 * prev data size |          new data            *
	 *   key_len - 1  |     size is max fill_size    *
	 *----------------+------------------------------*/
	char *new_data_p = buf + key_len - 1;
	/* initially, data is valid starting at the point where it gets filled */
	char *buf_start = new_data_p;
	for (;;) {
		ssize_t new_bytes = read(fd, new_data_p, fill_size);
		if (new_bytes < 0) {
			perror("read");
			break;
		}
		if (new_bytes == 0) {
			/* only a zero-length read means end-of-file; a short read
			 * (pipe, device) must not stop the search */
			break;
		}

		char *end_p = new_data_p + new_bytes;
		/* file offset of buf_start[0]; the carried-over bytes precede the
		 * new data, hence the subtraction */
		size_t base = position - (size_t) (new_data_p - buf_start);
		/* first byte that is not part of an already reported match */
		char *scan_p = buf_start;
		while ((size_t) (end_p - scan_p) >= key_len) {
			char *hit = memmem(scan_p, (size_t) (end_p - scan_p),
					key, key_len);
			if (!hit) {
				break;
			}
			size_t offset = base + (size_t) (hit - buf_start);
			printf(as_hex ? "%zx\n" : "%zu\n", offset);
			scan_p = hit + key_len;
		}

		/* Carry the tail over to just before the new data so that a match
		 * straddling two reads is found. At most key_len - 1 bytes are
		 * needed, and never bytes that a reported match already used. */
		size_t keep_size = key_len - 1;
		size_t unconsumed = (size_t) (end_p - scan_p);
		if (keep_size > unconsumed) {
			keep_size = unconsumed;
		}
		buf_start = new_data_p - keep_size;
		memmove(buf_start, end_p - keep_size, keep_size);
		position += (size_t) new_bytes;
	}
	free(buf);
}

/*
 * Entry point: read the search key from stdin, then print every offset at
 * which it occurs in the file named by argv[1].
 *
 * Returns 0 once the search has run, 1 on a usage error, an empty key or
 * when the file cannot be opened.
 */
int main(int argc, char **argv) {
	if (argc < 2) {
		fprintf(stderr, "Usage: %s file\nBinary search key should go "
				"into stdin, newline is not eaten.\nOutputs "
				"file offset, for hex format set env HEX.\n",
				*argv);
		return 1;
	}
	char *key;
	size_t len = read_key(&key);
	if (!len) {
		fprintf(stderr, "Search key must not be empty\n");
		return 1;
	}
	/* binary mode: the file is searched byte for byte */
	FILE *fp = fopen(argv[1], "rb");
	if (!fp) {
		/* report first so that free() cannot disturb errno */
		perror("fopen");
		free(key);
		return 1;
	}
	find_key(fp, key, len);
	free(key);
	fclose(fp);
	return 0;
}