1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
|
#!/usr/bin/python
# Given lines of fixed length, mark uncommon chars
# Copyright (C) 2013 Peter Wu <lekensteyn@gmail.com>
# License: GPLv3+
import fileinput
import sys
from collections import Counter
CLEAR_COLOR = "\033[m"
# Reads lines from files and/or stdin
def read_lines(files, line_width, lines_limit):
    """Read equally long lines from the given files and/or stdin.

    Trailing whitespace (including the newline) is stripped from every line.
    If ``line_width`` is positive, each line is first truncated to that many
    characters; otherwise the length of the first line read becomes the
    expected width. Every line must then have exactly the expected width.

    Args:
        files: File names handed to ``fileinput.input``; an empty list makes
            it read standard input.
        line_width: Truncation length, or a value <= 0 to take the width
            from the first line.
        lines_limit: Stop after this many lines; a value <= 0 means no limit.

    Returns:
        ``(line_width, lines)`` on success, where ``lines`` may be empty if
        the input was empty, or ``(None, None)`` if a line had an unexpected
        length. Diagnostics are written to stderr.
    """
    lines = []
    truncate = line_width > 0
    # With an explicit width the expected length is known up front; otherwise
    # the first line decides it, even if that line is empty.
    width_known = truncate
    # The context manager closes fileinput's module-global state on every
    # exit path, so an early return/break does not leave it "already active".
    with fileinput.input(files=files) as stream:
        for lineno, line in enumerate(stream, start=1):
            line = line.rstrip()
            if truncate:
                line = line[:line_width]
            # Measure after truncation so longer lines are cut, not rejected.
            line_len = len(line)
            if not width_known:
                line_width = line_len
                width_known = True
            elif line_len != line_width:
                print("Line {} has a different length {} (expected {})"
                      .format(lineno, line_len, line_width),
                      file=sys.stderr)
                return None, None
            lines.append(line)
            if lines_limit > 0 and lineno >= lines_limit:
                print("Stopped reading after {} lines".format(lineno),
                      file=sys.stderr)
                break
    if not lines:
        print("Input is empty", file=sys.stderr)
    return line_width, lines
def count_chars(lines, line_width):
    """Find the most frequent character(s) in every column.

    Args:
        lines: Sequence of strings, each at least ``line_width`` long.
        line_width: Number of columns to examine.

    Returns:
        A list with one tuple per column:
        ``(highest count, number of chars sharing that count, those chars)``.
        With no lines at all, every column yields ``(-1, 0, [])``.
    """
    top_counts = []
    for col in range(line_width):
        # Occurrences of each character in this column.
        col_ctr = Counter(line[col] for line in lines)
        top_count = max(col_ctr.values()) if col_ctr else -1
        # Every character tied for the highest count, in first-seen order.
        top_chars = [char for char, count in col_ctr.items()
                     if count == top_count]
        top_counts.append((top_count, len(top_chars), top_chars))
    return top_counts
# Calculate which columns need to get colored.
# 0 = the whole column has the same single char (uncolored)
# 1 = this cell is the most common char (green)
# 2 = this cell is one of the most common chars (yellow)
# 3 = this cell is not most common (red)
# Cell classifications produced by color_cells().
CELL_ALLSAME = 0        # every line has the same char in this column
CELL_COMMON = 1         # the single most common char of the column
CELL_COMMON_SHARED = 2  # one of several chars tied for most common
CELL_OUTLIER = 3        # any other char


def color_cells(lines, line_width, top_counts):
    """Classify every character cell against its column's statistics.

    Args:
        lines: Sequence of strings, each at least ``line_width`` long.
        line_width: Number of columns to classify.
        top_counts: Per-column tuples ``(highest count, number of chars
            sharing that count, those chars)``.

    Returns:
        One list per line holding a ``CELL_*`` value for each column.
    """
    total_lines = len(lines)
    colormap = []
    for text in lines:
        row = []
        for idx in range(line_width):
            char = text[idx]
            peak, tied_count, tied_chars = top_counts[idx]
            if char not in tied_chars:
                cell = CELL_OUTLIER
            elif tied_count != 1:
                # several chars share the highest count
                cell = CELL_COMMON_SHARED
            elif peak != total_lines:
                # unique winner, but some line deviates in this column
                cell = CELL_COMMON
            else:
                # unique winner present on every line
                cell = CELL_ALLSAME
            row.append(cell)
        colormap.append(row)
    return colormap
# Find columns that can be skipped
# common at begin: N..[other] (collapsed when N > 5)
# common at end: [other]..N (collapsed when N > 5)
# common in middle: [other]..N..[other] (collapsed when N > 10)
def mark_skippable(colormap, line_width, edge_threshold=5, middle_threshold=10):
    """Mark runs of all-same columns that are long enough to be collapsed.

    Only the first row of ``colormap`` is inspected; a cell value of 0 there
    is treated as "this column is identical on every line".

    Args:
        colormap: Per-line lists of cell classifications.
        line_width: Number of columns.
        edge_threshold: A run touching the start or end of the line is
            collapsed when it is strictly longer than this.
        middle_threshold: A run enclosed by differing columns is collapsed
            when it is strictly longer than this.

    Returns:
        A list with one entry per column: 0 for a column that differs,
        -1 for an all-same column, and N > 0 on the first column of a
        collapsible run of N all-same columns.
    """
    skip_mask = [0] * line_width
    if not colormap:
        # No rows to inspect, so nothing can be marked.
        return skip_mask
    first_row = colormap[0]
    run_length = 0
    # Scanning right to left: the first run met is the one at the line end.
    threshold = edge_threshold
    for col in reversed(range(line_width)):
        if first_row[col] == 0:
            # all-same column: extend the current run
            run_length += 1
            skip_mask[col] = -1
        else:
            # The run (if any) started just right of this column.
            if run_length > threshold:
                skip_mask[col + 1] = run_length
            # Any run closed by a later differing column is enclosed.
            threshold = middle_threshold
            run_length = 0
    # Whatever run is still open touches the start of the line.
    if run_length > edge_threshold:
        skip_mask[0] = run_length
    return skip_mask
# Escape sequence written before a cell of each CELL_* classification.
colors = {
    CELL_ALLSAME: CLEAR_COLOR,  # no color
    CELL_COMMON: "\033[0;32m", # green
    CELL_COMMON_SHARED: "\033[0;33m", # yellow
    CELL_OUTLIER: "\033[1;31m", # red
}
# Display input, coloring changes
def display_changes(lines, line_width, colormap, skip_mask, enable_skip):
    """Print every line with its cells colored by classification.

    Args:
        lines: Sequence of strings, each at least ``line_width`` long.
        line_width: Number of columns to print.
        colormap: Per-line lists of cell classifications (keys of ``colors``).
        skip_mask: Per-column marks: N > 0 starts a run of N columns that is
            replaced by a ``..N..`` marker, -1 is an all-same column, 0 is a
            column that differs.
        enable_skip: When true, collapse the runs marked in ``skip_mask``.
    """
    skip_marker_color = "\033[1;30m"
    for lineno, line in enumerate(lines):
        pieces = []
        prev_color = CLEAR_COLOR
        line_color = colormap[lineno]
        # Reset per line: a collapsed run that reaches the end of the
        # previous line must not swallow a short all-same run that starts
        # this one.
        skipping = False
        for col in range(line_width):
            if enable_skip:
                mark = skip_mask[col]
                if skipping and mark == -1:
                    # still inside the collapsed run
                    continue
                if mark > 0:
                    # start of a collapsed run of `mark` columns
                    prev_color = skip_marker_color
                    pieces.append(prev_color + "..{}..".format(mark))
                    skipping = True
                    continue
                # end of skippable columns
                skipping = False
            # Emit an escape sequence only when the color changes.
            new_color = colors[line_color[col]]
            if prev_color != new_color:
                pieces.append(new_color)
                prev_color = new_color
            pieces.append(line[col])
        # Any colors must be cleared at the end of the line
        if prev_color != CLEAR_COLOR:
            pieces.append(CLEAR_COLOR)
        print("".join(pieces))
def main(args):
    """Run the pipeline: read, count, classify, mark skips, display.

    Args:
        args: Parsed options providing ``files``, ``line_width``,
            ``lines_limit`` and ``enable_skip``.

    Returns:
        1 when no lines could be read, otherwise 0.
    """
    # Grab lines from stdin and/or the files named on the command line
    width, rows = read_lines(args.files, args.line_width, args.lines_limit)
    if not rows:
        return 1

    # Per-column character frequency, then per-cell classification
    frequencies = count_chars(rows, width)
    cell_classes = color_cells(rows, width, frequencies)

    # Columns that may be collapsed, then the colored output itself
    mask = mark_skippable(cell_classes, width)
    display_changes(rows, width, cell_classes, mask, args.enable_skip)
    return 0
# Command-line entry point: parse the options and exit with main()'s status.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Assume common lines, mark tiny differences")
    parser.add_argument(
        "-c", dest="line_width", metavar="LENGTH", type=int, default=0,
        help="Truncate input lines to LENGTH (default 0, unlimited)")
    parser.add_argument(
        "-l", dest="lines_limit", metavar="COUNT", type=int, default=0,
        help="Limit the number of lines to COUNT (default 0, unlimited)")
    parser.add_argument(
        "-s", dest="enable_skip", action="store_true",
        help="Enable skipping common columns")
    # Everything after the options is taken as input file names.
    parser.add_argument("files", nargs=argparse.REMAINDER)

    args = parser.parse_args()
    sys.exit(main(args))
|