From 7dbff5189e7ab2e31c1b711ce25be067c648a3a2 Mon Sep 17 00:00:00 2001
From: Peter Wu
Date: Sun, 28 Jun 2015 01:08:51 +0200
Subject: Add cleanup routine script and notes

For tracking purposes and in case I need to do something similar again.
---
 one-off/cleanup-notes.txt  |  54 +++++
 one-off/cleanup-rewrite.py | 502 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 556 insertions(+)
 create mode 100644 one-off/cleanup-notes.txt
 create mode 100755 one-off/cleanup-rewrite.py

diff --git a/one-off/cleanup-notes.txt b/one-off/cleanup-notes.txt
new file mode 100644
index 0000000..7383598
--- /dev/null
+++ b/one-off/cleanup-notes.txt
@@ -0,0 +1,54 @@
+
+grep -rnw register_init_routine | grep -P 'register_init_routine *\( *&?\K[a-z0-9_]+(?= *\))' -o
+
+grep -P 'register_init_routine *\( *&?\K[a-z0-9_]+(?= *\))' -oHr
+
+# Read files and format in a standard way
+time while IFS=: read filename func; do out=/tmp/out/$filename;
+    mkdir -p "${out%/*}" && clang-format "$filename" >"$out"; done < /tmp/1
+# definition is in different file
+clang-format plugins/mate/mate_runtime.c > /tmp/out/plugins/mate/packet-mate.c
+# change initialize_mate_runtime from extern to static
+clang-format epan/dissectors/packet-ncp2222.inc > /tmp/out/tools/ncp2222.py
+
+# Extract all functions
+while IFS=: read filename func; do out=/tmp/fns/$filename; mkdir -p "${out%/*}" && awk "/^(static )?void $func\\(/&&! /;$/{p=1;if(/}$/){print;exit}};p;p&&/^}/{exit}" "/tmp/out/$filename" > "$out"; done < /tmp/1
+
+Do not forget to regenerate:
+--- asn1/camel/packet-camel-template.c 2015-06-23 22:58:45.547098846 +0200
++++ - 2015-06-23 23:40:03.580116775 +0200
+@@ -504,7 +504,7 @@
+  * Routine called when the TAP is initialized.
+  * so hash table are (re)created
+  */
+-void camelsrt_init_routine(void)
++static void camelsrt_init_routine(void)
+ {
+ 
+   /* free hash-table for SRT */
+
+TODO lua only has init, needs free?
+wslua_init_routine
+
+TODO rtp uses wmem_register_callback() to clean memory... move this to cleanup
+func? rtp_dyn_payloads_init
+
+mp2t dissector leaks at:
+1524 heur_subdissector_list = register_heur_dissector_list("mp2t.pid");
+view -p $(cat /tmp/fns/.x/epan.txt)
+
+# Extract functions from a file given file:funcName
+while IFS=: read filename func; do awk "function f(){print \"/// $func\n\";exit};/\\<$func *\(/&&!/;$/{p=1;print \"//\", FILENAME; if(/}$/){print;f()}};p;p&&/^}/{f()}" $filename; done < /tmp/fns/.x/epan-func.txt
+
+# given a list of files, extract the functions from files
+while read filename; do
+func=$(awk -F: "\"$filename\"~\$1{print \$2;exit}" /tmp/fns/.x/epan-func.txt)
+[ -n "$func" ] || continue
+    awk "function f(){print \"/// $func\n\";exit};/\\<$func *\(/&&!/;$/{p=1;print \"//\", FILENAME; if(/}$/){print;f()}};p;p&&/^}/{f()}" $filename; done
+
+# Test dissectors with cleanup script, run from /tmp/wireshark
+f(){ read x;n=${x:-$n}; f=epan/dissectors/packet-$n.c && [ -f $f ] && ~/projects/wireshark-notes/one-off/cleanup-rewrite.py $f |& colordiff -u $f - | less -R;}
+while :;do f;done
+
+# convert all dissectors with cleanup script
+time for i in $(cat /tmp/fns/.x/epan.txt);do ../wireshark-notes/one-off/cleanup-rewrite.py -w $i 2>/tmp/errs/${i##*/}.txt;done
diff --git a/one-off/cleanup-rewrite.py b/one-off/cleanup-rewrite.py
new file mode 100755
index 0000000..0024e8e
--- /dev/null
+++ b/one-off/cleanup-rewrite.py
@@ -0,0 +1,502 @@
+#!/usr/bin/env python
+# Detects init functions with just reassembly functionality and adds a
+# corresponding cleanup function for it.
+
+# 1. Load file containing lines with: path/to/file.c:foo_init
+# 2. Find function, extract it.
+# 3. Append cleanup func.
+# 4. Find register_init_routine call and append cleanup call.
+#
+# Detect init function:
+#   static void foo_init(void) {
+#       // one or more lines. Non-empty lines are processed as shown below.
+#       // Note that functions may split over multiple lines and that indent
+#       // might differ.
+#   }
+#
+# Keep comments in output:
+#   /* optional comments,
+#    * possibly multiline */
+#
+# Keep reassembly, remember R_NAME:
+#   reassembly_table_init(&R_NAME, &functions);
+#
+# Strip hash table destroy and if conditions, remember name:
+#   if (HT_NAME) g_hash_table_destroy(HT_NAME);
+#   if (HT_NAME != NULL) { /* ... */ }
+#   if (HT_NAME) {
+#       g_hash_table_destroy(HT_NAME);
+#       HT_NAME = NULL; // ignore this as well if any
+#   }
+#
+# Keep hash table init:
+#   HT_NAME = g_hash_table_new_full(...);
+#   HT_NAME = g_hash_table_new(...);
+#
+# Keep, but mark as TODO (or ignore for now?):
+#   varname = 0;
+#   varname = NULL;
+#
+#
+# After init function:
+#   Output g_hash_table_destroy for each HT_NAME
+#   Output reassembly_table_destroy for each R_NAME.
+
+import sys, re, logging
+_logger = logging.getLogger(__name__)
+
+# For quick sanity checking (funcName)
+RE_FUNCTION_HEADER = re.compile(
+    r'(?:static\s+)?void\s+(?P<funcName>\w+)\s*\(\s*void\s*\)')
+# TODO: maybe detect prototypes?
+# Matches init/cleanup function signature (funcName, body)
+RE_FUNCTION = re.compile(
+    r'''
+    ^(?:static \s+ )?void \s+   # "static void" - prefix
+    (?P<funcName>\w+) \s*       # "foo_init" - function name
+    \([^)]*\) \s* \{            # "(void) {" - function params
+    (?P<body>
+        [^\n]+                  # everything on one line { ... }
+    |
+        (?:                     # Handle multiple lines
+            \n[^}][^\n]+        # heh, forget '\n' and you run into a loop...
+        |
+            \n                  # Handle empty lines
+        )+
+    ) \}[^\n]*\n                # "} /* foo_init */" - end of function
+    ''', re.M | re.X)
+RE_IF = re.compile(
+    r'''
+    if\s*\(\s*                  # "if ("
+    (?P<varName>[.\w]+)\s*      # "HT_NAME "
+    (?:!=\s* (?:NULL|0))?       # "!= NULL"
+    \)                          # ")"
+    ''', re.X)
+# Matches reassembly lines
+RE_REASS = re.compile(r'reassembly_table_init\s*\(\s*(?P<name>[^\s,]+)')
+# Matches "g_hash_table_destroy(HT_NAME)"
+RE_HT_DESTROY = re.compile(r'''
+    g_hash_table_destroy\s*\(\s*    # "g_hash_table_destroy("
+    (?P<varName>[.\w]+)\s*          # "struct.ht_name"
+    \)                              # ")"
+    ''', re.X)
+RE_ASSIGNMENT = re.compile(r'(?P<varName>[.\w]+)\s*=\s*(?P<value>[^;]*)')
+
+class Function(object):
+    """Holds one C function body and derives the init and cleanup code."""
+    def __init__(self, name, body, func_match):
+        self.name = name
+        self.body = body
+        self.func_match = func_match
+        self.lines_keep = ''
+        self.reassemble_names = []
+        self.ht_names = []
+        self.unknown_lines = ''
+
+    def detect_comment(self, text, multiline_comment):
+        """Returns (is_comment, in_multiline_comment) for a stripped line."""
+        if multiline_comment:
+            multiline_comment = not text.endswith('*/')
+            # Assume that there is no code after the end marker
+            return True, multiline_comment
+        else:
+            multiline_comment = text.startswith('/*')
+            if multiline_comment:
+                multiline_comment = not text.endswith('*/')
+                return True, multiline_comment
+            if text.startswith('//'):
+                return True, False
+        # Not a comment, not a multi-line comment
+        return False, False
+
+    def parse(self):
+        """Call it once to parse the given function body."""
+        multiline_comment = False
+        # Find all functional lines
+        self._lines_iter = iter(self.body.splitlines(True))
+        for line in self._lines_iter:
+            # Track whether the line was understood or not
+            # None = needs check, False = invalid, True = handled
+            handled = None
+
+            # Ignore empty lines.
+            text = line.strip()
+            if not text:
+                continue
+
+            # Keep comments, but ignore them for parsing
+            is_comment, multiline_comment = self.detect_comment(text,
+                    multiline_comment)
+            if is_comment:
+                # Uncomment to keep comments (might also have to do this for
+                # RE_ASSIGNMENT below).
+                #self.lines_keep += line
+                handled = True
+
+            # detect reassembly function
+            if handled is None:
+                reass_match = RE_REASS.match(text)
+                if reass_match:
+                    handled = self.handle_reasembly(reass_match, line)
+
+            if handled is None:
+                # Find if/hashtable stuff
+                if_match = RE_IF.match(text)
+                if if_match:
+                    _logger.debug('Found if in: %s', text)
+                    handled = self.handle_if(if_match, line)
+
+            if handled is None:
+                # Find assignments such as hash table things
+                assignment_match = RE_ASSIGNMENT.match(text)
+                if assignment_match:
+                    _logger.debug('Found assignment in: %s', text)
+                    varName = assignment_match.group('varName')
+                    # Hash table creation
+                    line, text = self._read_stmt(line)
+                    if 'g_hash_table_new' in text:
+                        _logger.debug('Found hash table in: %s', text)
+                        if varName not in self.ht_names:
+                            _logger.warning('HT %s was not destructed', varName)
+                            #self.ht_names.append(varName)
+                    self.lines_keep += line
+                    handled = True
+
+            if not handled:
+                self.unknown_lines += line
+
+        if self.unknown_lines:
+            _logger.error('Unknown lines in %s:\n%s',
+                    self.name, self.unknown_lines)
+            return False
+        _logger.info('Found function %s', self.name)
+        _logger.info('Keep function %s:\n%s', self.name, self.lines_keep)
+        return True
+
+    def _read_stmt(self, line='', terminator=';'):
+        """
+        Reads lines until a full statement is ready.
+        :param line: current buffer that needs to be finished
+        """
+        text = line.strip()
+        ml_comment = False
+        while terminator not in text:
+            line2 = next(self._lines_iter, None)
+            if line2 is None:
+                # Body ended early: flag as unknown, do not raise StopIteration
+                _logger.error('Unterminated statement: %s', text)
+                self.unknown_lines += line
+                break
+            text2 = line2.strip()
+            is_comment, ml_comment = self.detect_comment(text2, ml_comment)
+            line += line2
+            if not is_comment:
+                text += '\n' + text2
+        return line, text
+
+    def handle_reasembly(self, reass_match, line):
+        """Remembers the reassembly table name and keeps the statement."""
+        self.reassemble_names.append(reass_match.group('name'))
+        # Handle following lines and jump to next detection.
+        line, _ = self._read_stmt(line)
+        self.lines_keep += line
+        return True
+
+    def handle_if(self, if_match, line):
+        """Drops an if statement, remembering the hash table it destroys."""
+        text = line.strip()
+        # Expected more?
+        if '{' in text:
+            # Look for if (...) { ... }
+            line, text = self._read_stmt(line, '}')
+        else:
+            # Look for if (...) ...;
+            line, text = self._read_stmt(line, ';')
+
+        # Check for else that is not understood.
+        if re.search(r'\}\s*else\b', text):
+            self.unknown_lines += line
+            return True # Cannot handle else yet! True to avoid double append
+
+        # Get rid of if condition and brackets
+        if '{' in text:
+            text = text.split('{', 1)[1].split('}')[0]
+        else:
+            text = text.split(')', 1)[1]
+
+        # The variable that was tested for destruction
+        varName = if_match.group('varName')
+        # For each statement in the if-body, check validity
+        for stmt in text.split(';'):
+            stmt = stmt.strip()
+            if not stmt:
+                continue
+            ht_destroy_match = RE_HT_DESTROY.match(stmt)
+            if ht_destroy_match:
+                if ht_destroy_match.group('varName') != varName:
+                    _logger.error('cond %s != destroy %s' %
+                            (varName, ht_destroy_match.group('varName')))
+                    self.unknown_lines += line
+                    return True
+                # Remember name for later destruction
+                self.ht_names.append(varName)
+                _logger.debug('Skipping line for ht destroy %s', varName)
+                continue
+            assignment_match = RE_ASSIGNMENT.match(stmt)
+            if assignment_match:
+                if assignment_match.group('varName') == varName and \
+                        assignment_match.group('value') in ('NULL', '0') and \
+                        self._is_ht_name(varName):
+                    # Ignore clearing variable for hash table
+                    continue
+            _logger.warning('Unhandled if stmt: %s', stmt)
+            self.unknown_lines += line
+            return True
+
+        return True
+
+    def _is_ht_name(self, varName):
+        """Checks whether the body assigns g_hash_table_new*() to varName."""
+        patt_ht_new = r'^\s*' + re.escape(varName) + r'\s*=\s*g_hash_table_new'
+        return re.search(patt_ht_new, self.body, re.M) is not None
+
+    def get_indent(self):
+        """Returns the first run of leading whitespace found in the body."""
+        indent_match = re.search(r'^\n*([ \t]+)', self.body, re.M)
+        if not indent_match:
+            _logger.error('Could not detect indent level for %s!', self.name)
+            # XXX can this actually happen?
+            return ''
+        return indent_match.group(1)
+
+    def _make_function(self, funcName, body, keep_trailer=False):
+        # "static void" funcName "(void) {" body "}\n"
+        begin, end = self.func_match.span()
+        f_begin, f_end = self.func_match.span('funcName')
+        b_begin, b_end = self.func_match.span('body')
+        context = self.func_match.string
+        code = ''
+        code += context[begin:f_begin] + funcName   # "static void" funcName
+        code += context[f_end:b_begin] + '\n'       # "(void) {\n"
+        code += body
+        # Strip comments in "}\n" unless requested otherwise (for init)
+        code += context[b_end:end] if keep_trailer else '}\n'
+        return code
+
+    def make_cleanup_function(self, cleanupFuncName):
+        """Returns the cleanup routine, or None when nothing needs cleanup."""
+        body = self._make_cleanup_function_body()
+        if not body:
+            return None
+        code = self._make_function(cleanupFuncName, body)
+        _logger.debug('Emitting cleanup routine %s:\n%s', cleanupFuncName, code)
+        return code
+
+    def _make_cleanup_function_body(self):
+        """Returns a destroy call for every remembered table name."""
+        body = ''
+        indent = self.get_indent()
+        for name in self.reassemble_names:
+            body += '%sreassembly_table_destroy(%s);\n' % (indent, name)
+        for name in self.ht_names:
+            body += '%sg_hash_table_destroy(%s);\n' % (indent, name)
+        return body
+
+    def make_init_function(self):
+        """Generates the stripped init routine."""
+        code = self._make_function(self.name, self.lines_keep, keep_trailer=True)
+        assert code
+        _logger.debug('Emitting init routine %s:\n%s', self.name, code)
+        # As the block is replaced, remember the context
+        begin, end = self.func_match.span()
+        context = self.func_match.string
+        return context[0:begin] + code + context[end:]
+
+
+class Source(object):
+    """Splits a C source file into blocks and adds a cleanup routine."""
+    def __init__(self, filename):
+        self.filename = filename
+        self.blocks = []
+        # map from function names to a tuple
+        # (blockIndex:int, func:Function, func_match:re.Match)
+        self.functions = {}
+
+    def parse_func(self, block, blockIndex):
+        """
+        Parses the code block. The blockIndex parameter is used for indexing the
+        functions.
+        """
+        # Quick sanity check (multiple names may show up as it matches
+        # prototypes and other functions with any number of parameters).
+        funcNames_guessed = RE_FUNCTION_HEADER.findall(block)
+        if not funcNames_guessed:
+            return
+        _logger.debug('Found functions %s', ', '.join(funcNames_guessed))
+
+        # Try to match the init function
+        func_match = RE_FUNCTION.search(block)
+        if not func_match:
+            _logger.info('No function body detected for %s',
+                    ', '.join(funcNames_guessed))
+            return
+
+        # Try to parse everything from the function body
+        funcName = func_match.group('funcName')
+        body = func_match.group('body')
+        func = Function(funcName, body, func_match)
+        if funcName in self.functions:
+            _logger.error('Function %s is already known, overwriting!', funcName)
+        _logger.debug('Saving function %s', funcName)
+        self.functions[funcName] = (blockIndex, func, func_match)
+
+    def parse_block(self, block):
+        """Indexes the function in the block (if any) and stores the block."""
+        self.parse_func(block, len(self.blocks))
+        self.blocks.append(block)
+
+    def parse(self):
+        """Reads the file and patches it; True if a cleanup was added."""
+        block = ''
+        # Pass 1: read file contents and extract functions
+        with open(self.filename) as f:
+            for line in f:
+                block += line
+                # Assume end of line / begin of block
+                # use heuristics to match:
+                # static void reset_dissector(void) { ...; }
+                if line.startswith('}') or (
+                    line.startswith('static void') and
+                    '(void)' in line and
+                    line.endswith('}\n')
+                    ):
+                    self.parse_block(block)
+                    block = ''
+                    continue
+        # Remainder
+        if block:
+            self.parse_block(block)
+            block = ''
+
+        # Pass 2: find register_init_routine, append cleanup call and append
+        # cleanup function.
+        for blockIndex, block in enumerate(self.blocks):
+            if self.try_init_fix(block, blockIndex):
+                # Ok, cleanup routine is fixed.
+                return True
+        return False
+
+    def make_cleanup_name(self, funcName):
+        """Derives the cleanup routine name from the init routine name."""
+        newName = funcName.replace('init', 'cleanup')
+        newName = newName.replace('setup', 'cleanup')
+        if funcName == newName:
+            _logger.error('Cannot create unique cleanup function name %s',
+                    funcName)
+        return newName
+
+    def try_init_fix(self, block, blockIndex):
+        """Adds cleanup code if block registers an init routine; True if done."""
+        # Matches " register_init_routine (&foo_init);"
+        caller_match = re.search(
+            r'''
+            ^([ \t]*)register_init_routine\s*
+            \(\s* &? \s*(?P<name>\w+)\s* \);\n
+            ''', block, re.M | re.X)
+        if not caller_match:
+            # Sanity check
+            if re.search(r'register_init_routine\s*\(', block):
+                _logger.error('Could not detect register_init_routine properly!')
+            return False # Continue searching
+
+        # Locate init function and generate matching cleanup function
+        funcName = caller_match.group('name')
+        cleanupFuncName = self.make_cleanup_name(funcName)
+        if not self.fix_cleanup_function(funcName, cleanupFuncName):
+            return False # Not fixed, continue searching
+
+        # Yields " register_cleanup_routine (&foo_cleanup);"
+        extra_line = caller_match.group() \
+            .replace('register_init_routine', 'register_cleanup_routine') \
+            .replace(funcName, cleanupFuncName)
+        begin, end = caller_match.span()
+        self.blocks[blockIndex] = block[0:end] + extra_line + block[end:]
+        return True # Done searching
+
+    def fix_cleanup_function(self, funcName, cleanupFuncName):
+        """Rewrites the init routine and appends a cleanup routine after it."""
+        if funcName not in self.functions:
+            _logger.error('Init routine %s not found!', funcName)
+            return False
+
+        if cleanupFuncName in self.functions:
+            _logger.error('Cleanup routine %s already exists!', cleanupFuncName)
+            return False
+
+        blockIndex, func, func_match = self.functions[funcName]
+        if not func.parse():
+            return False
+
+        initCode = func.make_init_function()
+        cleanupCode = func.make_cleanup_function(cleanupFuncName)
+        if not cleanupCode:
+            return False # Empty function
+
+        self.blocks[blockIndex] = initCode
+        self.blocks[blockIndex] += '\n' + cleanupCode
+        return True
+
+    def __str__(self):
+        """Returns the (possibly modified) source code."""
+        return ''.join(self.blocks)
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.DEBUG,
+            format='%(name)s:%(levelname)s: %(message)s')
+    # Color!
+    for _level, _color in {
+        'ERROR': 31,
+        'WARNING': 33,
+        'INFO': 37,
+        'DEBUG': 34,
+    }.items():
+        logging.addLevelName(getattr(logging, _level),
+                '\033[%d;1m%s\033[m' % (_color, _level))
+
+    write_file = lambda f, data: sys.stdout.write(data)
+
+    args = sys.argv[1:]
+    if not args:
+        _logger.error('Usage: cleanup-rewrite.py [-w] files..')
+        sys.exit(1)
+
+    if args[0] == '-w':
+        args = args[1:]
+        _logger.info('Will write new files')
+        def write_file(f, data):
+            with open(f, 'w') as out:
+                out.write(data)
+
+    ok = None
+    for filename in args:
+        # Support aliasing files such as /dev/stdin:/dev/stdout
+        if ':' in filename:
+            filename_in, filename = filename.split(':', 1)
+        else:
+            filename_in = filename
+
+        # Linux-only hack: alias - as stdin or stdout
+        if filename_in == '-':
+            filename_in = '/dev/stdin'
+        if filename == '-':
+            filename = '/dev/stdout'
+
+        src = Source(filename_in)
+        if src.parse():
+            if ok is None:
+                ok = True
+            write_file(filename, str(src))
+        else:
+            ok = False
+
+    sys.exit(0 if ok else 1)
-- 
cgit v1.2.1