Re-implemention of checkhf.pl:

Main objective: reduce the number of false positives. (Committed as a patch to the original checkhf.pl written by Joerg Mayer so as to keep the history for the original program). svn path=/trunk/; revision=47331
author: Bill Meier <wmeier@newsguy.com> 2013-01-28 22:23:32 +0000
committer: Bill Meier <wmeier@newsguy.com> 2013-01-28 22:23:32 +0000
commit: d1ae197ae4f645538752c9ee4bf85023be71eeda (patch)
tree: 40bf802a399a0ca0dfb359186ef071e2302849e3 /tools/checkhf.pl
parent: 7540ccc0ed1fbb3f9d58920cd57068328a3a863f (diff)
download: wireshark-d1ae197ae4f645538752c9ee4bf85023be71eeda.tar.gz
1 files changed, 397 insertions, 208 deletions
diff --git a/tools/checkhf.pl b/tools/checkhf.pl
index e9bde6ddfd..458ac08ee7 100755
--- a/tools/checkhf.pl
+++ b/tools/checkhf.pl
@@ -1,20 +1,12 @@
 #!/usr/bin/perl
-
-my $debug = 0;
-# 0: off
-# 1: specific debug
-# 2: full debug
-
 #
-# find unbalanced hf_ variables: Compare hf_ variable usage with the hf_ variables
-#  declared in the hf_register_info array.
+# Copyright 2013, William Meier (See AUTHORS file)
 #
-# Usage: checkhf.pl <file or files>
-
-# $Id$
-
+# Validate hf_... usage for a dissector file;
+#
+# Usage: checkhf.pl [--debug=?] <file or files>
 #
-# Copyright 2005 Joerg Mayer (see AUTHORS file)
+# $Id$
 #
 # Wireshark - Network traffic analyzer
 # By Gerald Combs <gerald@wireshark.org>
@@ -33,210 +25,407 @@ my $debug = 0;
 # You should have received a copy of the GNU General Public License
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-
-#
-# Example:
-# ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c
-# Unused entry: packet-afs.c, hf_afs_ubik_voteend
-# Unused entry: packet-afs.c, hf_afs_ubik_errcode
-# Unused entry: packet-afs.c, hf_afs_ubik_votetype
-# NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr
 #
-# or checkhf.pl packet-*.c, which will check all the dissector files.
-#
-# NOTE: This tool currently generates false positives!
+
+## Note: This program is a re-implementation of the
+##       original checkhf.pl written and (C) by Joerg Mayer.
+##       The overall objective of the new implementation was to reduce
+##         the number of false positives which occurred with the
+##         original checkhf.pl
+##
+## ----- (The following is extracted from the original checkhf.pl with thanks to Joerg) -------
+## Example:
+## ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c
+## Unused entry: packet-afs.c, hf_afs_ubik_voteend
+## Unused entry: packet-afs.c, hf_afs_ubik_errcode
+## Unused entry: packet-afs.c, hf_afs_ubik_votetype
+## ERROR: NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr
+##
+## or checkhf.pl packet-*.c, which will check all the dissector files.
+##
+## NOTE: This tool currently generates false positives!
+##
+## The "NO ARRAY" messages - if accurate - points to an error that will
+## cause (t|wire)shark to terminate with an assertion when a packet containing
+## this particular element is being dissected.
+##
+## The "Unused entry" message indicates the opposite: We define an entry but
+## never use it in a proto_...add... function.
+## ------------------------------------------------------------------------------------
+
+# ------------------------------------------------------------------------------------
+# Main
 #
-# The "NO ARRAY" messages - if accurate - points to an error that will
-# cause (t|wire)shark to terminate with an assertion when a packet containing
-# this particular element is being dissected.
+# Logic:
+# 1. Clean the input: remove comments and code under '#if 0'.
+# 2. hfDefs:
+#            Find (and remove from input) list of static hf_ variable
+#            definitions ('static int hf_... ;')
+# 2. hfArrayEntries:
+#            Find (and remove from input) list of hf_... variables
+#            referenced in the hf[] entries;
+# 3. hfUsage:
+#            From the remaining input, extract list of all strings of form hf_...
+#             (which may include strings which are not actually valid
+#              hf_... variable references).
+# 4. Checks:
+#            If entries in hfDefs not in hfUsage then "unused";
+#            If entries in hfDefs not in hfArrayEntries then "ERROR: NO ARRAY";
+
+use strict;
+use warnings;
+
+use Getopt::Long;
+
+my $helpFlag  = '';
+my $debug     = 0;  # default: off; 1=cmt; 2=#if0; 3=hfDefs; 4=hfArrayEntry; 5=hfusage (See code)
+
+my $sts = GetOptions(
+                     'debug=i' => \$debug,
+                     'help|?'  => \$helpFlag
+                    );
+if (!$sts || $helpFlag || !$ARGV[0]) {
+    usage();
+}
+
+while (my $fileName = $ARGV[0]) {
+    shift;
+
+    my ($fileContents, $fileCleanedContents);
+    my ($hfDefHRef, $hfArrayEntryHRef, $hfUsageHRef);
+    my ($unUsedHRef, $noArrayHRef);
+
+    read_file(\$fileName, \$fileContents);
+
+    $fileCleanedContents = $fileContents;
+
+    remove_comments(\$fileCleanedContents, $fileName);
+    remove_if0_code(\$fileCleanedContents, $fileName);
+
+    $hfDefHRef        = find_remove_hf_defs(\$fileCleanedContents, $fileName);
+    $hfArrayEntryHRef = find_remove_hf_array_entries(\$fileCleanedContents, $fileName);
+    $hfUsageHRef      = find_hf_usage(\$fileCleanedContents);
+
+# Tests (See above)
+# 1. Are all the hfDef entries in hfUsage ?
+#    if not: "Unused entry:"
 #
-# The "Unused entry" message indicates the opposite: We define an entry but
-# never use it in a proto_...add... function.
+    $unUsedHRef = diff_hash($hfDefHRef, $hfUsageHRef);
+    print_list("Unused entry: $fileName, ", $unUsedHRef);
+
+# 2. Are all the hfDef entries in hfArrayEntry ?
+#    (Note: if hfDef is "unused", don't check for same in hfArrayEntry)
+#    if not: "ERROR: NO ARRAY"
+
+    $noArrayHRef  = diff_hash($hfDefHRef, $hfArrayEntryHRef);
+    $noArrayHRef  = diff_hash($noArrayHRef, $unUsedHRef);     # Remove "unused" hf_... from noArray list
+    print_list("ERROR: NO ARRAY: $fileName, ", $noArrayHRef);
+}
+
+# ---------------------------------------------------------------------
 #
+sub usage {
+    print "Usage: $0 [--debug=n] Filename [...] #debug: 1=cmt; 2=#if0; 3=hfDefs; 4=hfArrayEntry; 5=hfUsage\n";
+    exit(1);
+}
 
-use warnings;
-use strict;
+# ---------------------------------------------------------------------
+# action:  read contents of a file to specified string
+# arg:     fileNameRef, fileContentsRef
+# returns: fileContentsRef (containing the contents of the file)
+
+sub read_file {
+    my ($fileNameRef, $fileContentsRef) = @_;
+
+    die "No such file: \"$$fileNameRef\"\n" if (! -e $$fileNameRef);
+
+    # delete leading './'
+    $$fileNameRef =~ s{ ^ \. / } {}xo;
+
+    $$fileContentsRef = '';
+    # Read in the file (ouch, but it's easier that way)
+    open(FCI, "<", $$fileNameRef) || die("Couldn't open $$fileNameRef");
+    while (<FCI>) {
+        $$fileContentsRef .= $_;
+    }
+    close(FCI);
+
+    return $fileContentsRef;
+}
+
+# ---------------------------------------------------------------------
+# action:  Create a hash containing entries in 'a' that are not in 'b'
+# arg:     aHRef, bHref
+# returns: pointer to hash
+
+sub diff_hash {
+    my ($aHRef, $bHRef) = @_;
+
+    my %diffs;
+
+    @diffs{grep {! exists $$bHRef{$_}} keys %$aHRef} = {};  # each key in the new hash
+                                                            #  will have value 'undef'
+
+    return \%diffs;
+}
+
+# ---------------------------------------------------------------------
+# action:  print a list
+# arg:     hdr, listHRef
+# returns: nothing
+
+sub print_list {
+    my ($hdr, $listHRef) = @_;
 
-my $D;
-
-my %elements;
-my $element;
-my %skip;
-
-my $state;
-my $newstate;
-# "s_unknown",
-# "s_declared",
-# "s_used",
-# "s_array",
-# "s_usedarray",
-# "s_error"
-
-my $type;
-# "t_declaration";
-# "t_usage";
-# "t_array";
-
-my $restofline;
-my $currfile = "";
-
-my $comment = 0;
-my $brace = 0;
-
-my $error = 0;
-
-sub printprevfile {
-	my $state;
-
-	foreach $element (keys %elements) {
-		$state = $elements{$element};
-		$debug>=2 && print "$currfile, $element: PRINT $state\n";
-		if ($state eq "s_usedarray") {
-			# Everything is fine
-		} elsif ($state eq "s_used") {
-			# A value is declared and used but the hf_ array
-			# entry is missing
-			print "ERROR: NO ARRAY: $currfile, $element\n";
-			$error = 1;
-		} elsif ($state eq "s_array") {
-			# A value is declared, has an hf_ array entry
-			# but it is never used
-			print "Unused entry: $currfile, $element\n";
-		} elsif ($state eq "s_declared") {
-			# A value is declared but no error value exists.
-			# This doesn't matter as it isn't used either.
-			print "Declared only entry: $currfile, $element\n"
-		} elsif ($state eq "s_unknown") {
-			print "UNKNOWN: $currfile, $element\n";
-		} else {
-			die "Impossible: State $state for $currfile, $element\n";
-		}
-	}
+    print
+      map {"$hdr$_\n"}
+        sort
+          keys %$listHRef;
 }
 
-while (<>) {
-	if ($currfile !~ /$ARGV/) {
-		&printprevfile();
-		# New file - reset array and state
-		$currfile = $ARGV;
-		%elements = ( );
-		%skip = ( "hf_register_info" => 1 );
-		$state = "s_unknown";
-	}
-	# opening then closing comment
-	if (/(.*?)\/\*.*\*\/(.*)/) {
-		$comment = 0;
-		$_ = "$1$2";
-	# closing then opening comment
-	} elsif (/.*?\*\/(.*?)\/\*/) {
-		$comment = 1;
-		$_ = "$1";
-	# opening comment
-	} elsif (/(.*?)\/\*/) {
-		$comment = 1;
-		$_ = "$1";
-	# closing comment
-	} elsif (/\*\/(.*?)/) {
-		$comment = 0;
-		$_ = "$1";
-	} elsif ($comment == 1) {
-		next;
-	}
-	# unhandled: more than one complete comment per line
-
-	chomp;
-	if ($debug) {
-		$D = " ($_)";
-	} else {
-		$D = "";
-	}
-
-	# Read input
-	if (/^\s*(?:static\s+)?.*int\s+(hf_\w*)\s*=\s*-1\s*;/) {
-		$element = $1;
-		$debug && print "t_declaration for $element$D\n";
-		$type = "t_declaration";
-		if (/^\s*static/) {
-			# ignore: static declarations without any use are detected by the compiler
-			next;
+# ------------
+# action:  remove comments from input string
+# arg:     codeRef, fileName
+# returns: codeRef
+
+sub remove_comments {
+    my ($codeRef, $fileName) = @_;
+
+    # The below Regexp is based on one from:
+    # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
+    # It is in the public domain.
+    # A complicated regex which matches C-style comments.
+    my $CCommentRegEx = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xo;
+
+    $$codeRef =~ s {$CCommentRegEx} []xg;
+
+    ($debug == 1) && print "==> After Remove Comments: code: [$fileName]\n$$codeRef\n===<\n";
+
+    return $codeRef
+}
+
+# -------------
+# action:  remove '#if 0'd code from the input string
+# args     codeRef, fileName
+# returns: codeRef
+#
+# Essentially: Use s//patsub/meg to pass each line to patsub.
+#              patsub monitors #if/#if 0/etc and determines
+#               if a particular code line should be removed.
+# XXX: This is probably pretty inefficient;
+#      I could imagine using another approach such as converting
+#       the input string to an array of lines and then making
+#       a pass through the array deleting lines as needed.
+
+{  # block begin
+my ($if_lvl, $if0_lvl, $if0); # shared vars
+
+    sub remove_if0_code {
+        my ($codeRef, $fileName)  = @_;
+
+        my ($preprocRegEx) = qr {
+                                    (                                    # $1 [complete line)
+                                        ^
+                                        (?:                              # non-capturing
+                                            \s* \# \s*
+                                            (if \s 0| if | else | endif) # $2 (only if #...)
+                                        ) ?
+                                        .*
+                                        $
+                                    )
+                            }xom;
+
+        ($if_lvl, $if0_lvl, $if0) = (0,0,0);
+        $$codeRef =~ s{ $preprocRegEx }{patsub($1,$2)}xegm;
+
+        ($debug == 2) && print "==> After Remove if0: code: [$fileName]\n$$codeRef\n===<\n";
+        return $codeRef;
+    }
+
+    sub patsub {
+        if ($debug == 99) {
+            print "-->$_[0]\n";
+            (defined $_[1]) && print "  >$_[1]<\n";
+        }
+
+        # #if/#if 0/#else/#ndif processing
+        if (defined $_[1]) {
+            my ($if) = $_[1];
+            if ($if eq 'if') {
+                $if_lvl += 1;
+            } elsif ($if eq 'if 0') {
+                $if_lvl += 1;
+                if ($if0_lvl == 0) {
+                    $if0_lvl = $if_lvl;
+                    $if0     = 1;  # inside #if 0
+                }
+            } elsif ($if eq 'else') {
+                if ($if0_lvl == $if_lvl) {
+                    $if0 = 0;
+                }
+            } elsif ($if eq 'endif') {
+                if ($if0_lvl == $if_lvl) {
+                    $if0     = 0;
+                    $if0_lvl = 0;
+                }
+                $if_lvl -= 1;
+                if ($if_lvl < 0) {
+                    die "patsub: #if/#endif mismatch"
                 }
-		$debug && print "extern declaration: t_usage for $element$D\n";
-                $type = "t_usage";  # hack: decl is extern so pretend we've seen a usage
-	# Skip function parameter declarations with hf_ names
-	} elsif (/(int\s+?|int\s*?\*\s*?|header_field_info\s+?|header_field_info\s*?\*\s*?|hf_register_info\s+?|hf_register_info\s*?\*\s*?|->\s*?)(hf_\w*)\W(.*)/) {
-		$element = $2;
-		$restofline = $3;
-		$debug && print "Setting skip for $element$D\n";
-		$skip{$element} = 1;
-		# Handle functions with multiple hf_ parameters
-		while ($restofline =~ /(int\s+?|int\s*?\*\s*?|header_field_info\s+?|header_field_info\s*?\*\s*?|hf_register_info\s+?|hf_register_info\s*?\*\s*?|->\s*?)(hf_\w*)\W(.*)/) {
-			$element = $2;
-			$restofline = $3;
-			$debug && print "Setting skip for $element$D\n";
-			$skip{$element} = 1;
-		}
-		next;
-	} elsif ($brace == 1 && /^\s*&\s*(hf_\w*)\W+/) {
-		$element = $1;
-		$debug && print "t_array1 for $element$D\n";
-		$type = "t_array";
-	} elsif (/^\s*\{\s*?&\s*?(hf_\w*)\W+/) {
-		$element = $1;
-		$debug && print "t_array2 for $element$D\n";
-		$type = "t_array";
-	# Order matters: catch all remaining hf_ lines
-	} elsif (/\w*\s*=\s*(hf_\w*)\s*;/) {
-		# Catches: variable = hf_*
-		# But we don't check if the variable actually gets used...
-		$element = $1;
-		$debug && print "in variable usage for $element$D\n";
-		next if ($skip{$element});
-		$debug && print "set t_usage for $element$D\n";
-		$type = "t_usage";
-	} elsif (/\W(hf_\w*)\W*/) {
-		$element = $1;
-		$debug && print "in usage for $element$D\n";
-		next if ($skip{$element});
-		$debug && print "set t_usage for $element$D\n";
-		$type = "t_usage";
-	} else {
-		# Line with only a {
-		if (/^\s+\{\s*$/) {
-			$brace = 1;
-		} else {
-			$brace = 0;
-			$debug && print "else: $D\n";
-		}
-		next;
-	}
-
-	# Get current state
-	if (!defined($elements{$element})) {
-		$state = "s_unknown";
-	} else {
-		$state = $elements{$element};
-	}
-
-	# current state + input ==> new state
-	# we currently ignore t_declaration
-	if ($state eq "s_error") {
-		$newstate = $state;
-	} elsif ($state eq "s_unknown" && $type eq "t_usage") {
-			$newstate = "s_used";
-	} elsif ($state eq "s_unknown" && $type eq "t_array") {
-			$newstate = "s_array";
-	} elsif ($state eq "s_used" && $type eq "t_array") {
-			$newstate = "s_usedarray";
-	} elsif ($state eq "s_array" && $type eq "t_usage") {
-			$newstate = "s_usedarray";
-	} else {
-		$newstate = $state;
-	}
-	$elements{$element} = $newstate;
-	$debug>=2 && print "$currfile, $element: SET $state + $type => $newstate$D\n";
+            }
+            return $_[0];  # don't remove preprocessor lines themselves
+        }
+
+        # not preprocessor line: See if under #if 0: If so, remove
+        if ($if0 == 1) {
+            return '';  # remove
+        }
+        return $_[0];
+    }
+}  # block end
+
+# ---------------------------------------------------------------------
+# action:  Create a hash containing an entry (hf_... => 1) for each
+#             'static g?int hf_...' definition (including array names)
+#             in the input string.
+#          Remove each definition found from the input string.
+# args:    codeRef, fileName
+# returns: ref to the hash
+
+sub find_remove_hf_defs {
+    my ($codeRef, $fileName) = @_;
+
+    # Build pattern to match any of the following
+    #  static g?int hf_foo = -1;
+    #  static g?int hf_foo = HF_EMPTY;
+    #  static g?int hf_foo[xxx];
+    #  static g?int hf_foo[xxx] = {
+
+    # p1: 'static g?int hf_foo'
+    my $p1RegEx = qr {
+                          ^
+                          \s*
+                          static
+                          \s+
+                          g?int
+                          \s+
+                          (hf_[a-zA-Z0-9_]+)          # hf_..
+                  }xom;
+
+    # p2a: ' = -1;' or ' = HF_EMPTY;'
+    my  $p2aRegEx = qr {
+                           \s* = \s*
+                           (?:
+                               - \s* 1 | HF_EMPTY
+                           )
+                           \s* ;
+                   }xom;
+
+    # p2b: '[xxx];' or '[xxx] = {'
+    my  $p2bRegEx = qr !
+                           \s* \[ [^]]+ \] \s*
+                           (?:
+                               = \s* \{ | ;
+                           )
+                       !xom;
+
+    my $hfDefRegEx = qr { $p1RegEx (?: $p2aRegEx | $p2bRegEx ) }xom;
+
+    my %hfDefs;
+    while ($$codeRef =~ m{ $hfDefRegEx }xogm) {
+        #print "$1\n";
+        $hfDefs{$1} = 1;
+    }
+    ($debug == 3) && print_hash("VD: $fileName", \%hfDefs); # VariableDefinition
+
+    # remove all
+    $$codeRef =~ s{ $hfDefRegEx }{}xiogm;
+    ($debug == 3) && print "==> After remove hfDefs: code: [$fileName]\n$$codeRef\n===<\n";
+
+    return \%hfDefs;
 }
-&printprevfile();
 
-exit $error;
+# ---------------------------------------------------------------------
+# action:  Create a hash containing an entry (hf_...) for each hf[] entry.
+#          Remove each hf[] entry found from the input string.
+# args:    codeRef, fileName
+# returns: ref to the hfArrayEntry hash
 
-__END__
+sub find_remove_hf_array_entries {
+    my ($codeRef, $fileName) = @_;
+
+#    hf[] entry regex (to extract an hf_index_name and associated field type)
+    my $hfArrayEntryRegEx = qr {
+                                   \{
+                                   \s*
+                                   & \s* ( [a-zA-Z0-9_]+ )   # &hf
+                                   (?:
+                                       \s* \[ [^]]+ \]       # optional array ref
+                                   ) ?
+                                   \s* , \s*
+                                   \{ \s*
+                                   .+?                       # Fix: (a bit dangerous)
+                                   \s* , \s*
+                                   (FT_[a-zA-Z0-9_]+)        # field type
+                                   \s* , \s*
+                                   .+?                       # Fix: (also a bit dangerous)
+                                   \s* , \s*
+                                   HFILL ,?                  # HFILL
+                                   \s* \}
+                           }xos;
+
+    # find all the hf[] entries (searching $$codeRef).
+    # Create a hash keyed by the hf_... string
+    my %hfArrayEntry;
+    while ($$codeRef =~ m{ $hfArrayEntryRegEx }xgos) {
+        ($debug == 98) && print "+++ $1 $2\n";
+        $hfArrayEntry{$1} = 1;
+    }
+
+    ($debug == 4) && print_hash("AE: $fileName", \%hfArrayEntry);  # ArrayEntry
+
+    # now remove all
+    $$codeRef =~ s{ $hfArrayEntryRegEx }{}xgois;
+    ($debug == 4) && print "==> After remove hfArrayEntry: code: [$fileName]\n$$codeRef\n===<\n";
+
+    return \%hfArrayEntry;
+}
+
+# ---------------------------------------------------------------------
+# action: create hash of all hf_... strings remaining in input string.
+# arga:   codeRef, fileName
+# return: ref to hfUsage hash
+#
+# The hash will include *all* strings of form hf_...
+#   which are in the input string (even strings which
+#   aren't actually vars).
+#   We don't care since we'll be checking only
+#   known valid vars against these strings.
+
+sub find_hf_usage {
+    my ($codeRef, $fileName) = @_;
+
+    my $hfUsageRegEx = qr {
+                              \b ( hf_[a-zA-Z0-9_]+ )      # hf_...
+                      }ox;
+    my %hfUsage;
+
+    while ($$codeRef =~ m{ $hfUsageRegEx }xog) {
+        #print "$1\n";
+        $hfUsage{$1} += 1;
+    }
+
+    ($debug == 5) && print_hash("VU: $fileName", \%hfUsage); # VariableUsage
+
+    return \%hfUsage;
+}
+
+# ---------------------------------------------------------------------
+sub print_hash {
+    my ($title, $HRef) = @_;
+
+    ##print "==> $title\n";
+    for my $k (sort keys %$HRef) {
+        printf "%-40.40s %s\n", $title, $k;
+    }
+}
author	Bill Meier <wmeier@newsguy.com>	2013-01-28 22:23:32 +0000
committer	Bill Meier <wmeier@newsguy.com>	2013-01-28 22:23:32 +0000
commit	d1ae197ae4f645538752c9ee4bf85023be71eeda (patch)
tree	40bf802a399a0ca0dfb359186ef071e2302849e3 /tools/checkhf.pl
parent	7540ccc0ed1fbb3f9d58920cd57068328a3a863f (diff)
download	wireshark-d1ae197ae4f645538752c9ee4bf85023be71eeda.tar.gz