summaryrefslogtreecommitdiff
path: root/tools/fix-encoding-args.pl
blob: 591a4b97c02f85b14fed1c081b59697f7d50a5bd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
#!/usr/bin/perl -w
#
# Copyright 2011, William Meier <wmeier[AT]newsguy.com>
#
# A program to fix proto_tree_add_item() encoding args from TRUE/FALSE to ENC_?? as appropriate (and possible)
#
#
# $Id$
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

use strict;
use Getopt::Long;

# Conversion "Requests"

# Standard conversions

# Old-style boolean "little endian" argument values -> explicit byte-order flag
my $searchReplaceFalseTrueHRef =
  {
   ( map { ($_ => "ENC_BIG_ENDIAN")    } qw(FALSE 0) ),
   ( map { ($_ => "ENC_LITTLE_ENDIAN") } qw(TRUE  1) ),
  };

# Every old-style value (and either byte-order flag) -> ENC_NA
my $searchReplaceEncNAHRef =
  {
   map { ($_ => "ENC_NA") } qw(FALSE 0 TRUE 1 ENC_LITTLE_ENDIAN ENC_BIG_ENDIAN)
  };


# Conversion "request" structure: a two-element list
# (
#   [ <list of field types for which this conversion request applies> ],
#   { <hash of desired encoding arg conversions: current value => new value> }
# )

my @types_NA =
  (
   [ qw(
           FT_NONE
           FT_BYTES
           FT_ETHER
           FT_IPv6
           FT_IPXNET
           FT_OID
      ) ],
   $searchReplaceEncNAHRef
  );

my @types_INT =
  (
   [ qw(
           FT_UINT8 FT_UINT16 FT_UINT24 FT_UINT32 FT_UINT64
           FT_INT8  FT_INT16  FT_INT24  FT_INT32  FT_INT64
           FT_FLOAT FT_DOUBLE
      ) ],
   $searchReplaceFalseTrueHRef
  );

my @types_MISC =
  (
   [ qw(
           FT_BOOLEAN
           FT_IPv4
           FT_GUID
           FT_EUI64
      ) ],
   $searchReplaceFalseTrueHRef
  );

my @types_STRING =
  (
   [ qw(FT_STRING FT_STRINGZ) ],
   do {
       # Values carrying no character encoding all become ENC_ASCII|ENC_NA
       my %encArgMap = map { ($_ => "ENC_ASCII|ENC_NA") }
                           qw(FALSE 0 TRUE 1 ENC_LITTLE_ENDIAN ENC_BIG_ENDIAN ENC_NA);

       # Values naming a character encoding keep it; any byte-order flag
       # (or the lack of one) is replaced by ENC_NA
       foreach my $charEnc (qw(ENC_ASCII ENC_UTF_8 ENC_EBCDIC)) {
           foreach my $byteOrder ("", "|ENC_LITTLE_ENDIAN", "|ENC_BIG_ENDIAN") {
               $encArgMap{$charEnc . $byteOrder} = $charEnc . "|ENC_NA";
           }
       }
       \%encArgMap;
   }
  );

my @types_UINT_STRING =
  (
   [ qw(FT_UINT_STRING) ],
   {
    ( map { ($_ => "ENC_ASCII|ENC_BIG_ENDIAN")    } qw(FALSE 0 ENC_BIG_ENDIAN)    ),
    ( map { ($_ => "ENC_ASCII|ENC_LITTLE_ENDIAN") } qw(TRUE  1 ENC_LITTLE_ENDIAN) ),
   }
  );

my @types_REG_PROTO =
  (
   [ qw(REG_PROTO) ],
   $searchReplaceEncNAHRef
  );

# For searching with no substitutions
my @types_TIME =
  (
   [ qw(FT_ABSOLUTE_TIME FT_RELATIVE_TIME) ],
   {}
  );

my @types_ALL =
  (
   [ qw(
           FT_NONE          FT_PROTOCOL      FT_BOOLEAN
           FT_UINT8         FT_UINT16        FT_UINT24
           FT_UINT32        FT_UINT64
           FT_INT8          FT_INT16         FT_INT24
           FT_INT32         FT_INT64
           FT_FLOAT         FT_DOUBLE
           FT_ABSOLUTE_TIME FT_RELATIVE_TIME
           FT_STRING        FT_STRINGZ       FT_UINT_STRING
           FT_ETHER         FT_BYTES         FT_UINT_BYTES
           FT_IPv4          FT_IPv6          FT_IPXNET
           FT_FRAMENUM      FT_PCRE          FT_GUID
           FT_OID           FT_EUI64
      ) ],
   do {
       # valid encoding args; the keys ("a" .. "l") are just placeholders
       my %validEncArgs;
       @validEncArgs{ "a" .. "l" } =
         (
          "ENC_NA",
          "ENC_LITTLE_ENDIAN",
          "ENC_BIG_ENDIAN",

          "ENC_ASCII|ENC_NA",
          "ENC_ASCII|ENC_LITTLE_ENDIAN",
          "ENC_ASCII|ENC_BIG_ENDIAN",

          "ENC_UTF_8|ENC_NA",
          "ENC_UTF_8|ENC_LITTLE_ENDIAN",
          "ENC_UTF_8|ENC_BIG_ENDIAN",

          "ENC_EBCDIC|ENC_NA",
          "ENC_EBCDIC|ENC_LITTLE_ENDIAN",
          "ENC_EBCDIC|ENC_BIG_ENDIAN",
         );
       \%validEncArgs;
   }
  );

#
# MAIN
#
my $writeFlag = '';
my $helpFlag  = '';

my $result = GetOptions(
                        'write'   => \$writeFlag,
                        'help|?'  => \$helpFlag
                       );

# At least one file name is required (test the argument count, not the truth
# of the first argument, so that a file literally named "0" is accepted).
if (!$result || $helpFlag || !@ARGV) {
    print "\nUsage: $0 [--write] FILENAME [...]\n\n";
    print "  Fix proto_tree_add_item() encoding arg when possible in file(s)\n";
    print "  Fixes (if any) are listed on stdout)\n\n";
    print "  --write     create FILENAME.encoding-arg-fixes (original file with fixes)\n";
    exit(1);
}

# Read through the files; fix up encoding parameter of proto_tree_add_item() calls
# Essentially:
#  For each file {
#  .  Create a hash of the hf_index_names & associated field types from the entries in hf[]
#  .  For each requested "conversion request" {
#  .  .  For each hf[] entry hf_index_name with a field type in a set of specified field types {
#  .  .  .  For each proto_tree_add_item() statement
#  .  .  .  .  - replace encoding arg in proto_tree_add_item(..., hf_index_name, ..., 'encoding-arg')
#                  specific values with new values
#  .  .  .  .  - print the statement showing the change
#  .  .  .  }
#  .  .  }
#  .  }
#  .  If requested and if replacements done: write new file "orig-filename.encoding-arg-fixes"
#  }
#  Exit with the total number of replacements done as the exit status.
#
# Note: The proto_tree_add_item() encoding arg will be converted only if
#        the hf_index_name referenced is in one of the entries in hf[] in the same file

# Number of replacements done, summed over all files
my $foundTotal = 0;

while (defined(my $fileName = shift @ARGV)) {

    die "No such file: \"$fileName\"\n" if (! -e $fileName);

    # delete leading './'
    $fileName =~ s{ ^ \. / } {}x;

    # Read in the whole file (ouch, but it's easier that way)
    my $fileContents = do {
        open(my $fci, "<", $fileName) or die("Couldn't open $fileName: $!");
        local $/;                       # slurp mode
        my $data = <$fci>;
        close($fci);
        defined $data ? $data : '';
    };

    # Create a hash of the hf[] entries (name_index_name=>field_type)
    my $hfArrayEntryFieldTypeHRef = find_hf_array_entries(\$fileContents, $fileName);

    my $found = 0;

    # Find and replace: alters proto_tree_add_item() encoding arg in $fileContents for:
    #     - hf[] entries with specified field types;
    #     - 'proto' as returned from proto_register_protocol()
    foreach my $typesARef (\@types_NA, \@types_INT, \@types_MISC,
                           \@types_STRING, \@types_UINT_STRING, \@types_REG_PROTO) {
        $found += fix_encoding_args(1, $typesARef, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
    }

    # If desired and if any changes, write out the changed version to a file
    if (($writeFlag) && ($found > 0)) {
        my $outName = $fileName . ".encoding-arg-fixes";
        open(my $fco, ">", $outName) or die("Couldn't open $outName for writing: $!");
        print $fco $fileContents;
        # buffered write errors only show up at close time
        close($fco) or die("Couldn't write $outName: $!");
    }

    $foundTotal += $found;

# Optional searches:
# search for (and output) proto_tree_add_item() statements with invalid encoding arg for specified field types
#    fix_encoding_args(2, \@types_NA,          \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
#    fix_encoding_args(2, \@types_INT,         \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
#    fix_encoding_args(2, \@types_MISC,        \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
#    fix_encoding_args(2, \@types_STRING,      \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
#    fix_encoding_args(2, \@types_UINT_STRING, \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
#    fix_encoding_args(2, \@types_ALL,         \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
# search for (and output) proto_tree_add_item() statements with any encoding arg for specified field types
#    fix_encoding_args(3, \@types_TIME,        \$fileContents, $hfArrayEntryFieldTypeHRef, $fileName);
#
# Find all proto_tree_add_item() statements
#  and output same highlighting the encoding arg
#    find_all(\$fileContents, $fileName);

}

# Exit only after every file has been processed. The status is clamped
# because a value of 256 or more would wrap around and could even read as
# 0 ("nothing found").
exit($foundTotal > 255 ? 255 : $foundTotal);

#==================================================================================

# Scan the (comment-stripped) file contents for hf[] array entries and for
# protocol registrations.
# args:
#   ref to string containing file contents (a copy is searched; the original is not altered)
#   filename (used only in the "duplicate" messages)
# returns: ref to a hash containing
#   hf_index_name       => field type as found in the hf[] entry (FT_...)
#   proto variable name => "REG_PROTO"   (for each "xxx = proto_register_protocol(")
#   name                => "???"         (for any name found more than once; a message
#                                         is printed and no fixes will be done for it)

sub find_hf_array_entries {
    my ($fileContentsRef, $fileName) = @_;

    # Matches a C-style comment.
    # Based on the (public domain) regexp from:
    # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
    my $CCommentRegEx = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }xo;

    # Matches an hf[] entry: $1 is the hf_index_name, $2 is the field type
    my $hfArrayFieldTypeRegEx = qr {
                                       \{
                                       \s*
                                       &\s*([A-Z0-9_\[\]-]+)                # &hf
                                       \s*,\s*
                                       \{\s*
                                       .+?                                  # (a bit dangerous)
                                       \s*,\s*
                                       (FT_[A-Z0-9_]+)                      # field type
                                       \s*,\s*
                                       .+?
                                       \s*,\s*
                                       HFILL                                # HFILL
                               }xios;

    # Matches "<name> = proto_register_protocol(" at the start of a line: $1 is <name>
    my $protoRegEx = qr /
                            ^ \s*                     # note m modifier below
                            (
                                [a-zA-Z0-9_]+
                            )
                            \s*
                            =
                            \s*
                            proto_register_protocol
                            \s*
                            \(
                        /xoms;

    # Work on a copy with the comments removed so that commented-out
    # entries are not picked up
    (my $strippedContents = $$fileContentsRef) =~ s {$CCommentRegEx} []xg;

    my %fieldTypeOf;

    # Pass 1: the hf[] entries
    while ($strippedContents =~ m{ $hfArrayFieldTypeRegEx }xgis) {
        if (!exists $fieldTypeOf{$1}) {
            $fieldTypeOf{$1} = $2;
            next;
        }
        # seen before: report it and block any substitutions for this hf_index_name
        printf "%-35.35s: ? duplicate hf[] entry: no fixes done for: $1; manual action may be req'd\n", $fileName;
        $fieldTypeOf{$1} = "???";
    }

    # Pass 2: the registered protocols
    while ($strippedContents =~ m { $protoRegEx }xgioms ) {
        if (!exists $fieldTypeOf{$1}) {
            $fieldTypeOf{$1} = "REG_PROTO";
            next;
        }
        # name already in use: report it and block any substitutions for this protocol
        printf "%-35.35s: ? duplicate 'proto': no fixes done for: $1; manual action may be req'd\n", $fileName;
        $fieldTypeOf{$1} = "???";
    }

    return \%fieldTypeOf;
}

{  # block begin

# Variables shared by fix_encoding_args() and patsub():
#  set up by fix_encoding_args(); read (and, for $found, updated) by patsub()
    my $fileName;           # name of the file being processed (used in messages)
    my $searchReplaceHRef;  # encoding arg search (keys) / replacement (values) hash
    my $found;              # number of matching statements seen in the current call
    my $hf_field_type;      # field type of the hf[] entry currently being processed

# Substitute new values for certain proto_tree_add_item() encoding arg values (for specified hf field types)
#  Variants: search for and display for "exceptions" to allowed encoding arg values;
#            search for and display all encoding arg values
# args (in order):
#   substitute_flag: 1: replace specified encoding arg values by a new value (keys/values in search hash);
#                    2: search for "exceptions" to allowed encoding arg values (values in search hash);
#                    3: search for all encoding arg values
#   ref to array containing two elements:
#      - ref to array containing hf[] types to be processed (FT_STRING, etc)
#      - ref to hash containing search (keys) and replacement (values) for encoding arg
#   ref to string containing file contents (altered in place)
#   ref to hfArrayEntries hash (key: hf name; value: field type)
#   filename
# returns: the number of matching proto_tree_add_item() statements
# dies:    if substitute_flag is not 1, 2 or 3
#
# Note: every match is passed through patsub(), so for flags 2 and 3 the matched
#       encoding arg is also rewritten in the file contents string (to "???" unless
#       the arg happens to be a key of the search hash). Don't write the string
#       to a file after such a search.

    sub fix_encoding_args {

        (my $subFlag, my $mapArg, my $fileContentsRef, my $hfArrayEntryFieldTypeHRef, $fileName) = @_;

        my $hfTypesARef    = $$mapArg[0];
        $searchReplaceHRef = $$mapArg[1];

        # set of the field types to be processed (only the keys matter)
        my %hfTypes;
        @hfTypes{@$hfTypesARef}=();

        # set up the encoding arg match pattern
        my $encArgPat;
        if ($subFlag == 1) {
            # just match for proto_tree_add_item() statements which have an encoding arg matching one of the
            #   keys in the searchReplace hash.
            # Create an "alternatives" string containing all the keys (A|B|C\|D|...);
            #  quotemeta() escapes the "|" (and any other regex metacharacter) within a key
            $encArgPat = join "|",  map { quotemeta($_) } keys %$searchReplaceHRef;
        } elsif ($subFlag == 2) {
            # Find all the proto_tree_add_item statements wherein the encoding arg is a value other than
            #      one of the "replace" values.
            #  Uses zero-length negative-lookahead to find proto_tree_add_item statements for which the encoding
            #    arg is something other than one of the provided replace values.
            # Create an "alternatives" string containing all the values (A|B|C\|D|...).
            #  Note: values() returns aliases to the hash values, so the escaping must be
            #        done on a copy (as quotemeta() does); an in-place s/// here would
            #        corrupt the caller's replacement values.
            my $match_str = join "|",  map { quotemeta($_) } values %$searchReplaceHRef;
            $encArgPat = qr /
                                (?!                  # negative zero-length look-ahead
                                    \s*
                                    (?: $match_str ) # alternatives we don't want to match
                                    \s*
                                )
                                [^,)]+?              # OK: encoding arg is other than one of the alternatives:
                                                     #   match to end of the arg
                            /x;
        } elsif ($subFlag == 3) {
            # match for proto_tree_add_item statements for any value of the encoding parameter
            # IOW: find all the proto_tree_add_item statements with an hf entry of the desired types
            $encArgPat = qr / [^,)]+? /x;
        } else {
            die "fix_encoding_args: invalid substitute_flag \"$subFlag\" (expected 1, 2 or 3)\n";
        }

        # For each hf[] entry which matches a type in %hfTypes do replacements
        $found = 0;
        foreach my $key (keys %$hfArrayEntryFieldTypeHRef) {
            $hf_field_type = $$hfArrayEntryFieldTypeHRef{$key};
            ##printf "--> %-35.35s: %s\n", $key,  $hf_field_type;

            next unless exists $hfTypes{$hf_field_type};    # Do we want to process for this hf[] entry type ?

            # the name may contain "[", "]" or "-": escape it for use in the pattern
            my $hf_index_name = quotemeta($key);

            # build the complete pattern
            my $patRegEx = qr /
                                  ( # part 1: $1
                                      proto_tree_add_item \s* \(
                                      [^;]+?
                                      ,\s*
                                      $hf_index_name
                                      \s*,
                                      [^;]+
                                      ,\s*
                                  )
                                  ( # part 2: $2
                                      $encArgPat
                                  )
                                  ( # part 3: $3
                                      \s* \)
                                      \s* ;
                                  )
                              /xs;

            ##print "\n$hf_index_name $hf_field_type\n";

            ## Match and substitute as specified
            $$fileContentsRef =~ s/ $patRegEx /patsub($1,$2,$3)/xges;

        }

        return $found;
    }

    # Called from fix_encoding_args to determine replacement string when a regex match is encountered
    #  $_[0]: part 1
    #  $_[1]: part 2: encoding arg
    #  $_[2]: part 3
    #  lookup the desired replacement value for the encoding arg ("???" if there is none)
    #  print match string showing and highlighting the encoding arg replacement
    #  return "replacement" string
    sub patsub {
        $found += 1;
        my $substr = exists $$searchReplaceHRef{$_[1]} ? $$searchReplaceHRef{$_[1]} : "???";
        my $str = sprintf("%s[[%s]-->[%s]]%s", $_[0], $_[1], $substr,  $_[2]);
        # show the statement on one line: tabs become spaces; newlines and CRs are deleted
        $str =~ tr/\t\n\r/ /d;
        # $str is source text and may contain "%": pass it as an argument,
        #  never as part of the format string
        printf "%s:  %-17.17s %s\n", $fileName, $hf_field_type . ":", $str;
        return $_[0] . $substr . $_[2];
    }
}  # block end


# Print every proto_tree_add_item() statement found in the file contents.
#  Each statement is output on a single line, prefixed by the filename,
#  with its last (encoding) arg highlighted as [[arg]].
# args:
#   ref to string containing file contents (not altered)
#   filename (used as the output line prefix)
sub find_all {
    my( $fileContentsRef, $fileName) = @_;

    # $1: everything up to and including the comma (and whitespace) before the last arg
    # $2: the last arg
    # $3: the closing ");"
    my $pat = qr /
                     (
                         proto_tree_add_item \s* \(
                         [^;]+
                         , \s*
                     )
                     (
                         [^ \t,)]+?
                     )
                     (
                         \s* \)
                         \s* ;
                     )
                 /xs;

    while ($$fileContentsRef =~ / $pat /xgso) {
        my ($leadingPart, $encArg, $closingPart) = ($1, $2, $3);

        my $str = $leadingPart . "[[" . $encArg . "]]" . $closingPart . "\n";

        # flatten to one line: tabs become spaces; newlines and CRs are deleted;
        #  then squeeze each run of whitespace to a single space
        $str =~ tr/\t\n\r/ /d;
        $str =~ s/ \s+ / /xg;

        print "$fileName: $str\n";
    }
}