From f1751ef22fc9877e59fad3b1c60826bff8569d94 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Sat, 8 Apr 2017 19:34:24 +0200 Subject: checkhf: handle C++-style comments "tools/checkhf.pl epan/dissectors/packet-umts_fp.c" gave this error: Complex regular subexpression recursion limit (32766) exceeded at tools/checkhf.pl line 273. This is caused by the comment "// ... Indicator's ..." which failed to match the single-quoted string and caused "catastrophic backtracking". Solution to fix this case is to disallow unescaped newlines (add "\n" to the negated character class). Additionally, add logic to remove C++-style comments for completeness. Tested against all epan/dissectors/packet-*.c files, the only difference in output is this umts_fp case, the running time has not regressed. Change-Id: I7b43e01e1580acf831c0485513135b613bb02e8b Reported-by: Joerg Mayer Reviewed-on: https://code.wireshark.org/review/20965 Petri-Dish: Peter Wu Reviewed-by: Michael Mann --- tools/checkhf.pl | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/checkhf.pl b/tools/checkhf.pl index 93bcb316a5..5059374474 100755 --- a/tools/checkhf.pl +++ b/tools/checkhf.pl @@ -235,6 +235,18 @@ sub remove_blank_lines { return; } +sub get_quoted_str_regex { + # A regex which matches double-quoted strings. + # 's' modifier added so that strings containing a 'line continuation' + # ( \ followed by a new-line) will match. + my $double_quoted_str = qr{ (?: ["] (?: \\. | [^\"\\\n])* ["]) }xmso; + + # A regex which matches single-quoted strings. + my $single_quoted_str = qr{ (?: ['] (?: \\. | [^\'\\\n])* [']) }xmso; + + return qr{ $double_quoted_str | $single_quoted_str }xmso; +} + # ------------ # action: remove comments from input string # arg: code_ref, filename @@ -250,6 +262,13 @@ sub remove_comments { ${$code_ref} =~ s{ $c_comment_regex } {}xmsog; + # Remove single-line C++-style comments. Be careful not to break up strings + # like "coap://", so match double quoted strings, single quoted characters, + # division operator and other characters before the actual "//" comment. + my $quoted_str = get_quoted_str_regex(); + my $cpp_comment_regex = qr{ ^((?: $quoted_str | /(?!/) | [^'"/\n] )*) // .*$ }xm; + ${$code_ref} =~ s{ $cpp_comment_regex } { $1 }xmg; + ($debug == 1) && print "==> After Remove Comments: code: [$filename]\n${$code_ref}\n===<\n"; return; @@ -262,15 +281,8 @@ sub remove_comments { sub remove_quoted_strings { my ($code_ref, $filename) = @_; - # A regex which matches double-quoted strings. - # 's' modifier added so that strings containing a 'line continuation' - # ( \ followed by a new-line) will match. - my $double_quoted_str = qr{ (?: ["] (?: \\. | [^\"\\])* ["]) }xmso; - - # A regex which matches single-quoted strings. - my $single_quoted_str = qr{ (?: ['] (?: \\. | [^\'\\])* [']) }xmso; - - ${$code_ref} =~ s{ $double_quoted_str | $single_quoted_str } {}xmsog; + my $quoted_str = get_quoted_str_regex(); + ${$code_ref} =~ s{ $quoted_str } {}xmsog; ($debug == 1) && print "==> After Remove quoted strings: code: [$filename]\n${$code_ref}\n===<\n"; -- cgit v1.2.1