#!/usr/bin/perl -w
#
# Make-manuf - Creates a file containing ethernet OUIs and their
# company IDs.  It merges the databases at IEEE with entries in our
# template file.  Our file in turn contains entries from
# http://www.cavebear.com/archive/cavebear/Ethernet/Ethernet.txt
# along with our own.
#
# The script reads the comments at the top of "manuf.tmpl" and writes
# them to "manuf".  It then joins the manufacturer listing in "manuf.tmpl"
# with the listing in "oui.txt", "iab.txt", etc, with the entries in
# "manuf.tmpl" taking precedence.

# LWP is part of the standard Perl module libwww

# As of April 2012 the IEEE content is mostly UTF-8 encoded although some
# of the entries feature sequences listed at
# http://www.i18nqa.com/debug/utf8-debug.html

# As of October 2016 the download links at
# http://standards.ieee.org/develop/regauth/
# point to CSV files. We might want to download and parse those in favor
# of our current text munging.

use strict;
use warnings;

use Encode;
# Default I/O layer for every open() below that does not name its own layer.
use open ':encoding(utf8)';

# LWP is loaded at run time so that a missing module yields a friendly
# message instead of a compile-time failure.
eval { require LWP::UserAgent; 1 }
    or die "LWP isn't installed. It is part of the standard Perl\n" .
           " module libwww. Bailing.\n";

my $agent = LWP::UserAgent->new;
$agent->env_proxy;
$agent->agent("Wireshark make-manuf");

my $template = "manuf.tmpl";
my $wkatmpl  = "wka.tmpl";
my $outfile  = "manuf";
my $inheader = 1;
my $header   = "";

my $oui_url   = "http://standards.ieee.org/develop/regauth/oui/oui.txt";
my $cid_url   = "http://standards.ieee.org/develop/regauth/cid/cid.txt";
my $iab_url   = "http://standards.ieee.org/develop/regauth/iab/iab.txt";
my $oui28_url = "http://standards.ieee.org/develop/regauth/oui28/oui28.txt";
my $oui36_url = "http://standards.ieee.org/develop/regauth/oui36/oui36.txt";

# Maps an address prefix ("XX:XX:XX" or "XX:XX:XX:XX:XX:00/NN") to the
# text written after it in the output file.
my %oui_list = ();

my $hp      = "[0-9a-fA-F]{2}";    # one hex byte
my $oui_re  = "$hp:$hp:$hp";       # prefix as written in manuf.tmpl
my $ieee_re = "$hp-$hp-$hp";       # prefix as written in the IEEE files

# Sanity thresholds; a download that yields fewer lines/entries than
# these aborts the run before the output file is touched.
my $min_entries     = 1000;
my $min_cid_entries = 30;
my $min_total       = 27000;       # 27196 as of 2016-10-02

my $tmpl_added = 0;

# shorten($manufacturer)
#
# Derive a short (at most 8 character) vendor tag from a full company name.
# Returns the tag alone when the tag contains the whole (trimmed) original
# name, otherwise "tag<padding> # original name".
sub shorten {
    my $origmanuf = shift;

    # Trim leading (probably not needed) & trailing (absolutely needed) space.
    $origmanuf =~ s/^\s+|\s+$//g;

    # Pad with spaces so the word-removal pattern below can rely on a
    # leading whitespace and a trailing space around every word.
    my $manuf = " " . $origmanuf . " ";

    # Remove any punctuation
    $manuf =~ tr/',.()/ /;

    # & isn't needed when Standalone
    $manuf =~ s/ \& / /g;

    # Remove any "the", "inc", "plc" ...
    # (The original pattern had an empty alternative, "plc||systems"; it
    # only ever removed surplus whitespace, which is stripped below anyway.)
    $manuf =~ s/\s(?:the|inc|incorporated|plc|systems|corp|corporation|s\/a|a\/s|ab|ag|kg|gmbh|co|company|limited|ltd|holding|spa)(?= )//gi;

    # Convert to consistent case
    $manuf =~ s/(\w+)/\u\L$1/g;

    # Remove all spaces
    $manuf =~ s/\s+//g;

    # Truncate all names to a reasonable length, say, 8 characters.
    # If the string contains UTF-8, this may be substantially more than 8 bytes.
    $manuf = substr($manuf, 0, 8);

    if ($manuf =~ /\Q$origmanuf\E/i) {
        return $manuf;
    }
    return sprintf("%-22s # %s", $manuf, $origmanuf);
}

# fetch($url)
#
# Download $url with the shared user agent and return its body decoded
# from UTF-8.  Dies with the HTTP status line if the request fails.
sub fetch {
    my $url = shift;

    print "Fetching $url.\n";
    my $request = HTTP::Request->new(GET => $url);
    my $result  = $agent->request($request);

    if (!$result->is_success) {
        die("Error fetching $url: " . $result->status_line . "\n");
    }
    return decode("utf-8", $result->content);
}

# _add_ieee_entry($key, $manuf)
#
# Record an IEEE entry under $key unless %oui_list already has that key
# (earlier sources take precedence).  Returns 1 if added, 0 if skipped.
sub _add_ieee_entry {
    my ($key, $manuf) = @_;

    if (exists $oui_list{$key}) {
        # print, not printf: vendor names may legitimately contain '%'.
        print "$key - Skipping IEEE \"$manuf\" in favor of \"$oui_list{$key}\"\n";
        return 0;
    }
    $oui_list{$key} = shorten($manuf);
    return 1;
}

# _merge_ieee_ranges($url, $mask)
#
# Merge an IEEE listing in which each assignment is a "(hex)" line giving
# the 24-bit base prefix followed by a "(base 16)" line giving the range
# (used for IAB, OUI-28 and OUI-36).  Keys take the form
# "BB:BB:BB:XX:YY:00/$mask".  Returns (added, skipped, total lines).
sub _merge_ieee_ranges {
    my ($url, $mask) = @_;
    my ($added, $skipped, $total) = (0, 0, 0);
    my $base;

    foreach my $line (split(/[\r\n]+/, fetch($url))) {
        $total++;

        # Remember the most recent base prefix.
        if ($line =~ /^\s*($ieee_re)\s+\(hex\)\s+\S.*$/) {
            $base = $1;
        }

        # The first two bytes of the range start complete the key.
        next unless $line =~ /^\s*($hp)($hp)$hp-$hp$hp$hp\s+\(base\s16\)\s+(\S.*)$/;
        my ($byte4, $byte5, $manuf) = ($1, $2, $3);

        if (!defined $base) {
            warn "Range line without a preceding base prefix, ignored: $line\n";
            next;
        }

        my $key = join(':', $base, $byte4, $byte5, '00') . "/$mask";
        $key =~ tr/-/:/;        # The IEEE bytes are separated by dashes.
        $key =~ tr/a-f/A-F/;    # Ensure the key is all upper-case

        if (_add_ieee_entry($key, $manuf)) {
            $added++;
        }
        else {
            $skipped++;
        }
    }
    return ($added, $skipped, $total);
}

# _merge_ieee_prefixes($url)
#
# Merge an IEEE listing made of plain 24-bit "(hex)" assignments (used
# for CID and OUI).  Returns (added, skipped, total lines).
sub _merge_ieee_prefixes {
    my ($url) = @_;
    my ($added, $skipped, $total) = (0, 0, 0);

    foreach my $line (split(/[\r\n]+/, fetch($url))) {
        $total++;

        next unless $line =~ /^\s*($ieee_re)\s+\(hex\)\s+(\S.*)$/;
        my ($key, $manuf) = ($1, $2);

        $key =~ tr/-/:/;        # The IEEE bytes are separated by dashes.
        $key =~ tr/a-f/A-F/;    # Ensure the key is all upper-case

        if (_add_ieee_entry($key, $manuf)) {
            $added++;
        }
        else {
            $skipped++;
        }
    }
    return ($added, $skipped, $total);
}

# Write out the header and populate the OUI list with our entries.

open(my $tmpl_fh, '<', $template)
    or die "Couldn't open template file for reading ($template)\n";

while (my $line = <$tmpl_fh>) {
    chomp($line);
    if ($line !~ /^$oui_re\s+\S/ && $inheader) {
        # Everything before the first OUI entry is header text.
        $header .= "$line\n";
    }
    elsif ($line =~ /^($oui_re)\s+(\S.*)$/) {
        my ($oui, $manuf) = ($1, $2);
        $inheader = 0;
        # Ensure OUI is all upper-case
        $oui =~ tr/a-f/A-F/;
        # Template names are stored verbatim (not passed through shorten).
        $oui_list{$oui} = $manuf;
        $tmpl_added++;
    }
}
close($tmpl_fh);

# Add IEEE entries for IABs (currently only 00-50-C2)
my ($iab_added, $iab_skipped, $iab_total) = _merge_ieee_ranges($iab_url, 36);
if ($iab_total < $min_entries) { die "Too few IAB entries ($iab_total)\n"; }

# Add IEEE entries for OUI-28
my ($oui28_added, $oui28_skipped, $oui28_total) = _merge_ieee_ranges($oui28_url, 28);
if ($oui28_total < $min_entries) { die "Too few OUI-28 entries ($oui28_total)\n"; }

# Add IEEE entries for OUI-36 (currently only 00-1B-C5)
my ($oui36_added, $oui36_skipped, $oui36_total) = _merge_ieee_ranges($oui36_url, 36);
if ($oui36_total < $min_entries) { die "Too few OUI-36 entries ($oui36_total)\n"; }

# Add IEEE entries for CIDs.
my ($cid_added, $cid_skipped, $cid_total) = _merge_ieee_prefixes($cid_url);
if ($cid_total < $min_cid_entries) { die "Too few CID entries ($cid_total)\n"; }

# Add IEEE entries for OUIs not yet known.
my ($oui_added, $oui_skipped, $oui_total) = _merge_ieee_prefixes($oui_url);
if ($oui_total < $min_entries) { die "Too few OUI entries ($oui_total)\n"; }

# Every source contributes to the total, including OUI-28.
my $total_added = $tmpl_added + $oui_added + $iab_added
                + $oui28_added + $oui36_added + $cid_added;
if ($total_added < $min_total) { die "Too few total entries ($total_added)\n"; }

# Write output file

open(my $out_fh, '>', $outfile)
    or die "Couldn't open output file for writing ($outfile)\n";

print {$out_fh} "# This file was generated by running ./make-manuf.\n";
print {$out_fh} "# Don't change it directly, change manuf.tmpl and wka.tmpl instead.\n#\n";
print {$out_fh} "$header";

foreach my $oui (sort(keys %oui_list)) {
    print {$out_fh} "$oui\t$oui_list{$oui}\n";
}

# Write out a blank line separating the OUIs from the well-known
# addresses, and then read the well-known address template file
# and write it to the manuf file.

open(my $wka_fh, '<', $wkatmpl)
    or die "Couldn't open well-known address template file for reading ($wkatmpl)\n";

# XXX - it'd be nice to get this from the Cavebear file, but inferring
# the address mask from entries in that file involves some work.
#
print {$out_fh} "\n";
while (my $line = <$wka_fh>) {
    chomp($line);
    print {$out_fh} "$line\n";
}
close($wka_fh);

# Buffered write errors only surface at close time, so check it.
close($out_fh)
    or die "Couldn't finish writing output file ($outfile): $!\n";

print <<"Fin";
Original entries   : $tmpl_added
IEEE OUI added     : $oui_added
IEEE IAB added     : $iab_added
IEEE OUI28 added   : $oui28_added
IEEE OUI36 added   : $oui36_added
IEEE CID added     : $cid_added
Total added        : $total_added

IEEE OUI total     : $oui_total
IEEE IAB total     : $iab_total
IEEE OUI28 total   : $oui28_total
IEEE OUI36 total   : $oui36_total
IEEE CID total     : $cid_total

IEEE OUI skipped   : $oui_skipped
IEEE IAB skipped   : $iab_skipped
IEEE OUI28 skipped : $oui28_skipped
IEEE OUI36 skipped : $oui36_skipped
IEEE CID skipped   : $cid_skipped
Fin