From bb0c17546f8caf817f767ab48b12c66572da4f08 Mon Sep 17 00:00:00 2001
From: Guy Harris <guy@alum.mit.edu>
Date: Sun, 24 Aug 2014 01:06:35 -0700
Subject: Strengthen the heuristics for pcap subtypes.

When trying to guess what type of capture a file is, look for as many
bogosities as possible (caplen > len, microseconds >= 10^6/nanoseconds
>= 10^9, too-high caplen, too-high original len, caplen > snapshot
length), to increase the chances of guessing correctly.

(Every time somebody uses 0xa1b2c3d4 as the magic number for a capture
file that isn't standard pcap format, God kills a kitten.  Please, think
of the kittens.)

Change-Id: I3f397d598ed61dc82e2832be30452ebe8ace98e8
Reviewed-on: https://code.wireshark.org/review/3808
Reviewed-by: Guy Harris <guy@alum.mit.edu>
---
 wiretap/libpcap.c | 431 ++++++++++++++++++++++++++----------------------------
 1 file changed, 205 insertions(+), 226 deletions(-)

(limited to 'wiretap/libpcap.c')

diff --git a/wiretap/libpcap.c b/wiretap/libpcap.c
index 4d3c8a5224..dfab314a24 100644
--- a/wiretap/libpcap.c
+++ b/wiretap/libpcap.c
@@ -56,24 +56,20 @@ typedef struct {
 #endif
 
 /* Try to read the first two records of the capture file. */
-typedef enum {
-	THIS_FORMAT,		/* the reads succeeded, assume it's this format */
-	BAD_READ,		/* the file is probably not valid */
-	OTHER_FORMAT		/* the file may be valid, but not in this format */
-} libpcap_try_t;
-static libpcap_try_t libpcap_try(wtap *wth, int *err);
+static int libpcap_try(wtap *wth, int *err);
+static int libpcap_try_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
+    struct pcaprec_ss990915_hdr *hdr);
 
 static gboolean libpcap_read(wtap *wth, int *err, gchar **err_info,
     gint64 *data_offset);
 static gboolean libpcap_seek_read(wtap *wth, gint64 seek_off,
     struct wtap_pkthdr *phdr, Buffer *buf, int *err, gchar **err_info);
-static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
-    struct pcaprec_ss990915_hdr *hdr);
-static void adjust_header(wtap *wth, struct pcaprec_hdr *hdr);
 static gboolean libpcap_read_packet(wtap *wth, FILE_T fh,
     struct wtap_pkthdr *phdr, Buffer *buf, int *err, gchar **err_info);
 static gboolean libpcap_dump(wtap_dumper *wdh, const struct wtap_pkthdr *phdr,
     const guint8 *pd, int *err);
+static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
+    struct pcaprec_ss990915_hdr *hdr);
 
 int libpcap_open(wtap *wth, int *err, gchar **err_info)
 {
@@ -86,6 +82,28 @@ int libpcap_open(wtap *wth, int *err, gchar **err_info)
 	int file_encap;
 	gint64 first_packet_offset;
 	libpcap_t *libpcap;
+	static const int subtypes_modified[] = {
+		WTAP_FILE_TYPE_SUBTYPE_PCAP_SS991029,
+		WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990915
+	};
+#define N_SUBTYPES_MODIFIED	G_N_ELEMENTS(subtypes_modified)
+	static const int subtypes_standard[] = {
+		WTAP_FILE_TYPE_SUBTYPE_PCAP,
+		WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990417,
+		WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA
+	};
+#define N_SUBTYPES_STANDARD	G_N_ELEMENTS(subtypes_standard)
+	static const int subtypes_nsec[] = {
+		WTAP_FILE_TYPE_SUBTYPE_PCAP_NSEC
+	};
+#define N_SUBTYPES_NSEC		G_N_ELEMENTS(subtypes_nsec)
+#define MAX_FIGURES_OF_MERIT \
+	MAX(MAX(N_SUBTYPES_MODIFIED, N_SUBTYPES_STANDARD), N_SUBTYPES_NSEC)
+	int figures_of_merit[MAX_FIGURES_OF_MERIT];
+	const int *subtypes;
+	int n_subtypes;
+	int best_subtype;
+	int i;
 
 	/* Read in the number that should be at the start of a "libpcap" file */
 	errno = WTAP_ERR_CANT_READ;
@@ -348,133 +366,76 @@ int libpcap_open(wtap *wth, int *err, gchar **err_info)
 	if (modified) {
 		/*
 		 * Well, we have the magic number from Alexey's
-		 * later two patches.
-		 *
-		 * Try ss991029, the last of his patches, first.
+		 * later two patches.  Try the subtypes for that.
 		 */
-		wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_SS991029;
-		first_packet_offset = file_tell(wth->fh);
-		switch (libpcap_try(wth, err)) {
-
-		case BAD_READ:
-			/*
-			 * Well, we couldn't even read it.
-			 * Give up.
-			 */
-			return -1;
-
-		case THIS_FORMAT:
+		subtypes = subtypes_modified;
+		n_subtypes = N_SUBTYPES_MODIFIED;
+	} else {
+		if (wth->tsprecision == WTAP_FILE_TSPREC_NSEC) {
 			/*
-			 * Well, it looks as if it might be 991029.
-			 * Put the seek pointer back, and finish.
-			 */
-			if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
-				return -1;
-			}
-			goto done;
-
-		case OTHER_FORMAT:
+			 * We have nanosecond-format libpcap's magic
+			 * number.  Try the subtypes for that.
+			 */ 
+			subtypes = subtypes_nsec;
+			n_subtypes = N_SUBTYPES_NSEC;
+		} else {
 			/*
-			 * Try the next format.
+			 * We have the regular libpcap magic number.
+			 * Try the subtypes for that.
 			 */
-			break;
+			subtypes = subtypes_standard;
+			n_subtypes = N_SUBTYPES_STANDARD;
 		}
+	}
 
-		/*
-		 * Well, it's not completely unreadable,
-		 * but it's not ss991029.  Try ss990915;
-		 * there are no other types to try after that,
-		 * so we put the seek pointer back and treat
-		 * it as 990915.
-		 */
-		wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990915;
-		if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
-			return -1;
-		}
-	} else {
-		/*
-		 * Well, we have the standard magic number.
-		 *
-		 * Try the standard format first.
-		 */
-		if(wth->tsprecision == WTAP_FILE_TSPREC_NSEC) {
-			wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_NSEC;
-		} else {
-			wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP;
-		}
-		first_packet_offset = file_tell(wth->fh);
-		switch (libpcap_try(wth, err)) {
-
-		case BAD_READ:
+	/*
+	 * Try all the subtypes.
+	 */
+	first_packet_offset = file_tell(wth->fh);
+	for (i = 0; i < n_subtypes; i++) {
+		wth->file_type_subtype = subtypes[i];
+		figures_of_merit[i] = libpcap_try(wth, err);
+		if (figures_of_merit[i] == -1) {
 			/*
 			 * Well, we couldn't even read it.
 			 * Give up.
 			 */
 			return -1;
-
-		case THIS_FORMAT:
+		}
+		if (figures_of_merit[i] == 0) {
 			/*
-			 * Well, it looks as if it might be a standard
-			 * libpcap file.
+			 * This format doesn't have any issues.
 			 * Put the seek pointer back, and finish.
 			 */
 			if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
 				return -1;
 			}
 			goto done;
-
-		case OTHER_FORMAT:
-			/*
-			 * Try the next format.
-			 */
-			break;
 		}
 
 		/*
-		 * Well, it's not completely unreadable, but it's not
-		 * a standard file.  Put the seek pointer back and try
-		 * ss990417.
+		 * OK, we've recorded the figure of merit for this one;
+		 * go back to the first packet and try the next one.
 		 */
-		wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_SS990417;
 		if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
 			return -1;
 		}
-		switch (libpcap_try(wth, err)) {
-
-		case BAD_READ:
-			/*
-			 * Well, we couldn't even read it.
-			 * Give up.
-			 */
-			return -1;
-
-		case THIS_FORMAT:
-			/*
-			 * Well, it looks as if it might be ss990417.
-			 * Put the seek pointer back, and finish.
-			 */
-			if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
-				return -1;
-			}
-			goto done;
-
-		case OTHER_FORMAT:
-			/*
-			 * Try the next format.
-			 */
-			break;
-		}
+	}
 
+	/*
+	 * OK, none are perfect; let's see which one is least bad.
+	 */
+	best_subtype = INT_MAX;
+	for (i = 0; i < n_subtypes; i++) {
 		/*
-		 * Well, it's not completely unreadable,
-		 * but it's not a standard file *nor* is it ss990417.
-		 * Try it as a Nokia file; there are no other types
-		 * to try after that, so we put the seek pointer back
-		 * and treat it as a Nokia file.
+		 * Is this subtype better than the last one we saw?
 		 */
-		wth->file_type_subtype = WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA;
-		if (file_seek(wth->fh, first_packet_offset, SEEK_SET, err) == -1) {
-			return -1;
+		if (figures_of_merit[i] < best_subtype) {
+			/*
+			 * Yes.  Choose it until we find a better one.
+			 */
+			wth->file_type_subtype = subtypes[i];
+			best_subtype = figures_of_merit[i];
 		}
 	}
 
@@ -488,7 +449,8 @@ done:
 	 * If this is a Nokia capture, treat 13 as WTAP_ENCAP_ATM_PDUS,
 	 * rather than as what we normally treat it.
 	 */
-	if (wth->file_type_subtype == WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA && hdr.network == 13)
+	if (wth->file_type_subtype == WTAP_FILE_TYPE_SUBTYPE_PCAP_NOKIA &&
+	    hdr.network == 13)
 		wth->file_encap = WTAP_ENCAP_ATM_PDUS;
 
 	if (wth->file_encap == WTAP_ENCAP_ERF) {
@@ -502,8 +464,10 @@ done:
 }
 
 /* Try to read the first two records of the capture file. */
-static libpcap_try_t libpcap_try(wtap *wth, int *err)
+static int libpcap_try(wtap *wth, int *err)
 {
+	int ret;
+
 	/*
 	 * pcaprec_ss990915_hdr is the largest header type.
 	 */
@@ -513,7 +477,8 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
 	/*
 	 * Attempt to read the first record's header.
 	 */
-	if (libpcap_read_header(wth, wth->fh, err, NULL, &first_rec_hdr) == -1) {
+	ret = libpcap_try_header(wth, wth->fh, err, NULL, &first_rec_hdr);
+	if (ret == -1) {
 		if (*err == 0 || *err == WTAP_ERR_SHORT_READ) {
 			/*
 			 * EOF or short read - assume the file is in this
@@ -522,24 +487,17 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
 			 * they will presumably get the same EOF or short
 			 * read.
 			 */
-			return THIS_FORMAT;
-		}
-
-		if (*err == WTAP_ERR_BAD_FILE) {
-			/*
-			 * The first record is bogus, so this is probably
-			 * a corrupt file.  Assume the file is in this
-			 * format.  When our client tries to read the
-			 * first packet they will presumably get the
-			 * same bogus record.
-			 */
-			return THIS_FORMAT;
+			return 0;
 		}
 
+		return ret;
+	}
+	if (ret != 0) {
 		/*
-		 * Some other error, e.g. an I/O error; just give up.
+		 * Probably a mismatch; return the figure of merit
+		 * (demerit?).
 		 */
-		return BAD_READ;
+		return ret;
 	}
 
 	/*
@@ -547,12 +505,13 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
 	 * that the header is sane.
 	 */
 	if (file_seek(wth->fh, first_rec_hdr.hdr.incl_len, SEEK_CUR, err) == -1)
-		return BAD_READ;
+		return -1;
 
 	/*
 	 * Now attempt to read the second record's header.
 	 */
-	if (libpcap_read_header(wth, wth->fh, err, NULL, &second_rec_hdr) == -1) {
+	ret = libpcap_try_header(wth, wth->fh, err, NULL, &second_rec_hdr);
+	if (ret == -1) {
 		if (*err == 0 || *err == WTAP_ERR_SHORT_READ) {
 			/*
 			 * EOF or short read - assume the file is in this
@@ -561,32 +520,105 @@ static libpcap_try_t libpcap_try(wtap *wth, int *err)
 			 * they will presumably get the same EOF or short
 			 * read.
 			 */
-			return THIS_FORMAT;
+			return 0;
 		}
 
-		if (*err == WTAP_ERR_BAD_FILE) {
-			/*
-			 * The second record is bogus; maybe it's a
-			 * Capture File From Hell, and what looks like
-			 * the "header" of the next packet is actually
-			 * random junk from the middle of a packet.
-			 * Try the next format; if we run out of formats,
-			 * it probably *is* a corrupt file.
-			 */
-			return OTHER_FORMAT;
-		}
+		return ret;
+	}
+
+	return ret;
+}
+
+/* Read the header of the next packet.
+
+   Return -1 on an I/O error, 0 on success, or a positive number if the
+   header looks corrupt.  The higher the positive number, the more things
+   are wrong with the header; this is used by the heuristics that try to
+   guess what type of file it is, with the type with the fewest problems
+   being chosen. */
+static int libpcap_try_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
+    struct pcaprec_ss990915_hdr *hdr)
+{
+	int	ret;
+
+	if (!libpcap_read_header(wth, fh, err, err_info, hdr))
+		return -1;
+
+	ret = 0;	/* start out presuming everything's OK */
+	switch (wth->file_type_subtype) {
+
+	case WTAP_FILE_TYPE_SUBTYPE_PCAP_NSEC:
+	case WTAP_FILE_TYPE_SUBTYPE_PCAP_AIX:
+		/*
+		 * Nanosecond resolution; treat fractions-of-a-second
+		 * values >= 1 000 000 000 as an indication that
+		 * the header format might not be what we think it is.
+		 */
+		if (hdr->hdr.ts_usec >= 1000000000)
+			ret++;
+		break;
 
+	default:
 		/*
-		 * Some other error, e.g. an I/O error; just give up.
+		 * Microsecond resolution; treat fractions-of-a-second
+		 * values >= 1 000 000 as an indication that the header
+		 * format might not be what we think it is.
 		 */
-		return BAD_READ;
+		if (hdr->hdr.ts_usec >= 1000000)
+			ret++;
+		break;
+	}
+	if (hdr->hdr.incl_len > WTAP_MAX_PACKET_SIZE) {
+		/*
+		 * Probably either a corrupt capture file or a file
+		 * of a type different from the one we're trying.
+		 */
+		ret++;
 	}
 
-	/*
-	 * OK, the first two records look OK; assume this is the
-	 * right format.
-	 */
-	return THIS_FORMAT;
+        if (hdr->hdr.orig_len > 64*1024*1024) {
+                /*
+                 * In theory I guess the on-the-wire packet size can be
+                 * arbitrarily large, and it can certainly be larger than the
+                 * maximum snapshot length which bounds the snapshot size,
+                 * but any file claiming 64MB in a single packet is *probably*
+                 * corrupt, and treating them as such makes the heuristics
+                 * much more reliable. See, for example,
+                 *
+                 *    https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=9634
+                 *
+                 * (64MB is an arbitrary size at this point).
+                 */
+                ret++;
+        }
+
+	if (hdr->hdr.incl_len > wth->snapshot_length) {
+	        /*
+	         * This is not a fatal error, and files that have one
+	         * such packet probably have thousands. For discussion,
+	         * see
+	         * https://www.wireshark.org/lists/wireshark-dev/201307/msg00076.html
+	         * and related messages.
+	         *
+	         * The packet contents will be copied to a Buffer, which
+	         * expands as necessary to hold the contents; we don't have
+	         * to worry about fixed-length buffers allocated based on
+	         * the original snapshot length.
+	         *
+	         * We just treat this as an indication that we might be
+	         * trying the wrong file type here.
+	         */
+		ret++;
+	}
+
+	if (hdr->hdr.incl_len > hdr->hdr.orig_len) {
+		/*
+		 * Another hint that this might be the wrong file type.
+		 */
+		ret++;
+	}
+
+	return ret;
 }
 
 /* Read the next packet */
@@ -622,15 +654,23 @@ libpcap_read_packet(wtap *wth, FILE_T fh, struct wtap_pkthdr *phdr,
 	struct pcaprec_ss990915_hdr hdr;
 	guint packet_size;
 	guint orig_size;
-	int bytes_read;
 	int phdr_len;
 	libpcap_t *libpcap;
 
-	bytes_read = libpcap_read_header(wth, fh, err, err_info, &hdr);
-	if (bytes_read == -1) {
+	if (!libpcap_read_header(wth, fh, err, err_info, &hdr))
+		return FALSE;
+
+	if (hdr.hdr.incl_len > WTAP_MAX_PACKET_SIZE) {
 		/*
-		 * We failed to read the header.
+		 * Probably a corrupt capture file; return an error,
+		 * so that our caller doesn't blow up trying to allocate
+		 * space for an immensely-large packet.
 		 */
+		*err = WTAP_ERR_BAD_FILE;
+		if (err_info != NULL) {
+			*err_info = g_strdup_printf("pcap: File has %u-byte packet, bigger than maximum of %u",
+			    hdr.hdr.incl_len, WTAP_MAX_PACKET_SIZE);
+		}
 		return FALSE;
 	}
 
@@ -701,13 +741,14 @@ libpcap_read_packet(wtap *wth, FILE_T fh, struct wtap_pkthdr *phdr,
 
 /* Read the header of the next packet.
 
-   Return -1 on an error, or the number of bytes of header read on success. */
+   Return FALSE on an error, TRUE on success. */
 static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
     struct pcaprec_ss990915_hdr *hdr)
 {
-	int	bytes_to_read, bytes_read;
+	int bytes_to_read, bytes_read;
+	guint32 temp;
+	libpcap_t *libpcap;
 
-	/* Read record header. */
 	errno = WTAP_ERR_CANT_READ;
 	switch (wth->file_type_subtype) {
 
@@ -740,80 +781,16 @@ static int libpcap_read_header(wtap *wth, FILE_T fh, int *err, gchar **err_info,
 		if (*err == 0 && bytes_read != 0) {
 			*err = WTAP_ERR_SHORT_READ;
 		}
-		return -1;
-	}
-
-	adjust_header(wth, &hdr->hdr);
-
-	if (hdr->hdr.incl_len > WTAP_MAX_PACKET_SIZE) {
-		/*
-		 * Probably a corrupt capture file; return an error,
-		 * so that our caller doesn't blow up trying to allocate
-		 * space for an immensely-large packet, and so that
-		 * the code to try to guess what type of libpcap file
-		 * this is can tell when it's not the type we're guessing
-		 * it is.
-		 */
-		*err = WTAP_ERR_BAD_FILE;
-		if (err_info != NULL) {
-			*err_info = g_strdup_printf("pcap: File has %u-byte packet, bigger than maximum of %u",
-			    hdr->hdr.incl_len, WTAP_MAX_PACKET_SIZE);
-		}
-		return -1;
-	}
-
-        if (hdr->hdr.orig_len > 64*1024*1024) {
-                /*
-                 * In theory I guess the on-the-wire packet size can be
-                 * arbitrarily large, and it can certainly be larger than the
-                 * maximum snapshot length which bounds the snapshot size,
-                 * but any file claiming 64MB in a single packet is *probably*
-                 * corrupt, and treating them as such makes the heuristics
-                 * much more reliable. See, for example,
-                 *
-                 *    https://bugs.wireshark.org/bugzilla/show_bug.cgi?id=9634
-                 *
-                 * (64MB is an arbitrary size at this point).
-                 */
-                *err = WTAP_ERR_BAD_FILE;
-                if (err_info != NULL) {
-                        *err_info = g_strdup_printf("pcap: File claims packet was %u bytes on the wire",
-                            hdr->hdr.orig_len);
-                }
-                return -1;
-        }
-
-        /* Disabling because this is not a fatal error, and packets that have
-         * one such packet probably have thousands. For discussion, see
-         * https://www.wireshark.org/lists/wireshark-dev/201307/msg00076.html
-         * and related messages.
-         *
-         * The packet contents will be copied to a Buffer, which expands
-         * as necessary to hold the contents; we don't have to worry
-         * about fixed-length buffers allocated based on the original
-         * snapshot length. */
-#if 0
-	if (hdr->hdr.incl_len > wth->snapshot_length) {
-		g_warning("pcap: File has packet larger than file's snapshot length.");
+		return FALSE;
 	}
-#endif
-
-	return bytes_read;
-}
-
-static void
-adjust_header(wtap *wth, struct pcaprec_hdr *hdr)
-{
-	guint32 temp;
-	libpcap_t *libpcap;
 
 	libpcap = (libpcap_t *)wth->priv;
 	if (libpcap->byte_swapped) {
 		/* Byte-swap the record header fields. */
-		hdr->ts_sec = GUINT32_SWAP_LE_BE(hdr->ts_sec);
-		hdr->ts_usec = GUINT32_SWAP_LE_BE(hdr->ts_usec);
-		hdr->incl_len = GUINT32_SWAP_LE_BE(hdr->incl_len);
-		hdr->orig_len = GUINT32_SWAP_LE_BE(hdr->orig_len);
+		hdr->hdr.ts_sec = GUINT32_SWAP_LE_BE(hdr->hdr.ts_sec);
+		hdr->hdr.ts_usec = GUINT32_SWAP_LE_BE(hdr->hdr.ts_usec);
+		hdr->hdr.incl_len = GUINT32_SWAP_LE_BE(hdr->hdr.incl_len);
+		hdr->hdr.orig_len = GUINT32_SWAP_LE_BE(hdr->hdr.orig_len);
 	}
 
 	/* Swap the "incl_len" and "orig_len" fields, if necessary. */
@@ -823,7 +800,7 @@ adjust_header(wtap *wth, struct pcaprec_hdr *hdr)
 		break;
 
 	case MAYBE_SWAPPED:
-		if (hdr->incl_len <= hdr->orig_len) {
+		if (hdr->hdr.incl_len <= hdr->hdr.orig_len) {
 			/*
 			 * The captured length is <= the actual length,
 			 * so presumably they weren't swapped.
@@ -833,11 +810,13 @@ adjust_header(wtap *wth, struct pcaprec_hdr *hdr)
 		/* FALLTHROUGH */
 
 	case SWAPPED:
-		temp = hdr->orig_len;
-		hdr->orig_len = hdr->incl_len;
-		hdr->incl_len = temp;
+		temp = hdr->hdr.orig_len;
+		hdr->hdr.orig_len = hdr->hdr.incl_len;
+		hdr->hdr.incl_len = temp;
 		break;
 	}
+
+	return TRUE;
 }
 
 /* Returns 0 if we could write the specified encapsulation type,
-- 
cgit v1.2.1