Make "tvb_find_line_end()" and "tvb_find_line_end_unquoted()" treat CR by itself as a line ending, as well as treating CR LF and LF as line endings.

Tweak the Telnet dissector to treat LF and CR NUL as line endings, but not to treat CR by itself as a line ending (that's not exactly what the NVT specification in the Telnet RFC specifies, but the resulting output may be a bit more readable that way).

svn path=/trunk/; revision=2613
author: Guy Harris <guy@alum.mit.edu> 2000-11-12 00:59:09 +0000
committer: Guy Harris <guy@alum.mit.edu> 2000-11-12 00:59:09 +0000
commit: f8934eb9e7f25d7b39c4014e18b2cdc6237a26d8 (patch)
tree: 06845fdff4d72546b57dc60f095536100c4031be
parent: 9226b24e601a584ae7a572e2225ddae6918b283c (diff)
download: wireshark-f8934eb9e7f25d7b39c4014e18b2cdc6237a26d8.tar.gz
2 files changed, 95 insertions, 77 deletions
diff --git a/epan/tvbuff.c b/epan/tvbuff.c
index 4c689ba99f..6b72151d18 100644
--- a/epan/tvbuff.c
+++ b/epan/tvbuff.c
@@ -9,7 +9,7 @@
  * 		the data of a backing tvbuff, or can be a composite of
  * 		other tvbuffs.
  *
- * $Id: tvbuff.c,v 1.6 2000/11/11 19:55:48 guy Exp $
+ * $Id: tvbuff.c,v 1.7 2000/11/12 00:59:09 guy Exp $
  *
  * Copyright (c) 2000 by Gilbert Ramirez <gram@xiexie.org>
  *
@@ -1283,10 +1283,13 @@ tvb_find_line_end(tvbuff_t *tvb, gint offset, int len, gint *next_offset)
 	 */
 	eob_offset = offset + len;
 
-	eol_offset = tvb_find_guint8(tvb, offset, len, '\n');
+	/*
+	 * Look either for a CR or an LF.
+	 */
+	eol_offset = tvb_pbrk_guint8(tvb, offset, len, "\r\n");
 	if (eol_offset == -1) {
 		/*
-		 * No LF - line is presumably continued in next packet.
+		 * No CR or LF - line is presumably continued in next packet.
 		 * We pretend the line runs to the end of the tvbuff.
 		 */
 		linelen = eob_offset - offset;
@@ -1294,59 +1297,30 @@ tvb_find_line_end(tvbuff_t *tvb, gint offset, int len, gint *next_offset)
 	} else {
 		/*
 		 * Find the number of bytes between the starting offset
-		 * and the LF.
+		 * and the CR or LF.
 		 */
 		linelen = eol_offset - offset;
 
 		/*
-		 * Is the LF at the beginning of the line?
+		 * Is it a CR?
 		 */
-		if (linelen > 0) {
+		if (tvb_get_guint8(tvb, eol_offset) == '\r') {
 			/*
-			 * No - is it preceded by a carriage return?
-			 * (Perhaps it's supposed to be, but that's not
-			 * guaranteed....)
+			 * Yes - is it followed by an LF?
 			 */
-			if (tvb_get_guint8(tvb, eol_offset - 1) == '\r') {
-				/*
-				 * Yes.  The EOL starts with the CR;
-				 * don't count it as part of the data
-				 * in the line.
-				 */
-				linelen--;
-			} else {
+			if (eol_offset + 1 < eob_offset &&
+			    tvb_get_guint8(tvb, eol_offset + 1) == '\n') {
 				/*
-				 * No.  The EOL starts with the LF.
+				 * Yes; skip over the CR.
 				 */
-
-				/*
-				 * I seem to remember that we once saw lines
-				 * ending with LF-CR in an HTTP request or
-				 * response, so check if it's *followed*
-				 * by a carriage return.
-				 *
-				 * XXX - what about <LF><CR> with the <LF>
-				 * not preceded by non-LF/non-CR data?
-				 * Should we check for that, or do we
-				 * run the risk of misinterpreting a
-				 * sequence of multiple <CR><LF> as having
-				 * a bunch of <LF><CR> in it?
-				 */
-				if (eol_offset + 1 < eob_offset &&
-				    tvb_get_guint8(tvb, eol_offset + 1) == '\r') {
-					/*
-					 * It's <non-LF><LF><CR>; say it ends
-					 * with the CR, and skip past the
-					 * LF.
-					 */
-					eol_offset++;
-				}
+				eol_offset++;
 			}
 		}
 
 		/*
 		 * Return the offset of the character after the last
-		 * character in the line.
+		 * character in the line, skipping over the last character
+		 * in the line terminator.
 		 */
 		*next_offset = eol_offset + 1;
 	}
@@ -1403,10 +1377,10 @@ tvb_find_line_end_unquoted(tvbuff_t *tvb, gint offset, int len,
 			    '"');
 		} else {
 			/*
-			 * Look either for an LF or a '"'.
+			 * Look either for a CR, an LF, or a '"'.
 			 */
 			char_offset = tvb_pbrk_guint8(tvb, cur_offset, len,
-			    "\n\"");
+			    "\r\n\"");
 		}
 		if (cur_offset == -1) {
 			/*
@@ -1419,57 +1393,61 @@ tvb_find_line_end_unquoted(tvbuff_t *tvb, gint offset, int len,
 			break;
 		}
 			
-		/*
-		 * OK, which is it?
-		 */
-		c = tvb_get_guint8(tvb, char_offset);
-		if (c == '\n') {
-			if (is_quoted) {
+		if (is_quoted) {
+			/*
+			 * We're processing a quoted string.
+			 * We only looked for ", so we know it's a ";
+			 * as we're processing a quoted string, it's a
+			 * closing quote.
+			 */
+			is_quoted = FALSE;
+		} else {
+			/*
+			 * OK, what is it?
+			 */
+			c = tvb_get_guint8(tvb, char_offset);
+			if (c == '"') {
 				/*
-				 * Quoted LF; it's part of the string, not
-				 * a line terminator.
-				 * Do nothing. Wait for next quote.
+				 * Un-quoted "; it begins a quoted
+				 * string.
 				 */
+				is_quoted = TRUE;
 			} else {
 				/*
-				 * Un-quoted LF; it's a line terminator.
+				 * It's a CR or LF; we've found a line
+				 * terminator.
+				 *
 				 * Find the number of bytes between the
-				 * starting offset and the LF.
+				 * starting offset and the CR or LF.
 				 */
 				linelen = char_offset - offset;
 
 				/*
-				 * Is the LF at the beginning of the line?
+				 * Is it a CR?
 				 */
-				if (linelen > 0) {
+				if (c == '\r') {
 					/*
-					 * No - is it preceded by a carriage
-					 * return?
-					 * (Perhaps it's supposed to be, but
-					 * that's not guaranteed....)
+					 * Yes; is it followed by an LF?
 					 */
-					if (tvb_get_guint8(tvb, char_offset-1)
-					    == '\r') {
+					if (char_offset + 1 < eob_offset &&
+					    tvb_get_guint8(tvb, char_offset + 1)
+					      == '\n') {
 						/*
-						 * Yes.  The EOL starts with
-						 * the CR; don't count it as
-						 * part of the data in the
-						 * line.
+						 * Yes; skip over the CR.
 						 */
-						linelen--;
+						char_offset++;
 					}
 				}
 
 				/*
 				 * Return the offset of the character after
-				 * the last character in the line, and
-				 * quit.
+				 * the last character in the line, skipping
+				 * over the last character in the line
+				 * terminator, and quit.
 				 */
 				*next_offset = char_offset + 1;
 				break;
 			}
-		} else if (c == '"') {
-			is_quoted = !is_quoted;
 		}
 
 		/*
diff --git a/packet-telnet.c b/packet-telnet.c
index d5ca334044..28696277e6 100644
--- a/packet-telnet.c
+++ b/packet-telnet.c
@@ -2,7 +2,7 @@
  * Routines for telnet packet dissection
  * Copyright 1999, Richard Sharpe <rsharpe@ns.aus.com>
  *
- * $Id: packet-telnet.c,v 1.18 2000/11/09 10:56:32 guy Exp $
+ * $Id: packet-telnet.c,v 1.19 2000/11/12 00:59:07 guy Exp $
  *
  * Ethereal - Network traffic analyzer
  * By Gerald Combs <gerald@zing.org>
@@ -302,19 +302,59 @@ telnet_add_text(proto_tree *tree, tvbuff_t *tvb, int offset, int len)
 {
   gint next_offset;
   int linelen;
+  guint8 c;
+  gboolean last_char_was_cr;
 
-  while (len != 0 && tvb_length_remaining(tvb, offset)) {
+  while (len != 0 && tvb_length_remaining(tvb, offset) != 0) {
     /*
      * Find the end of the line.
      */
-    tvb_find_line_end(tvb, offset, len, &next_offset);
+    linelen = tvb_find_line_end(tvb, offset, len, &next_offset);
+    len -= next_offset - offset;	/* subtract out the line's characters */
+
+    /*
+     * In Telnet, CR NUL is the way you send a CR by itself in the
+     * default ASCII mode; don't treat CR by itself as a line ending,
+     * treat only CR NUL, CR LF, or LF by itself as a line ending.
+     */
+    if (next_offset == offset + linelen + 1 && len >= 1) {
+      /*
+       * Well, we saw a one-character line ending, so either it's a CR
+       * or an LF; we have at least two characters left, including the
+       * CR.
+       *
+       * If the line ending is a CR, skip all subsequent CRs; at
+       * least one capture appeared to have multiple CRs at the end of
+       * a line.
+       */
+      if (tvb_get_guint8(tvb, offset + linelen) == '\r') {
+      	last_char_was_cr = TRUE;
+      	while (len != 0 && tvb_length_remaining(tvb, next_offset) != 0) {
+          c = tvb_get_guint8(tvb, next_offset);
+      	  next_offset++;	/* skip over that character */
+      	  len--;
+          if (c == '\n' || (c == '\0' && last_char_was_cr)) {
+            /*
+	     * LF is a line ending, whether preceded by CR or not.
+	     * NUL is a line ending if preceded by CR.
+	     */
+            break;
+          }
+      	  last_char_was_cr = (c == '\r');
+      	}
+      }
+    }
+
+    /*
+     * Now compute the length of the line *including* the end-of-line
+     * indication, if any; we display it all.
+     */
     linelen = next_offset - offset;
 
     proto_tree_add_text(tree, tvb, offset, linelen,
 			"Data: %s",
 			tvb_format_text(tvb, offset, linelen));
-    offset += linelen;
-    len -= linelen;
+    offset = next_offset;
   }
 }
author	Guy Harris <guy@alum.mit.edu>	2000-11-12 00:59:09 +0000
committer	Guy Harris <guy@alum.mit.edu>	2000-11-12 00:59:09 +0000
commit	f8934eb9e7f25d7b39c4014e18b2cdc6237a26d8 (patch)
tree	06845fdff4d72546b57dc60f095536100c4031be
parent	9226b24e601a584ae7a572e2225ddae6918b283c (diff)
download	wireshark-f8934eb9e7f25d7b39c4014e18b2cdc6237a26d8.tar.gz