summaryrefslogtreecommitdiff
path: root/epan/charsets.c
diff options
context:
space:
mode:
authorGuy Harris <guy@alum.mit.edu>2016-12-11 21:49:14 -0800
committerGuy Harris <guy@alum.mit.edu>2016-12-12 05:49:50 +0000
commit4d47c9a84144510a33747801cf4ef65f1b3da422 (patch)
tree8b6f3a7651c209b7ec634f78292b432b7f6cda66 /epan/charsets.c
parent6bc65121287e585d06755b3406c571ad9ba4d766 (diff)
downloadwireshark-4d47c9a84144510a33747801cf4ef65f1b3da422.tar.gz
Fix handling of EBCDIC string fields.
Have a routine that takes a 256-element translation table and uses it to map various flavors of EBCDIC to Unicode. Have separate translation tables for "common" EBCDIC (everything that's the same in all EBCDIC code pages that include the original EBCDIC characters) and EBCDIC code page 037. Add ENC_EBCDIC_CP037 for code page 037. Change-Id: Ia882b3c0abef9e30eb54cd47396e6fa0d6342044 Reviewed-on: https://code.wireshark.org/review/19212 Reviewed-by: Guy Harris <guy@alum.mit.edu>
Diffstat (limited to 'epan/charsets.c')
-rw-r--r--epan/charsets.c153
1 files changed, 116 insertions, 37 deletions
diff --git a/epan/charsets.c b/epan/charsets.c
index 5c3e5e0389..c7559257e7 100644
--- a/epan/charsets.c
+++ b/epan/charsets.c
@@ -473,7 +473,7 @@ const gunichar2 charset_table_cp437[0x80] = {
};
/*
- * Given a wmem scope, a pointer, and a length, and a translation table,
+ * Given a wmem scope, a pointer, a length, and a translation table,
* treat the string of bytes referred to by the pointer and length as a
* string encoded using one octet per character, with octets with the
* high-order bit clear being ASCII and octets with the high-order bit
@@ -959,38 +959,38 @@ ASCII_to_EBCDIC1(guint8 c)
#endif
static const guint8 EBCDIC_translate_ASCII [ 256 ] = {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
- 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
- 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
- 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
- 0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
- 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,
- 0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
- 0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
- 0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
- 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
- 0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
- 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71,
- 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
- 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,
- 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,
- 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
- 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51,
- 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
- 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
- 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
+ 0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,
+ 0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x2E, 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
+ 0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x2E, 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
+ 0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x2E, 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
+ 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x2E, 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
+ 0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
+ 0x71, 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ 0x79, 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,
+ 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,
+ 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50,
+ 0x51, 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+ 0x59, 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
};
void
@@ -1012,14 +1012,93 @@ EBCDIC_to_ASCII1(guint8 c)
return EBCDIC_translate_ASCII[c];
}
+/* Tables for EBCDIC code pages */
+
+/* EBCDIC common; based on the table in appendix H of ESA/370 Principles
+ of Operation, but with some code points that don't correspond to
+ the same characters in code pages 037 and 1158 mapped to REPLACEMENT
+ CHARACTER - there may be more code points of that sort */
+const gunichar2 charset_table_ebcdic[256] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x009c, 0x0009, 0x0086, 0x007f,
+ 0x0097, 0x008d, 0x008e, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x009d, 0x0085, 0x0008, 0x0087,
+ 0x0018, 0x0019, 0x0092, 0x008f, 0x001c, 0x001d, 0x001e, 0x001f,
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000a, 0x0017, 0x001b,
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x0005, 0x0006, 0x0007,
+ UNREPL, UNREPL, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004,
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x0014, 0x0015, UNREPL, 0x001a,
+ 0x0020, 0x00a0, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, UNREPL, UNREPL, 0x002e, 0x003c, 0x0028, 0x002b, UNREPL,
+ 0x0026, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, UNREPL, UNREPL, 0x0024, 0x002a, 0x0029, 0x003b, UNREPL,
+ 0x002d, 0x002f, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, UNREPL, UNREPL, 0x002c, 0x0025, 0x005f, 0x003e, 0x003f,
+ UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, 0x0060, 0x003a, 0x0023, 0x0040, 0x0027, 0x003d, 0x0022,
+ UNREPL, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,
+ 0x0071, 0x0072, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, 0x007e, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078,
+ 0x0079, 0x007a, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ 0x007b, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ 0x007d, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050,
+ 0x0051, 0x0052, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ 0x005c, UNREPL, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058,
+ 0x0059, 0x005a, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL, UNREPL,
+};
+
+/* EBCDIC code page 037 */
+const gunichar2 charset_table_ebcdic_cp037[256] = {
+ 0x0000, 0x0001, 0x0002, 0x0003, 0x009c, 0x0009, 0x0086, 0x007f,
+ 0x0097, 0x008d, 0x008e, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
+ 0x0010, 0x0011, 0x0012, 0x0013, 0x009d, 0x0085, 0x0008, 0x0087,
+ 0x0018, 0x0019, 0x0092, 0x008f, 0x001c, 0x001d, 0x001e, 0x001f,
+ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x000a, 0x0017, 0x001b,
+ 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x0005, 0x0006, 0x0007,
+ 0x0090, 0x0091, 0x0016, 0x0093, 0x0094, 0x0095, 0x0096, 0x0004,
+ 0x0098, 0x0099, 0x009a, 0x009b, 0x0014, 0x0015, 0x009e, 0x001a,
+ 0x0020, 0x00a0, 0x00e2, 0x00e4, 0x00e0, 0x00e1, 0x00e3, 0x00e5,
+ 0x00e7, 0x00f1, 0x00a2, 0x002e, 0x003c, 0x0028, 0x002b, 0x007c,
+ 0x0026, 0x00e9, 0x00ea, 0x00eb, 0x00e8, 0x00ed, 0x00ee, 0x00ef,
+ 0x00ec, 0x00df, 0x0021, 0x0024, 0x002a, 0x0029, 0x003b, 0x00ac,
+ 0x002d, 0x002f, 0x00c2, 0x00c4, 0x00c0, 0x00c1, 0x00c3, 0x00c5,
+ 0x00c7, 0x00d1, 0x00a6, 0x002c, 0x0025, 0x005f, 0x003e, 0x003f,
+ 0x00f8, 0x00c9, 0x00ca, 0x00cb, 0x00c8, 0x00cd, 0x00ce, 0x00cf,
+ 0x00cc, 0x0060, 0x003a, 0x0023, 0x0040, 0x0027, 0x003d, 0x0022,
+ 0x00d8, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+ 0x0068, 0x0069, 0x00ab, 0x00bb, 0x00f0, 0x00fd, 0x00fe, 0x00b1,
+ 0x00b0, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070,
+ 0x0071, 0x0072, 0x00aa, 0x00ba, 0x00e6, 0x00b8, 0x00c6, 0x00a4,
+ 0x00b5, 0x007e, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078,
+ 0x0079, 0x007a, 0x00a1, 0x00bf, 0x00d0, 0x00dd, 0x00de, 0x00ae,
+ 0x005e, 0x00a3, 0x00a5, 0x00b7, 0x00a9, 0x00a7, 0x00b6, 0x00bc,
+ 0x00bd, 0x00be, 0x005b, 0x005d, 0x00af, 0x00a8, 0x00b4, 0x00d7,
+ 0x007b, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+ 0x0048, 0x0049, 0x00ad, 0x00f4, 0x00f6, 0x00f2, 0x00f3, 0x00f5,
+ 0x007d, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, 0x0050,
+ 0x0051, 0x0052, 0x00b9, 0x00fb, 0x00fc, 0x00f9, 0x00fa, 0x00ff,
+ 0x005c, 0x00f7, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058,
+ 0x0059, 0x005a, 0x00b2, 0x00d4, 0x00d6, 0x00d2, 0x00d3, 0x00d5,
+ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+ 0x0038, 0x0039, 0x00b3, 0x00db, 0x00dc, 0x00d9, 0x00da, 0x009f,
+};
+
/*
- * Given a wmem scope, a pointer, and a length, and a translation table,
+ * Given a wmem scope, a pointer, a length, and a translation table,
* treat the string of bytes referred to by the pointer and length as a
- * string encoded in EBCDIC using one octet per character, and return a
+ * string encoded using one octet per character, with octets being
+ * mapped by the translation table to 2-byte Unicode Basic Multilingual
+ * Plane characters (including REPLACEMENT CHARACTER), and return a
* pointer to a UTF-8 string, allocated using the wmem scope.
*/
guint8 *
-get_ebcdic_string(wmem_allocator_t *scope, const guint8 *ptr, gint length)
+get_ebcdic_unichar2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[256])
{
wmem_strbuf_t *str;
@@ -1028,7 +1107,7 @@ get_ebcdic_string(wmem_allocator_t *scope, const guint8 *ptr, gint length)
while (length > 0) {
guint8 ch = *ptr;
- wmem_strbuf_append_unichar(str, EBCDIC_translate_ASCII[ch]);
+ wmem_strbuf_append_unichar(str, table[ch]);
ptr++;
length--;
}