summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl> 2016-12-22 11:56:45 +0100
committer: Peter Wu <peter@lekensteyn.nl> 2016-12-22 11:56:45 +0100
commit: 8e33b47913fbcfa8fdfd7e58bf6cd1c3f525a448 (patch)
tree: 3cbac889d1c089ca6b06a1799a40e34f86a63fc9
parent: 6fbbd39dbe5ec20c9776ac4789de10dcdc92dfaa (diff)
download: wireshark-notes-8e33b47913fbcfa8fdfd7e58bf6cd1c3f525a448.tar.gz
file-zip: further speed up DD search
The previous implementation took 8.9 seconds with this command:

    tshark -Xlua_script:file-zip.lua -r TechnicLauncher.jar -Vx -ozip_archive.decompress:FALSE

If the signature were not optional, we could optimize and avoid a linear search by using string.find with steps of four bytes on a negative match. This would take 5.6 seconds (but does not handle a missing signature).

The combined approach, which first scans with string.find (assuming a signature) and then falls back to a linear search (assuming no signature), would take 14.4 seconds (terrible in the worst case).

So try another approach: do a byte-for-byte search (as before), but delay the signature check until the length is valid. This improves the running time to 7.5 seconds.
-rw-r--r-- lua/file-zip.lua 27
1 files changed, 13 insertions, 14 deletions
diff --git a/lua/file-zip.lua b/lua/file-zip.lua
index dcfb165..d455540 100644
--- a/lua/file-zip.lua
+++ b/lua/file-zip.lua
@@ -196,21 +196,20 @@ local function find_data_desc(tvb)
-- Scans (byte for byte) for the size field and try to confirm the validity
-- of this length field. It might still have a false positive, but at least
-- it allows for a linear scan through the file (without consulting CD).
- while dd_offset + 8 < #data do
- -- Size field is at offset 4 (if sig exists) or at offset 8 otherwise.
- local size_offset
- if Struct.unpack("<I4", data, dd_offset + 1) == 0x08074b50 then
- -- Expecting Data descriptor past the signature (of length 16)
- if dd_offset + 16 > #data then return end
- size_offset = dd_offset + 8
- else
- size_offset = dd_offset + 4
- end
- -- Validata size or continue with next byte on failure.
- local comp_size = Struct.unpack("<I4", data, size_offset + 1)
- if comp_size == dd_offset then
- return dd_offset, (size_offset - dd_offset) + 8
+ while dd_offset + 16 <= #data do
+ -- Try to locate the begin of the Data descriptor header (dd_offset).
+ -- Assume no signature, so begin is at CRC-32 and size is next dword.
+ -- If there is actually a signature, then dd_offset-4 is the begin.
+ local comp_size = Struct.unpack("<I4", data, dd_offset + 5)
+ if comp_size == dd_offset - 4 and
+ Struct.unpack("<I4", data, dd_offset - 3) == 0x08074b50 then
+ -- Signature found, data ends four bytes ago.
+ return dd_offset - 4, 16
+ elseif comp_size == dd_offset then
+ -- Signature not found, but length matches.
+ return dd_offset, 12
else
+ -- Continue with next byte.
dd_offset = dd_offset + 1
end
end