summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Peter Wu <peter@lekensteyn.nl> 2014-10-24 11:33:01 +0200
committer: Peter Wu <peter@lekensteyn.nl> 2014-10-24 12:54:38 +0200
commit: 8b29dcf2a1d4e9cb34a1b7756e4e6b6f110c7b1f (patch)
tree: 8f54a18e91b7437ef1640f953461c53564524b84
parent: 5ce76c6289b072254bec1d749a2983b62e835df7 (diff)
download: scripts-8b29dcf2a1d4e9cb34a1b7756e4e6b6f110c7b1f.tar.gz
digest.py: use threading instead
At first I used multiprocessing because threading would still run on only one CPU core due to the GIL. That probably happened because I accessed _hashes[algos] in the loop of _queue_updater. Now that this is no longer done, and only the hash update function is called (which releases the GIL for data larger than 2 KiB), multiple cores are actually used.

For comparison, for a file of 2.3 GiB (min/avg/max/sd in seconds for n=10):
- pee sha256sum md5sum < file: 16.5/16.9/17.4/.305
- python3 digest.py -sha256 -md5 < file: 13.7/15.0/18.7/1.77
- python2 digest.py -sha256 -md5 < file: 13.7/15.9/18.7/1.64
- jacksum -a sha256+md5 -F '#CHECKSUM{i} #FILENAME': 32.7/37.1/50/6.91

The file is actually 2367029248 bytes, resident in the disk cache.

Environment:
- CPU: Intel i5-460M
- Arch Linux x86_64
- Linux 3.17-rc4
- coreutils 8.23
- moreutils 0.51
- jacksum 1.7.0 on OpenJDK 1.7.0_71
- Python 3.4.2, Python 2.7.8
-rwxr-xr-x digest.py 15
1 files changed, 8 insertions, 7 deletions
diff --git a/digest.py b/digest.py
index 19218a8..4671467 100755
--- a/digest.py
+++ b/digest.py
@@ -7,7 +7,12 @@
from __future__ import print_function
import hashlib
import sys
-from multiprocessing import Process, Queue
+from threading import Thread
+try:
+ from queue import Queue
+except:
+ # Python 2 compatibility
+ from Queue import Queue
def read_blocks(filename):
if filename == '-':
@@ -53,7 +58,7 @@ class MtHasher(Hasher):
self._queues = {}
self._threads = {}
for algo in algos:
- t = Process(target=self._queue_updater, args=(algo,), name=algo)
+ t = Thread(target=self._queue_updater, args=(algo,), name=algo)
self._queues[algo] = Queue(MtHasher.QUEUE_SIZE)
self._threads[algo] = t
t.start()
@@ -67,8 +72,6 @@ class MtHasher(Hasher):
if not data:
break
h.update(data)
- # Runs on a different process, so need to signal the result
- q.put(h.hexdigest())
def update(self, data):
if data:
@@ -81,10 +84,8 @@ class MtHasher(Hasher):
q = self._queues[algo]
q.put(b'') # Terminate
self._threads[algo].join()
- digest = q.get_nowait()
assert q.empty()
- q.close()
- yield algo, digest
+ return super(MtHasher, self).hexdigests()
try:
supported_algos = hashlib.algorithms_guaranteed