From 8b29dcf2a1d4e9cb34a1b7756e4e6b6f110c7b1f Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Fri, 24 Oct 2014 11:33:01 +0200 Subject: digest.py: use threading instead At first I used multiprocessing because threading would still run with one CPU core due to the GIL. That probably happened because I accessed _hashes[algos] in the loop of _queue_updater. Now that this is not done anymore, and only the hash update function is called which releases the GIL for data larger than 2 KiB, multiple cores are actually used. For comparison, for a file of 2.3 GiB (min/avg/max/sd secs for n=10): - pee sha256sum md5sum < file: 16.5/16.9/17.4/.305 - python3 digest.py -sha256 -md5 < file: 13.7/15.0/18.7/1.77 - python2 digest.py -sha256 -md5 < file: 13.7/15.9/18.7/1.64 - jacksum -a sha256+md5 -F '#CHECKSUM{i} #FILENAME': 32.7/37.1/50/6.91 The file is actually 2367029248 bytes, resident in the disk cache. Environment: - CPU: Intel i5-460M - Arch Linux x86_64 - Linux 3.17-rc4 - coreutils 8.23 - moreutils 0.51 - jacksum 1.7.0 on OpenJDK 1.7.0_71 - Python 3.4.2, Python 2.7.8 --- digest.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/digest.py b/digest.py index 19218a8..4671467 100755 --- a/digest.py +++ b/digest.py @@ -7,7 +7,12 @@ from __future__ import print_function import hashlib import sys -from multiprocessing import Process, Queue +from threading import Thread +try: + from queue import Queue +except: + # Python 2 compatibility + from Queue import Queue def read_blocks(filename): if filename == '-': @@ -53,7 +58,7 @@ class MtHasher(Hasher): self._queues = {} self._threads = {} for algo in algos: - t = Process(target=self._queue_updater, args=(algo,), name=algo) + t = Thread(target=self._queue_updater, args=(algo,), name=algo) self._queues[algo] = Queue(MtHasher.QUEUE_SIZE) self._threads[algo] = t t.start() @@ -67,8 +72,6 @@ class MtHasher(Hasher): if not data: break h.update(data) - # Runs on a different process, so need to signal the result - 
q.put(h.hexdigest()) def update(self, data): if data: @@ -81,10 +84,8 @@ class MtHasher(Hasher): q = self._queues[algo] q.put(b'') # Terminate self._threads[algo].join() - digest = q.get_nowait() assert q.empty() - q.close() - yield algo, digest + return super(MtHasher, self).hexdigests() try: supported_algos = hashlib.algorithms_guaranteed -- cgit v1.2.1