From 2fd06e207dcea1d8a7f0e7e92f3359615a99421b Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna
Date: Thu, 23 May 2013 11:04:18 +0300
Subject: serpent: add SSE2 accelerated amd64 implementation

* configure.ac (serpent): Add 'serpent-sse2-amd64.lo'.
* cipher/Makefile.am (EXTRA_libcipher_la_SOURCES): Add
'serpent-sse2-amd64.S'.
* cipher/cipher.c (gcry_cipher_open) [USE_SERPENT]: Register bulk
functions for CBC-decryption and CTR-mode.
* cipher/serpent.c (USE_SSE2): New macro.
[USE_SSE2] (_gcry_serpent_sse2_ctr_enc, _gcry_serpent_sse2_cbc_dec): New
prototypes to assembler functions.
(serpent_setkey): Set 'serpent_init_done' before calling serpent_test.
(_gcry_serpent_ctr_enc): New function.
(_gcry_serpent_cbc_dec): New function.
(selftest_ctr_128): New function.
(selftest_cbc_128): New function.
(selftest): Call selftest_ctr_128 and selftest_cbc_128.
* cipher/serpent-sse2-amd64.S: New file.
* src/cipher.h (_gcry_serpent_ctr_enc): New prototype.
(_gcry_serpent_cbc_dec): New prototype.
--

[v2]: Converted to SSE2, to support all amd64 processors (SSE2 is a
required feature of the AMD64 SysV ABI).

Patch adds word-sliced SSE2 implementation of Serpent for amd64 for
speeding up parallelizable workloads (CTR mode, CBC mode decryption).
Implementation processes eight blocks in parallel, with two four-block
sets interleaved for out-of-order scheduling.

Speed old vs. new on Intel Core i5-2450M (Sandy-Bridge):
              ECB/Stream          CBC             CFB             OFB             CTR
            --------------- --------------- --------------- --------------- ---------------
SERPENT128   1.00x   0.99x   1.00x   3.98x   1.00x   1.01x   1.00x   1.01x   4.04x   4.04x

Speed old vs. new on AMD Phenom II X6 1055T:
              ECB/Stream          CBC             CFB             OFB             CTR
            --------------- --------------- --------------- --------------- ---------------
SERPENT128   1.02x   1.01x   1.00x   2.83x   1.00x   1.00x   1.00x   1.00x   2.72x   2.72x

Speed old vs. new on Intel Core2 Duo T8100:
              ECB/Stream          CBC             CFB             OFB             CTR
            --------------- --------------- --------------- --------------- ---------------
SERPENT128   1.00x   1.02x   0.97x   4.02x   0.98x   1.01x   0.98x   1.00x   3.82x   3.91x

Signed-off-by: Jussi Kivilinna
---
 cipher/cipher.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'cipher/cipher.c')

diff --git a/cipher/cipher.c b/cipher/cipher.c
index f1224af4..20ac2c73 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -726,6 +726,14 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
           h->bulk.ctr_enc = _gcry_camellia_ctr_enc;
           break;
 #endif /*USE_CAMELLIA*/
+#ifdef USE_SERPENT
+        case GCRY_CIPHER_SERPENT128:
+        case GCRY_CIPHER_SERPENT192:
+        case GCRY_CIPHER_SERPENT256:
+          h->bulk.cbc_dec = _gcry_serpent_cbc_dec;
+          h->bulk.ctr_enc = _gcry_serpent_ctr_enc;
+          break;
+#endif /*USE_SERPENT*/
 
         default:
           break;
-- 
cgit v1.2.1