diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-05-11 12:00:19 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-05-11 12:00:19 +0300 |
commit | def7d4cad386271c6d4e2f10aabe0cb4abd871e4 (patch) | |
tree | 173fee5e8ff2ed596a57cdae5c160811fe506d23 /cipher/chacha20.c | |
parent | 23f33d57c9b6f2295a8ddfc9a8eee5a2c30cf406 (diff) | |
download | libgcrypt-def7d4cad386271c6d4e2f10aabe0cb4abd871e4.tar.gz |
chacha20: add SSSE3 assembly implementation
* cipher/Makefile.am: Add 'chacha20-ssse3-amd64.S'.
* cipher/chacha20-ssse3-amd64.S: New.
* cipher/chacha20.c (USE_SSSE3): New macro.
[USE_SSSE3] (_gcry_chacha20_amd64_ssse3_blocks): New.
(chacha20_do_setkey): Select SSSE3 implementation if there is HW
support.
* configure.ac [host=x86-64]: Add 'chacha20-ssse3-amd64.lo'.
--
Add SSSE3 optimized implementation for ChaCha20. Based on implementation
by Andrew Moon.
Before (Intel Haswell):
CHACHA20 | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 1.97 ns/B 483.6 MiB/s 6.31 c/B
STREAM dec | 1.97 ns/B 484.0 MiB/s 6.31 c/B
After:
CHACHA20 | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 0.742 ns/B 1284.8 MiB/s 2.38 c/B
STREAM dec | 0.741 ns/B 1286.5 MiB/s 2.37 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/chacha20.c')
-rw-r--r-- | cipher/chacha20.c | 22 |
1 file changed, 22 insertions, 0 deletions
diff --git a/cipher/chacha20.c b/cipher/chacha20.c
index ff0366d3..de8982be 100644
--- a/cipher/chacha20.c
+++ b/cipher/chacha20.c
@@ -47,6 +47,13 @@
 #define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */
 #define CHACHA20_INPUT_LENGTH (CHACHA20_BLOCK_SIZE / 4)
 
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3)
+# define USE_SSSE3 1
+#endif
+
 
 struct CHACHA20_context_s;
 
@@ -63,6 +70,14 @@ typedef struct CHACHA20_context_s
 } CHACHA20_context_t;
 
 
+#ifdef USE_SSSE3
+
+unsigned int _gcry_chacha20_amd64_ssse3_blocks(u32 *state, const byte *in,
+                                               byte *out, size_t bytes);
+
+#endif /* USE_SSSE3 */
+
+
 static void chacha20_setiv (void *context, const byte * iv, size_t ivlen);
 static const char *selftest (void);
 
@@ -279,6 +294,7 @@ chacha20_do_setkey (CHACHA20_context_t * ctx,
 {
   static int initialized;
   static const char *selftest_failed;
+  unsigned int features = _gcry_get_hw_features ();
 
   if (!initialized)
     {
@@ -294,6 +310,12 @@ chacha20_do_setkey (CHACHA20_context_t * ctx,
     return GPG_ERR_INV_KEYLEN;
 
   ctx->blocks = chacha20_blocks;
+#ifdef USE_SSSE3
+  if (features & HWF_INTEL_SSSE3)
+    ctx->blocks = _gcry_chacha20_amd64_ssse3_blocks;
+#endif
+
+  (void)features;
 
   chacha20_keysetup (ctx, key, keylen);
 