summary | refs | log | tree | commit | diff
path: root/cipher/chacha20.c
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-05-11 12:00:19 +0300
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-05-11 12:00:19 +0300
commit def7d4cad386271c6d4e2f10aabe0cb4abd871e4 (patch)
tree 173fee5e8ff2ed596a57cdae5c160811fe506d23 /cipher/chacha20.c
parent 23f33d57c9b6f2295a8ddfc9a8eee5a2c30cf406 (diff)
download libgcrypt-def7d4cad386271c6d4e2f10aabe0cb4abd871e4.tar.gz
chacha20: add SSSE3 assembly implementation
* cipher/Makefile.am: Add 'chacha20-ssse3-amd64.S'.
* cipher/chacha20-ssse3-amd64.S: New.
* cipher/chacha20.c (USE_SSSE3): New macro.
[USE_SSSE3] (_gcry_chacha20_amd64_ssse3_blocks): New.
(chacha20_do_setkey): Select SSSE3 implementation if there is HW support.
* configure.ac [host=x86-64]: Add 'chacha20-ssse3-amd64.lo'.
--

Add SSSE3 optimized implementation for ChaCha20. Based on implementation by Andrew Moon.

Before (Intel Haswell):

CHACHA20   | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 1.97 ns/B 483.6 MiB/s 6.31 c/B
STREAM dec | 1.97 ns/B 484.0 MiB/s 6.31 c/B

After:

CHACHA20   | nanosecs/byte mebibytes/sec cycles/byte
STREAM enc | 0.742 ns/B 1284.8 MiB/s 2.38 c/B
STREAM dec | 0.741 ns/B 1286.5 MiB/s 2.37 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/chacha20.c')
-rw-r--r-- cipher/chacha20.c 22
1 files changed, 22 insertions, 0 deletions
diff --git a/cipher/chacha20.c b/cipher/chacha20.c
index ff0366d3..de8982be 100644
--- a/cipher/chacha20.c
+++ b/cipher/chacha20.c
@@ -47,6 +47,13 @@
#define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */
#define CHACHA20_INPUT_LENGTH (CHACHA20_BLOCK_SIZE / 4)
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_SSSE3)
+# define USE_SSSE3 1
+#endif
+
struct CHACHA20_context_s;
@@ -63,6 +70,14 @@ typedef struct CHACHA20_context_s
} CHACHA20_context_t;
+#ifdef USE_SSSE3
+
+unsigned int _gcry_chacha20_amd64_ssse3_blocks(u32 *state, const byte *in,
+ byte *out, size_t bytes);
+
+#endif /* USE_SSSE3 */
+
+
static void chacha20_setiv (void *context, const byte * iv, size_t ivlen);
static const char *selftest (void);
@@ -279,6 +294,7 @@ chacha20_do_setkey (CHACHA20_context_t * ctx,
{
static int initialized;
static const char *selftest_failed;
+ unsigned int features = _gcry_get_hw_features ();
if (!initialized)
{
@@ -294,6 +310,12 @@ chacha20_do_setkey (CHACHA20_context_t * ctx,
return GPG_ERR_INV_KEYLEN;
ctx->blocks = chacha20_blocks;
+#ifdef USE_SSSE3
+ if (features & HWF_INTEL_SSSE3)
+ ctx->blocks = _gcry_chacha20_amd64_ssse3_blocks;
+#endif
+
+ (void)features;
chacha20_keysetup (ctx, key, keylen);