summary | refs | log | tree | commit | diff
path: root/cipher/poly1305.c
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-05-11 20:18:49 +0300
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-05-12 20:32:50 +0300
commit 297532602ed2d881d8fdc393d1961068a143a891 (patch)
tree 9fc6e7cfd4f685cf52102a39a6c361e6ed160499 /cipher/poly1305.c
parent e813958419b0ec4439e6caf07d3b2234cffa2bfa (diff)
download libgcrypt-297532602ed2d881d8fdc393d1961068a143a891.tar.gz
poly1305: add AMD64/SSE2 optimized implementation
* cipher/Makefile.am: Add 'poly1305-sse2-amd64.S'.
* cipher/poly1305-internal.h (POLY1305_USE_SSE2)
(POLY1305_SSE2_BLOCKSIZE, POLY1305_SSE2_STATESIZE)
(POLY1305_SSE2_ALIGNMENT): New.
(POLY1305_LARGEST_BLOCKSIZE, POLY1305_LARGEST_STATESIZE)
(POLY1305_STATE_ALIGNMENT): Use SSE2 versions when needed.
* cipher/poly1305-sse2-amd64.S: New.
* cipher/poly1305.c [POLY1305_USE_SSE2]
(_gcry_poly1305_amd64_sse2_init_ext)
(_gcry_poly1305_amd64_sse2_finish_ext)
(_gcry_poly1305_amd64_sse2_blocks, poly1305_amd64_sse2_ops): New.
(_gcry_poly1305_init) [POLY1305_USE_SSE2]: Use SSE2 version.
* configure.ac [host=x86_64]: Add 'poly1305-sse2-amd64.lo'.
--

Add Andrew Moon's public domain SSE2 implementation of Poly1305. Original
source is available at: https://github.com/floodyberry/poly1305-opt

Benchmarks on Intel i5-4570 (haswell):

Old:
         | nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 0.844 ns/B 1130.2 MiB/s 2.70 c/B

New:
         | nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 0.448 ns/B 2129.5 MiB/s 1.43 c/B

Benchmarks on Intel i5-2450M (sandy-bridge):

Old:
         | nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 1.25 ns/B 763.0 MiB/s 3.12 c/B

New:
         | nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 0.605 ns/B 1575.9 MiB/s 1.51 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/poly1305.c')
-rw-r--r-- cipher/poly1305.c 23
1 files changed, 23 insertions, 0 deletions
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 472ae42c..cd1902aa 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -38,6 +38,25 @@ static const char *selftest (void);
+#ifdef POLY1305_USE_SSE2 /* AMD64/SSE2 code path; compiled in only when POLY1305_USE_SSE2 is defined. */
+ /* Prototypes only: no bodies appear in this file's diff (presumably provided by cipher/poly1305-sse2-amd64.S -- confirm). */
+void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key);
+unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m,
+ size_t remaining,
+ byte mac[16]); /* mac is declared as a 16-byte array parameter. */
+unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m,
+ size_t bytes); /* NOTE(review): meaning of the unsigned int return values is not visible here. */
+ /* Ops table for the SSE2 code; _gcry_poly1305_init stores its address in ctx->ops when POLY1305_USE_SSE2 is defined. */
+static const poly1305_ops_t poly1305_amd64_sse2_ops = { /* positional initializer: order must match poly1305_ops_t, declared elsewhere */
+ POLY1305_SSE2_BLOCKSIZE, /* presumably the block size field -- confirm against poly1305_ops_t */
+ _gcry_poly1305_amd64_sse2_init_ext, /* init routine: takes the state and the key */
+ _gcry_poly1305_amd64_sse2_blocks, /* block routine: takes ctx, message pointer and byte count */
+ _gcry_poly1305_amd64_sse2_finish_ext /* finish routine: takes state, message pointer, remaining count and mac buffer */
+};
+
+#endif /* POLY1305_USE_SSE2 */
+
+
#ifdef HAVE_U64_TYPEDEF
/* Reference unoptimized poly1305 implementation using 32 bit * 32 bit = 64 bit
@@ -612,7 +631,11 @@ _gcry_poly1305_init (poly1305_context_t * ctx, const byte * key,
if (selftest_failed)
return GPG_ERR_SELFTEST_FAILED;
+#ifdef POLY1305_USE_SSE2
+ ctx->ops = &poly1305_amd64_sse2_ops;
+#else
ctx->ops = &poly1305_default_ops;
+#endif
buf_cpy (keytmp.b, key, POLY1305_KEYLEN);
poly1305_init (ctx, &keytmp);