summary | refs | log | tree | commit | diff
path: root/cipher/poly1305-internal.h
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-05-11 20:52:27 +0300
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2014-05-16 20:54:54 +0300
commit 98f021961ee65669037bc8bb552a69fd78f610fc (patch)
tree b95d026328b20f3d0104d749471b8f3649cd23bd /cipher/poly1305-internal.h
parent 297532602ed2d881d8fdc393d1961068a143a891 (diff)
download libgcrypt-98f021961ee65669037bc8bb552a69fd78f610fc.tar.gz
poly1305: add AMD64/AVX2 optimized implementation
* cipher/Makefile.am: Add 'poly1305-avx2-amd64.S'.
* cipher/poly1305-avx2-amd64.S: New.
* cipher/poly1305-internal.h (POLY1305_USE_AVX2)
(POLY1305_AVX2_BLOCKSIZE, POLY1305_AVX2_STATESIZE)
(POLY1305_AVX2_ALIGNMENT): New.
(POLY1305_LARGEST_BLOCKSIZE, POLY1305_LARGEST_STATESIZE)
(POLY1305_STATE_ALIGNMENT): Use AVX2 versions when needed.
* cipher/poly1305.c [POLY1305_USE_AVX2]
(_gcry_poly1305_amd64_avx2_init_ext)
(_gcry_poly1305_amd64_avx2_finish_ext)
(_gcry_poly1305_amd64_avx2_blocks, poly1305_amd64_avx2_ops): New.
(_gcry_poly1305_init) [POLY1305_USE_AVX2]: Use AVX2 implementation if
AVX2 supported by CPU.
* configure.ac [host=x86_64]: Add 'poly1305-avx2-amd64.lo'.
--

Add Andrew Moon's public domain AVX2 implementation of Poly1305. Original
source is available at: https://github.com/floodyberry/poly1305-opt

Benchmarks on Intel i5-4570 (haswell):

Old:
         | nanosecs/byte  mebibytes/sec  cycles/byte
POLY1305 | 0.448 ns/B     2129.5 MiB/s   1.43 c/B

New:
         | nanosecs/byte  mebibytes/sec  cycles/byte
POLY1305 | 0.205 ns/B     4643.5 MiB/s   0.657 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/poly1305-internal.h')
-rw-r--r-- cipher/poly1305-internal.h 23
1 files changed, 20 insertions, 3 deletions
diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h
index fa3fe75e..0299c430 100644
--- a/cipher/poly1305-internal.h
+++ b/cipher/poly1305-internal.h
@@ -54,23 +54,40 @@
#endif
+/* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
+#undef POLY1305_USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+ defined(ENABLE_AVX2_SUPPORT)
+# define POLY1305_USE_AVX2 1
+# define POLY1305_AVX2_BLOCKSIZE 64
+# define POLY1305_AVX2_STATESIZE 328
+# define POLY1305_AVX2_ALIGNMENT 32
+#endif
+
+
/* Largest block-size used in any implementation (optimized implementations
* might use block-size multiple of 16). */
-#ifdef POLY1305_USE_SSE2
+#ifdef POLY1305_USE_AVX2
+# define POLY1305_LARGEST_BLOCKSIZE POLY1305_AVX2_BLOCKSIZE
+#elif defined(POLY1305_USE_SSE2)
# define POLY1305_LARGEST_BLOCKSIZE POLY1305_SSE2_BLOCKSIZE
#else
# define POLY1305_LARGEST_BLOCKSIZE POLY1305_REF_BLOCKSIZE
#endif
/* Largest state-size used in any implementation. */
-#ifdef POLY1305_USE_SSE2
+#ifdef POLY1305_USE_AVX2
+# define POLY1305_LARGEST_STATESIZE POLY1305_AVX2_STATESIZE
+#elif defined(POLY1305_USE_SSE2)
# define POLY1305_LARGEST_STATESIZE POLY1305_SSE2_STATESIZE
#else
# define POLY1305_LARGEST_STATESIZE POLY1305_REF_STATESIZE
#endif
/* Minimum alignment for state pointer passed to implementations. */
-#ifdef POLY1305_USE_SSE2
+#ifdef POLY1305_USE_AVX2
+# define POLY1305_STATE_ALIGNMENT POLY1305_AVX2_ALIGNMENT
+#elif defined(POLY1305_USE_SSE2)
# define POLY1305_STATE_ALIGNMENT POLY1305_SSE2_ALIGNMENT
#else
# define POLY1305_STATE_ALIGNMENT POLY1305_REF_ALIGNMENT