diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-05-11 20:18:49 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-05-12 20:32:50 +0300 |
commit | 297532602ed2d881d8fdc393d1961068a143a891 (patch) | |
tree | 9fc6e7cfd4f685cf52102a39a6c361e6ed160499 /cipher/poly1305-internal.h | |
parent | e813958419b0ec4439e6caf07d3b2234cffa2bfa (diff) | |
download | libgcrypt-297532602ed2d881d8fdc393d1961068a143a891.tar.gz |
poly1305: add AMD64/SSE2 optimized implementation
* cipher/Makefile.am: Add 'poly1305-sse2-amd64.S'.
* cipher/poly1305-internal.h (POLY1305_USE_SSE2)
(POLY1305_SSE2_BLOCKSIZE, POLY1305_SSE2_STATESIZE)
(POLY1305_SSE2_ALIGNMENT): New.
(POLY1305_LARGEST_BLOCKSIZE, POLY1305_LARGEST_STATESIZE)
(POLY1305_STATE_ALIGNMENT): Use SSE2 versions when needed.
* cipher/poly1305-sse2-amd64.S: New.
* cipher/poly1305.c [POLY1305_USE_SSE2]
(_gcry_poly1305_amd64_sse2_init_ext)
(_gcry_poly1305_amd64_sse2_finish_ext)
(_gcry_poly1305_amd64_sse2_blocks, poly1305_amd64_sse2_ops): New.
(_gcry_poly1305_init) [POLY1305_USE_SSE2]: Use SSE2 version.
* configure.ac [host=x86_64]: Add 'poly1305-sse2-amd64.lo'.
--
Add Andrew Moon's public domain SSE2 implementation of Poly1305. Original
source is available at: https://github.com/floodyberry/poly1305-opt
Benchmarks on Intel i5-4570 (Haswell):
Old:
| nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 0.844 ns/B 1130.2 MiB/s 2.70 c/B
New:
| nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 0.448 ns/B 2129.5 MiB/s 1.43 c/B
Benchmarks on Intel i5-2450M (Sandy Bridge):
Old:
| nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 1.25 ns/B 763.0 MiB/s 3.12 c/B
New:
| nanosecs/byte mebibytes/sec cycles/byte
POLY1305 | 0.605 ns/B 1575.9 MiB/s 1.51 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/poly1305-internal.h')
-rw-r--r-- | cipher/poly1305-internal.h | 28 |
1 file changed, 25 insertions, 3 deletions
diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h
index d2c6b5cd..fa3fe75e 100644
--- a/cipher/poly1305-internal.h
+++ b/cipher/poly1305-internal.h
@@ -44,15 +44,37 @@
 #define POLY1305_REF_ALIGNMENT sizeof(void *)
 
 
+/* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
+#undef POLY1305_USE_SSE2
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# define POLY1305_USE_SSE2 1
+# define POLY1305_SSE2_BLOCKSIZE 32
+# define POLY1305_SSE2_STATESIZE 248
+# define POLY1305_SSE2_ALIGNMENT 16
+#endif
+
+
 /* Largest block-size used in any implementation (optimized implementations
  * might use block-size multiple of 16). */
-#define POLY1305_LARGEST_BLOCKSIZE POLY1305_REF_BLOCKSIZE
+#ifdef POLY1305_USE_SSE2
+# define POLY1305_LARGEST_BLOCKSIZE POLY1305_SSE2_BLOCKSIZE
+#else
+# define POLY1305_LARGEST_BLOCKSIZE POLY1305_REF_BLOCKSIZE
+#endif
 
 /* Largest state-size used in any implementation. */
-#define POLY1305_LARGEST_STATESIZE POLY1305_REF_STATESIZE
+#ifdef POLY1305_USE_SSE2
+# define POLY1305_LARGEST_STATESIZE POLY1305_SSE2_STATESIZE
+#else
+# define POLY1305_LARGEST_STATESIZE POLY1305_REF_STATESIZE
+#endif
 
 /* Minimum alignment for state pointer passed to implementations. */
-#define POLY1305_STATE_ALIGNMENT POLY1305_REF_ALIGNMENT
+#ifdef POLY1305_USE_SSE2
+# define POLY1305_STATE_ALIGNMENT POLY1305_SSE2_ALIGNMENT
+#else
+# define POLY1305_STATE_ALIGNMENT POLY1305_REF_ALIGNMENT
+#endif
 
 
 typedef struct poly1305_key_s