diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-12-17 15:35:38 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-12-17 16:01:14 +0200 |
commit | e4e458465b124e25b6aec7a60174bf1ca32dc5fd (patch) | |
tree | 34205838c6050758c135d110f7463f50da6b32d4 /cipher/sha1.c | |
parent | 6fd0dd2a5f1362f91e2861cd9d300341a43842a5 (diff) | |
download | libgcrypt-e4e458465b124e25b6aec7a60174bf1ca32dc5fd.tar.gz |
Add AVX and AVX/BMI2 implementations for SHA-1
* cipher/Makefile.am: Add 'sha1-avx-amd64.S' and
'sha1-avx-bmi2-amd64.S'.
* cipher/sha1-avx-amd64.S: New.
* cipher/sha1-avx-bmi2-amd64.S: New.
* cipher/sha1.c (USE_AVX, USE_BMI2): New.
(SHA1_CONTEXT) [USE_AVX]: Add 'use_avx'.
(SHA1_CONTEXT) [USE_BMI2]: Add 'use_bmi2'.
(sha1_init): Initialize 'use_avx' and 'use_bmi2'.
[USE_AVX] (_gcry_sha1_transform_amd64_avx): New.
[USE_BMI2] (_gcry_sha1_transform_amd64_bmi2): New.
(transform) [USE_BMI2]: Use BMI2 assembly if enabled.
(transform) [USE_AVX]: Use AVX assembly if enabled.
* configure.ac: Add 'sha1-avx-amd64.lo' and 'sha1-avx-bmi2-amd64.lo'.
--
Patch adds AVX (for Sandybridge and Ivybridge) and AVX/BMI2 (for Haswell)
optimized implementations of SHA-1.
Note: AVX implementation is currently limited to Intel CPUs due to use
of SHLD instruction for faster rotations on Sandybridge.
Benchmarks:
cpu C-version SSSE3 AVX/(SHLD|BMI2) New vs C New vs SSSE3
Intel i5-4570 8.84 c/B 4.61 c/B 3.86 c/B 2.29x 1.19x
Intel i5-2450M 9.45 c/B 5.30 c/B 4.39 c/B 2.15x 1.20x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha1.c')
-rw-r--r-- | cipher/sha1.c | 54 |
1 files changed, 53 insertions, 1 deletions
diff --git a/cipher/sha1.c b/cipher/sha1.c index 8040e766..a55ff938 100644 --- a/cipher/sha1.c +++ b/cipher/sha1.c @@ -50,6 +50,20 @@ # define USE_SSSE3 1 #endif +/* USE_AVX indicates whether to compile with Intel AVX code. */ +#undef USE_AVX +#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AVX) +# define USE_AVX 1 +#endif + +/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */ +#undef USE_BMI2 +#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AVX) && defined(HAVE_GCC_INLINE_ASM_BMI2) +# define USE_BMI2 1 +#endif + /* A macro to test whether P is properly aligned for an u32 type. Note that config.h provides a suitable replacement for uintptr_t if @@ -67,6 +81,12 @@ typedef struct #ifdef USE_SSSE3 unsigned int use_ssse3:1; #endif +#ifdef USE_AVX + unsigned int use_avx:1; +#endif +#ifdef USE_BMI2 + unsigned int use_bmi2:1; +#endif } SHA1_CONTEXT; static unsigned int @@ -77,6 +97,7 @@ static void sha1_init (void *context) { SHA1_CONTEXT *hd = context; + unsigned int features = _gcry_get_hw_features (); hd->h0 = 0x67452301; hd->h1 = 0xefcdab89; @@ -91,8 +112,17 @@ sha1_init (void *context) hd->bctx.bwrite = transform; #ifdef USE_SSSE3 - hd->use_ssse3 = (_gcry_get_hw_features () & HWF_INTEL_SSSE3) != 0; + hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; +#endif +#ifdef USE_AVX + /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs. + * Therefore use this implementation on Intel CPUs only. 
*/ + hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU); +#endif +#ifdef USE_BMI2 + hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2); #endif + (void)features; } @@ -238,6 +268,18 @@ _gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data, size_t nblks); #endif +#ifdef USE_AVX +unsigned int +_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data, + size_t nblks); +#endif + +#ifdef USE_BMI2 +unsigned int +_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data, + size_t nblks); +#endif + static unsigned int transform (void *ctx, const unsigned char *data, size_t nblks) @@ -245,6 +287,16 @@ transform (void *ctx, const unsigned char *data, size_t nblks) SHA1_CONTEXT *hd = ctx; unsigned int burn; +#ifdef USE_BMI2 + if (hd->use_bmi2) + return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks) + + 4 * sizeof(void*); +#endif +#ifdef USE_AVX + if (hd->use_avx) + return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks) + + 4 * sizeof(void*); +#endif #ifdef USE_SSSE3 if (hd->use_ssse3) return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks) |