author    Jussi Kivilinna <jussi.kivilinna@iki.fi>  2013-12-17 15:35:38 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>  2013-12-17 16:01:14 +0200
commit    e4e458465b124e25b6aec7a60174bf1ca32dc5fd (patch)
tree      34205838c6050758c135d110f7463f50da6b32d4 /cipher/sha1.c
parent    6fd0dd2a5f1362f91e2861cd9d300341a43842a5 (diff)
download  libgcrypt-e4e458465b124e25b6aec7a60174bf1ca32dc5fd.tar.gz
Add AVX and AVX/BMI2 implementations for SHA-1
* cipher/Makefile.am: Add 'sha1-avx-amd64.S' and 'sha1-avx-bmi2-amd64.S'.
* cipher/sha1-avx-amd64.S: New.
* cipher/sha1-avx-bmi2-amd64.S: New.
* cipher/sha1.c (USE_AVX, USE_BMI2): New.
(SHA1_CONTEXT) [USE_AVX]: Add 'use_avx'.
(SHA1_CONTEXT) [USE_BMI2]: Add 'use_bmi2'.
(sha1_init): Initialize 'use_avx' and 'use_bmi2'.
[USE_AVX] (_gcry_sha1_transform_amd64_avx): New.
[USE_BMI2] (_gcry_sha1_transform_amd64_avx_bmi2): New.
(transform) [USE_BMI2]: Use BMI2 assembly if enabled.
(transform) [USE_AVX]: Use AVX assembly if enabled.
* configure.ac: Add 'sha1-avx-amd64.lo' and 'sha1-avx-bmi2-amd64.lo'.
--

This patch adds AVX (for Sandy Bridge and Ivy Bridge) and AVX/BMI2 (for
Haswell) optimized implementations of SHA-1.

Note: the AVX implementation is currently limited to Intel CPUs, because it
relies on the SHLD instruction for faster rotations on Sandy Bridge, and
SHLD is known to be slow on non-Intel CPUs.

Benchmarks:

 cpu             C-version  SSSE3     AVX/(SHLD|BMI2)  New vs C  New vs SSSE3
 Intel i5-4570   8.84 c/B   4.61 c/B  3.86 c/B         2.29x     1.19x
 Intel i5-2450M  9.45 c/B   5.30 c/B  4.39 c/B         2.15x     1.20x

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
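As background on the SHLD-vs-BMI2 distinction above: SHA-1's round function
rotates 32-bit words left by 5 and by 30 bits, and the two assembly variants
implement those rotations with different instructions. The following is a
minimal sketch, not code from this patch: the helper names rol30_shld and
rol30_rorx are hypothetical, GCC-style inline asm is assumed, and the RORX
variant additionally assumes an assembler that understands BMI2.

    /* Illustrative sketch only -- these helpers are NOT part of the patch.
     * SHLD with the same register as source and destination performs a
     * left rotate; it is fast on Sandy Bridge but slow on non-Intel CPUs,
     * which is why sha1_init() gates the AVX path on HWF_INTEL_CPU.  */
    static inline unsigned int
    rol30_shld (unsigned int x)
    {
      /* shld shifts %0 left and fills the vacated bits from the source
         operand; using the same register for both makes it a rotate.  */
      asm ("shldl $30, %0, %0" : "+r" (x));
      return x;
    }

    /* RORX (BMI2) rotates right without touching EFLAGS, which helps
     * instruction scheduling on Haswell.  A right-rotate by 2 of a
     * 32-bit value equals a left-rotate by 30.  */
    static inline unsigned int
    rol30_rorx (unsigned int x)
    {
      unsigned int r;
      asm ("rorxl $2, %1, %0" : "=r" (r) : "r" (x));
      return r;
    }

Because RORX takes separate source and destination registers and leaves the
flags alone, the BMI2 variant avoids the move and flag-dependency overhead
that a SHLD- or ROL-based rotate carries.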
Diffstat (limited to 'cipher/sha1.c')
-rw-r--r--  cipher/sha1.c | 54
1 file changed, 53 insertions(+), 1 deletion(-)
diff --git a/cipher/sha1.c b/cipher/sha1.c
index 8040e766..a55ff938 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -50,6 +50,20 @@
# define USE_SSSE3 1
#endif
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX)
+# define USE_AVX 1
+#endif
+
+/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
+#undef USE_BMI2
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(HAVE_GCC_INLINE_ASM_BMI2)
+# define USE_BMI2 1
+#endif
+
/* A macro to test whether P is properly aligned for an u32 type.
Note that config.h provides a suitable replacement for uintptr_t if
@@ -67,6 +81,12 @@ typedef struct
#ifdef USE_SSSE3
unsigned int use_ssse3:1;
#endif
+#ifdef USE_AVX
+ unsigned int use_avx:1;
+#endif
+#ifdef USE_BMI2
+ unsigned int use_bmi2:1;
+#endif
} SHA1_CONTEXT;
static unsigned int
@@ -77,6 +97,7 @@ static void
sha1_init (void *context)
{
SHA1_CONTEXT *hd = context;
+ unsigned int features = _gcry_get_hw_features ();
hd->h0 = 0x67452301;
hd->h1 = 0xefcdab89;
@@ -91,8 +112,17 @@ sha1_init (void *context)
hd->bctx.bwrite = transform;
#ifdef USE_SSSE3
- hd->use_ssse3 = (_gcry_get_hw_features () & HWF_INTEL_SSSE3) != 0;
+ hd->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+#endif
+#ifdef USE_AVX
+ /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
+ * Therefore use this implementation on Intel CPUs only. */
+ hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+#endif
+#ifdef USE_BMI2
+ hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
#endif
+ (void)features;
}
@@ -238,6 +268,18 @@ _gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
size_t nblks);
#endif
+#ifdef USE_AVX
+unsigned int
+_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
+ size_t nblks);
+#endif
+
+#ifdef USE_BMI2
+unsigned int
+_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
+ size_t nblks);
+#endif
+
static unsigned int
transform (void *ctx, const unsigned char *data, size_t nblks)
@@ -245,6 +287,16 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
SHA1_CONTEXT *hd = ctx;
unsigned int burn;
+#ifdef USE_BMI2
+ if (hd->use_bmi2)
+ return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
+ + 4 * sizeof(void*);
+#endif
+#ifdef USE_AVX
+ if (hd->use_avx)
+ return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
+ + 4 * sizeof(void*);
+#endif
#ifdef USE_SSSE3
if (hd->use_ssse3)
return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
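One detail worth noting in the transform() changes: each assembly
implementation returns how many bytes of its own stack may hold sensitive
data, and the C wrapper adds 4 * sizeof(void*) on top for the stack its
call sequence can touch (return address, saved registers). Below is a
sketch of how that return value is conventionally consumed, assuming
libgcrypt's usual burn-stack pattern; the real call site lives in the
hash framework outside this diff, and write_blocks is a hypothetical name.

    /* Sketch, NOT from this diff: _gcry_burn_stack() is the real libgcrypt
       helper that overwrites the given number of stack bytes; the
       surrounding function and its name are illustrative only.  */
    static void
    write_blocks (SHA1_CONTEXT *hd, const unsigned char *data, size_t nblks)
    {
      unsigned int burn = 0;

      if (nblks)
        burn = transform (hd, data, nblks); /* stack bytes to wipe */

      _gcry_burn_stack (burn); /* erase sensitive values left on the stack */
    }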