summaryrefslogtreecommitdiff
path: root/cipher/sha1.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-12-17 15:35:38 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-12-18 17:00:33 +0200
commit: fc7dcf616937afaf73cfda1bf7bd79566a96b130 (patch)
tree: cf2736324f73aa279bb6ecc66dc197b56a707963 /cipher/sha1.c
parent: df629ba53a662427ebd3ddca90c3fe9ddd6511d3 (diff)
download: libgcrypt-fc7dcf616937afaf73cfda1bf7bd79566a96b130.tar.gz
Add ARM/NEON implementation for SHA-1
* cipher/Makefile.am: Add 'sha1-armv7-neon.S'.
* cipher/sha1-armv7-neon.S: New.
* cipher/sha1.c (USE_NEON): New.
(SHA1_CONTEXT, sha1_init) [USE_NEON]: Add and initialize 'use_neon'.
[USE_NEON] (_gcry_sha1_transform_armv7_neon): New.
(transform) [USE_NEON]: Use ARM/NEON assembly if enabled.
* configure.ac: Add 'sha1-armv7-neon.lo'.
--

Patch adds ARM/NEON implementation for SHA-1.

Benchmarks show 1.72x improvement on ARM Cortex-A8, 1008 MHz:

jussi@cubie:~/libgcrypt$ tests/bench-slope --cpu-mhz 1008 hash sha1
Hash:
          | nanosecs/byte   mebibytes/sec   cycles/byte
 SHA1     |      7.80 ns/B     122.3 MiB/s      7.86 c/B
          =

jussi@cubie:~/libgcrypt$ tests/bench-slope --disable-hwf arm-neon --cpu-mhz 1008 hash sha1
Hash:
          | nanosecs/byte   mebibytes/sec   cycles/byte
 SHA1     |     13.41 ns/B     71.10 MiB/s     13.52 c/B
          =

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha1.c')
-rw-r--r--cipher/sha1.c29
1 files changed, 28 insertions, 1 deletions
diff --git a/cipher/sha1.c b/cipher/sha1.c
index a55ff938..889a7ea1 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -64,6 +64,15 @@
# define USE_BMI2 1
#endif
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_NEON 1
+# endif
+#endif
+
/* A macro to test whether P is properly aligned for an u32 type.
Note that config.h provides a suitable replacement for uintptr_t if
@@ -87,6 +96,9 @@ typedef struct
#ifdef USE_BMI2
unsigned int use_bmi2:1;
#endif
+#ifdef USE_NEON
+ unsigned int use_neon:1;
+#endif
} SHA1_CONTEXT;
static unsigned int
@@ -122,6 +134,9 @@ sha1_init (void *context)
#ifdef USE_BMI2
hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
#endif
+#ifdef USE_NEON
+ hd->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
(void)features;
}
@@ -148,6 +163,13 @@ sha1_init (void *context)
} while(0)
+
+#ifdef USE_NEON
+unsigned int
+_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
+ size_t nblks);
+#endif
+
/*
* Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA.
*/
@@ -302,10 +324,15 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
+ 4 * sizeof(void*);
#endif
+#ifdef USE_NEON
+ if (hd->use_neon)
+ return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks)
+ + 4 * sizeof(void*);
+#endif
do
{
- burn = transform_blk (ctx, data);
+ burn = transform_blk (hd, data);
data += 64;
}
while (--nblks);