diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-12-17 15:35:38 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-12-18 17:00:33 +0200 |
commit | fc7dcf616937afaf73cfda1bf7bd79566a96b130 (patch) | |
tree | cf2736324f73aa279bb6ecc66dc197b56a707963 /cipher/sha1.c | |
parent | df629ba53a662427ebd3ddca90c3fe9ddd6511d3 (diff) | |
download | libgcrypt-fc7dcf616937afaf73cfda1bf7bd79566a96b130.tar.gz |
Add ARM/NEON implementation for SHA-1
* cipher/Makefile.am: Add 'sha1-armv7-neon.S'.
* cipher/sha1-armv7-neon.S: New.
* cipher/sha1.c (USE_NEON): New.
(SHA1_CONTEXT, sha1_init) [USE_NEON]: Add and initialize 'use_neon'.
[USE_NEON] (_gcry_sha1_transform_armv7_neon): New.
(transform) [USE_NEON]: Use ARM/NEON assembly if enabled.
* configure.ac: Add 'sha1-armv7-neon.lo'.
--
This patch adds an ARM/NEON implementation of SHA-1.
Benchmarks show a 1.72x improvement on an ARM Cortex-A8 at 1008 MHz:
jussi@cubie:~/libgcrypt$ tests/bench-slope --cpu-mhz 1008 hash sha1
Hash:
| nanosecs/byte mebibytes/sec cycles/byte
SHA1 | 7.80 ns/B 122.3 MiB/s 7.86 c/B
=
jussi@cubie:~/libgcrypt$ tests/bench-slope --disable-hwf arm-neon --cpu-mhz 1008 hash sha1
Hash:
| nanosecs/byte mebibytes/sec cycles/byte
SHA1 | 13.41 ns/B 71.10 MiB/s 13.52 c/B
=
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha1.c')
-rw-r--r-- | cipher/sha1.c | 29 |
1 file changed, 28 insertions, 1 deletion
diff --git a/cipher/sha1.c b/cipher/sha1.c
index a55ff938..889a7ea1 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -64,6 +64,15 @@
 # define USE_BMI2 1
 #endif
 
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+     defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_NEON 1
+# endif
+#endif
+
 
 /* A macro to test whether P is properly aligned for an u32 type.
    Note that config.h provides a suitable replacement for uintptr_t if
@@ -87,6 +96,9 @@ typedef struct
 #ifdef USE_BMI2
   unsigned int use_bmi2:1;
 #endif
+#ifdef USE_NEON
+  unsigned int use_neon:1;
+#endif
 } SHA1_CONTEXT;
 
 static unsigned int
@@ -122,6 +134,9 @@ sha1_init (void *context)
 #ifdef USE_BMI2
   hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
 #endif
+#ifdef USE_NEON
+  hd->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
   (void)features;
 }
 
@@ -148,6 +163,13 @@ sha1_init (void *context)
                 } while(0)
 
 
+
+#ifdef USE_NEON
+unsigned int
+_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
+                                 size_t nblks);
+#endif
+
 /*
  * Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA.
  */
@@ -302,10 +324,15 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
     return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
            + 4 * sizeof(void*);
 #endif
+#ifdef USE_NEON
+  if (hd->use_neon)
+    return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks)
+           + 4 * sizeof(void*);
+#endif
 
   do
     {
-      burn = transform_blk (ctx, data);
+      burn = transform_blk (hd, data);
       data += 64;
     }
   while (--nblks);