diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2015-11-01 16:06:26 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2015-11-01 21:47:13 +0200 |
commit | a1cc7bb15473a2419b24ecac765ae0ce5989a13b (patch) | |
tree | 400b3626b6d6413afad522e351eab1711c351cd4 /cipher/keccak.c | |
parent | 2857cb89c6dc1c02266600bc1fd2967a3cd5cf88 (diff) | |
download | libgcrypt-a1cc7bb15473a2419b24ecac765ae0ce5989a13b.tar.gz |
Add ARMv7/NEON implementation of Keccak
* cipher/Makefile.am: Add 'keccak-armv7-neon.S'.
* cipher/keccak-armv7-neon.S: New.
* cipher/keccak.c (USE_64BIT_ARM_NEON): New.
(NEED_COMMON64): Select if USE_64BIT_ARM_NEON.
[NEED_COMMON64] (round_consts_64bit): Rename to...
[NEED_COMMON64] (_gcry_keccak_round_consts_64bit): ...this; Add
terminator at end.
[USE_64BIT_ARM_NEON] (_gcry_keccak_permute_armv7_neon)
(_gcry_keccak_absorb_lanes64_armv7_neon, keccak_permute64_armv7_neon)
(keccak_absorb_lanes64_armv7_neon, keccak_armv7_neon_64_ops): New.
(keccak_init) [USE_64BIT_ARM_NEON]: Select ARM/NEON implementation
if supported by HW.
* cipher/keccak_permute_64.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Update
to use new round constant table.
* configure.ac: Add 'keccak-armv7-neon.lo'.
--
This patch adds an ARMv7/NEON implementation of Keccak (SHAKE/SHA3). It
is based on the public-domain implementation by Ronny Van Keer from the
SUPERCOP package:
https://github.com/floodyberry/supercop/blob/master/crypto_hash/\
keccakc1024/inplace-armv7a-neon/keccak2.s
Benchmark results on Cortex-A8 @ 1008 MHz:
Before (generic 32-bit bit-interleaved impl.):
| nanosecs/byte mebibytes/sec cycles/byte
SHAKE128 | 83.00 ns/B 11.49 MiB/s 83.67 c/B
SHAKE256 | 101.7 ns/B 9.38 MiB/s 102.5 c/B
SHA3-224 | 96.13 ns/B 9.92 MiB/s 96.90 c/B
SHA3-256 | 101.5 ns/B 9.40 MiB/s 102.3 c/B
SHA3-384 | 131.4 ns/B 7.26 MiB/s 132.5 c/B
SHA3-512 | 189.1 ns/B 5.04 MiB/s 190.6 c/B
After (ARM/NEON, ~3.2x faster):
| nanosecs/byte mebibytes/sec cycles/byte
SHAKE128 | 25.09 ns/B 38.01 MiB/s 25.29 c/B
SHAKE256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B
SHA3-224 | 29.24 ns/B 32.61 MiB/s 29.48 c/B
SHA3-256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B
SHA3-384 | 40.42 ns/B 23.59 MiB/s 40.74 c/B
SHA3-512 | 58.37 ns/B 16.34 MiB/s 58.84 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/keccak.c')
-rw-r--r-- | cipher/keccak.c | 71 |
1 files changed, 68 insertions, 3 deletions
diff --git a/cipher/keccak.c b/cipher/keccak.c index ce578607..0bb31552 100644 --- a/cipher/keccak.c +++ b/cipher/keccak.c @@ -59,7 +59,19 @@ #endif -#ifdef USE_64BIT +/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly + * code. */ +#undef USE_64BIT_ARM_NEON +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_64BIT_ARM_NEON 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON) # define NEED_COMMON64 1 #endif @@ -109,7 +121,7 @@ typedef struct KECCAK_CONTEXT_S #ifdef NEED_COMMON64 -static const u64 round_consts_64bit[24] = +const u64 _gcry_keccak_round_consts_64bit[24 + 1] = { U64_C(0x0000000000000001), U64_C(0x0000000000008082), U64_C(0x800000000000808A), U64_C(0x8000000080008000), @@ -122,7 +134,8 @@ static const u64 round_consts_64bit[24] = U64_C(0x8000000000008002), U64_C(0x8000000000000080), U64_C(0x000000000000800A), U64_C(0x800000008000000A), U64_C(0x8000000080008081), U64_C(0x8000000000008080), - U64_C(0x0000000080000001), U64_C(0x8000000080008008) + U64_C(0x0000000080000001), U64_C(0x8000000080008008), + U64_C(0xFFFFFFFFFFFFFFFF) }; static unsigned int @@ -400,6 +413,54 @@ static const keccak_ops_t keccak_bmi2_64_ops = #endif /* USE_64BIT_BMI2 */ +/* 64-bit ARMv7/NEON implementation. 
*/ +#ifdef USE_64BIT_ARM_NEON + +unsigned int _gcry_keccak_permute_armv7_neon(u64 *state); +unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos, + const byte *lanes, + unsigned int nlanes, + int blocklanes); + +static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd) +{ + return _gcry_keccak_permute_armv7_neon(hd->u.state64); +} + +static unsigned int +keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + if (blocklanes < 0) + { + /* blocklanes == -1, permutationless absorb from keccak_final. */ + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + } + + return 0; + } + else + { + return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes, + nlanes, blocklanes); + } +} + +static const keccak_ops_t keccak_armv7_neon_64_ops = +{ + .permute = keccak_permute64_armv7_neon, + .absorb = keccak_absorb_lanes64_armv7_neon, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_ARM_NEON */ + + /* Construct generic 32-bit implementation. */ #ifdef USE_32BIT @@ -662,6 +723,10 @@ keccak_init (int algo, void *context, unsigned int flags) /* Select optimized implementation based in hw features. */ if (0) {} +#ifdef USE_64BIT_ARM_NEON + else if (features & HWF_ARM_NEON) + ctx->ops = &keccak_armv7_neon_64_ops; +#endif #ifdef USE_64BIT_BMI2 else if (features & HWF_INTEL_BMI2) ctx->ops = &keccak_bmi2_64_ops; |