summaryrefslogtreecommitdiff
path: root/cipher/keccak.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2015-11-01 16:06:26 +0200
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2015-11-01 21:47:13 +0200
commita1cc7bb15473a2419b24ecac765ae0ce5989a13b (patch)
tree400b3626b6d6413afad522e351eab1711c351cd4 /cipher/keccak.c
parent2857cb89c6dc1c02266600bc1fd2967a3cd5cf88 (diff)
downloadlibgcrypt-a1cc7bb15473a2419b24ecac765ae0ce5989a13b.tar.gz
Add ARMv7/NEON implementation of Keccak
* cipher/Makefile.am: Add 'keccak-armv7-neon.S'. * cipher/keccak-armv7-neon.S: New. * cipher/keccak.c (USE_64BIT_ARM_NEON): New. (NEED_COMMON64): Select if USE_64BIT_ARM_NEON. [NEED_COMMON64] (round_consts_64bit): Rename to... [NEED_COMMON64] (_gcry_keccak_round_consts_64bit): ...this; Add terminator at end. [USE_64BIT_ARM_NEON] (_gcry_keccak_permute_armv7_neon) (_gcry_keccak_absorb_lanes64_armv7_neon, keccak_permute64_armv7_neon) (keccak_absorb_lanes64_armv7_neon, keccak_armv7_neon_64_ops): New. (keccak_init) [USE_64BIT_ARM_NEON]: Select ARM/NEON implementation if supported by HW. * cipher/keccak_permute_64.h (KECCAK_F1600_PERMUTE_FUNC_NAME): Update to use new round constant table. * configure.ac: Add 'keccak-armv7-neon.lo'. -- Patch adds ARMv7/NEON implementation of Keccak (SHAKE/SHA3). Patch is based on public-domain implementation by Ronny Van Keer from SUPERCOP package: https://github.com/floodyberry/supercop/blob/master/crypto_hash/keccakc1024/inplace-armv7a-neon/keccak2.s Benchmark results on Cortex-A8 @ 1008 MHz: Before (generic 32-bit bit-interleaved impl.): | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 83.00 ns/B 11.49 MiB/s 83.67 c/B SHAKE256 | 101.7 ns/B 9.38 MiB/s 102.5 c/B SHA3-224 | 96.13 ns/B 9.92 MiB/s 96.90 c/B SHA3-256 | 101.5 ns/B 9.40 MiB/s 102.3 c/B SHA3-384 | 131.4 ns/B 7.26 MiB/s 132.5 c/B SHA3-512 | 189.1 ns/B 5.04 MiB/s 190.6 c/B After (ARM/NEON, ~3.2x faster): | nanosecs/byte mebibytes/sec cycles/byte SHAKE128 | 25.09 ns/B 38.01 MiB/s 25.29 c/B SHAKE256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B SHA3-224 | 29.24 ns/B 32.61 MiB/s 29.48 c/B SHA3-256 | 30.95 ns/B 30.82 MiB/s 31.19 c/B SHA3-384 | 40.42 ns/B 23.59 MiB/s 40.74 c/B SHA3-512 | 58.37 ns/B 16.34 MiB/s 58.84 c/B Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/keccak.c')
-rw-r--r--cipher/keccak.c71
1 files changed, 68 insertions, 3 deletions
diff --git a/cipher/keccak.c b/cipher/keccak.c
index ce578607..0bb31552 100644
--- a/cipher/keccak.c
+++ b/cipher/keccak.c
@@ -59,7 +59,19 @@
#endif
-#ifdef USE_64BIT
+/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly
+ * code.  It is first undefined unconditionally and then defined to 1 only
+ * when every one of ENABLE_NEON_SUPPORT, HAVE_ARM_ARCH_V6, __ARMEL__,
+ * HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS and HAVE_GCC_INLINE_ASM_NEON is
+ * defined.  This is a compile-time switch only: keccak_init additionally
+ * tests the HWF_ARM_NEON feature bit before selecting the NEON ops. */
+#undef USE_64BIT_ARM_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+ && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+ && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_64BIT_ARM_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON)
# define NEED_COMMON64 1
#endif
@@ -109,7 +121,7 @@ typedef struct KECCAK_CONTEXT_S
#ifdef NEED_COMMON64
-static const u64 round_consts_64bit[24] =
+const u64 _gcry_keccak_round_consts_64bit[24 + 1] =
{
U64_C(0x0000000000000001), U64_C(0x0000000000008082),
U64_C(0x800000000000808A), U64_C(0x8000000080008000),
@@ -122,7 +134,8 @@ static const u64 round_consts_64bit[24] =
U64_C(0x8000000000008002), U64_C(0x8000000000000080),
U64_C(0x000000000000800A), U64_C(0x800000008000000A),
U64_C(0x8000000080008081), U64_C(0x8000000000008080),
- U64_C(0x0000000080000001), U64_C(0x8000000080008008)
+ U64_C(0x0000000080000001), U64_C(0x8000000080008008),
+ U64_C(0xFFFFFFFFFFFFFFFF)
};
static unsigned int
@@ -400,6 +413,54 @@ static const keccak_ops_t keccak_bmi2_64_ops =
#endif /* USE_64BIT_BMI2 */
+/* 64-bit ARMv7/NEON implementation.
+ *
+ * The two functions declared below have no body in this file; they are
+ * expected to be provided by cipher/keccak-armv7-neon.S.  Both take the
+ * array of 64-bit state lanes directly rather than a KECCAK_STATE, which
+ * is why the static wrappers that follow pass hd->u.state64.
+ *
+ * NOTE(review): the meaning of the unsigned int return value is not
+ * visible here; the wrappers simply pass it through to the caller --
+ * confirm its semantics against the assembly source. */
+#ifdef USE_64BIT_ARM_NEON
+
+unsigned int _gcry_keccak_permute_armv7_neon(u64 *state);
+unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos,
+ const byte *lanes,
+ unsigned int nlanes,
+ int blocklanes);
+
+/* keccak_ops_t 'permute' hook for the ARMv7/NEON backend: hand the 64-bit
+ * lane array of HD to the assembly permutation and pass its return value
+ * back to the caller unchanged. */
+static unsigned int
+keccak_permute64_armv7_neon(KECCAK_STATE *hd)
+{
+  u64 *lanes64 = hd->u.state64;
+
+  return _gcry_keccak_permute_armv7_neon(lanes64);
+}
+
+/* keccak_ops_t 'absorb' hook for the ARMv7/NEON backend.
+ *
+ * Absorbs NLANES lanes of 8 bytes each, read little-endian from LANES,
+ * into the 64-bit state of HD starting at lane index POS.
+ *
+ * A negative BLOCKLANES (keccak_final passes -1) requests a plain XOR of
+ * the input into the state without running the permutation; that case is
+ * handled here in C and 0 is returned.  Otherwise the work is delegated
+ * to the NEON assembly routine and its return value is passed through
+ * unchanged. */
+static unsigned int
+keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes,
+                                 unsigned int nlanes, int blocklanes)
+{
+  if (blocklanes < 0)
+    {
+      /* blocklanes == -1, permutationless absorb from keccak_final. */
+
+      while (nlanes)
+        {
+          /* Advance POS together with LANES so that consecutive input
+           * lanes are XORed into consecutive state lanes rather than all
+           * being folded into the same state64[pos] slot. */
+          hd->u.state64[pos++] ^= buf_get_le64(lanes);
+          lanes += 8;
+          nlanes--;
+        }
+
+      return 0;
+    }
+
+  return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes,
+                                                nlanes, blocklanes);
+}
+
+/* Operation table for the ARMv7/NEON backend: permute and absorb go
+ * through the NEON wrappers above, while extract reuses keccak_extract64. */
+static const keccak_ops_t keccak_armv7_neon_64_ops =
+  {
+    .permute = keccak_permute64_armv7_neon,
+    .absorb  = keccak_absorb_lanes64_armv7_neon,
+    .extract = keccak_extract64
+  };
+
+#endif /* USE_64BIT_ARM_NEON */
+
+
/* Construct generic 32-bit implementation. */
#ifdef USE_32BIT
@@ -662,6 +723,10 @@ keccak_init (int algo, void *context, unsigned int flags)
/* Select optimized implementation based on hw features. */
if (0) {}
+#ifdef USE_64BIT_ARM_NEON
+ else if (features & HWF_ARM_NEON)
+ ctx->ops = &keccak_armv7_neon_64_ops;
+#endif
#ifdef USE_64BIT_BMI2
else if (features & HWF_INTEL_BMI2)
ctx->ops = &keccak_bmi2_64_ops;