summaryrefslogtreecommitdiff
path: root/cipher/keccak_permute_64.h
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi>, 2015-10-31 21:29:56 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi>, 2015-11-01 21:47:06 +0200
commit: 2857cb89c6dc1c02266600bc1fd2967a3cd5cf88 (patch)
tree: 1d5ca6d7135264461757cfdd90b051fc98f5f0ed /cipher/keccak_permute_64.h
parent: 07e4839e75a7bca3a6c0a94aecfe75efe61d7ff2 (diff)
download: libgcrypt-2857cb89c6dc1c02266600bc1fd2967a3cd5cf88.tar.gz
Optimize Keccak 64-bit absorb functions
* cipher/keccak.c [USE_64BIT] [__x86_64__] (absorb_lanes64_8)
(absorb_lanes64_4, absorb_lanes64_2, absorb_lanes64_1): New.
* cipher/keccak.c [USE_64BIT] [!__x86_64__] (absorb_lanes64_8)
(absorb_lanes64_4, absorb_lanes64_2, absorb_lanes64_1): New.
[USE_64BIT] (KECCAK_F1600_ABSORB_FUNC_NAME): New.
[USE_64BIT] (keccak_absorb_lanes64): Remove.
[USE_64BIT_SHLD] (KECCAK_F1600_ABSORB_FUNC_NAME): New.
[USE_64BIT_SHLD] (keccak_absorb_lanes64_shld): Remove.
[USE_64BIT_BMI2] (KECCAK_F1600_ABSORB_FUNC_NAME): New.
[USE_64BIT_BMI2] (keccak_absorb_lanes64_bmi2): Remove.
* cipher/keccak_permute_64.h (KECCAK_F1600_ABSORB_FUNC_NAME): New.
--

Optimize 64-bit absorb functions for small speed-up. After this change,
64-bit BMI2 implementation matches speed of fastest results from SUPERCOP
for Intel Haswell CPUs (long messages).

Benchmark on Intel Haswell @ 3.2 GHz:

Before:
          |  nanosecs/byte   mebibytes/sec   cycles/byte
 SHAKE128 |      2.32 ns/B     411.7 MiB/s      7.41 c/B
 SHAKE256 |      2.84 ns/B     336.2 MiB/s      9.08 c/B
 SHA3-224 |      2.69 ns/B     354.9 MiB/s      8.60 c/B
 SHA3-256 |      2.84 ns/B     336.0 MiB/s      9.08 c/B
 SHA3-384 |      3.69 ns/B     258.4 MiB/s     11.81 c/B
 SHA3-512 |      5.30 ns/B     179.9 MiB/s     16.97 c/B

After:
          |  nanosecs/byte   mebibytes/sec   cycles/byte
 SHAKE128 |      2.27 ns/B     420.6 MiB/s      7.26 c/B
 SHAKE256 |      2.79 ns/B     341.4 MiB/s      8.94 c/B
 SHA3-224 |      2.64 ns/B     361.7 MiB/s      8.44 c/B
 SHA3-256 |      2.79 ns/B     341.5 MiB/s      8.94 c/B
 SHA3-384 |      3.65 ns/B     261.4 MiB/s     11.68 c/B
 SHA3-512 |      5.27 ns/B     181.0 MiB/s     16.87 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/keccak_permute_64.h')
-rw-r--r-- cipher/keccak_permute_64.h | 99
1 files changed, 99 insertions, 0 deletions
diff --git a/cipher/keccak_permute_64.h b/cipher/keccak_permute_64.h
index 1264f195..6f24217d 100644
--- a/cipher/keccak_permute_64.h
+++ b/cipher/keccak_permute_64.h
@@ -288,3 +288,102 @@ KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd)
return sizeof(void *) * 4 + sizeof(u64) * 12 * 5;
}
+/* Absorb NLANES 64-bit lanes (8 bytes each) from LANES into the Keccak
+ * state of HD, calling KECCAK_F1600_PERMUTE_FUNC_NAME each time a block
+ * of BLOCKLANES lanes has been completed.
+ *
+ * POS is the index of the state lane that receives the next input lane.
+ * It is passed by value, so the position reached at the end is not
+ * reported back to the caller.
+ *
+ * Returns the value of the last KECCAK_F1600_PERMUTE_FUNC_NAME call, or
+ * 0 if no permutation was run (presumably a stack burn depth -- confirm
+ * against the callers).
+ *
+ * NOTE(review): the absorb_lanes64_{8,4,2,1} helpers are defined outside
+ * this file; they presumably XOR that many input lanes into the state,
+ * like the lane-by-lane loop at the end does -- confirm in keccak.c.
+ * NOTE(review): nothing here bounds POS; the code appears to assume
+ * 0 <= POS < BLOCKLANES on entry -- confirm against the callers. */
+static unsigned int
+KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes,
+ unsigned int nlanes, int blocklanes)
+{
+ unsigned int burn = 0;
+ /* Each pass first tries the whole-block fast path selected by
+  * BLOCKLANES (taken only while POS is 0), then falls through to the
+  * lane-by-lane loop below. */
+ while (nlanes)
+ {
+ switch (blocklanes)
+ {
+ case 21:
+ /* SHAKE128: whole 21-lane blocks, split as 8 + 4 + 8 + 1 lanes
+  * across the absorb_lanes64_* helpers. */
+ while (pos == 0 && nlanes >= 21)
+ {
+ absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0);
+ absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8);
+ absorb_lanes64_8(&hd->u.state64[12], lanes + 8 * 12);
+ absorb_lanes64_1(&hd->u.state64[20], lanes + 8 * 20);
+ lanes += 8 * 21;
+ nlanes -= 21;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 18:
+ /* SHA3-224: whole 18-lane blocks, split as 8 + 2 + 8 lanes. */
+ while (pos == 0 && nlanes >= 18)
+ {
+ absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0);
+ absorb_lanes64_2(&hd->u.state64[8], lanes + 8 * 8);
+ absorb_lanes64_8(&hd->u.state64[10], lanes + 8 * 10);
+ lanes += 8 * 18;
+ nlanes -= 18;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 17:
+ /* SHA3-256 & SHAKE256: whole 17-lane blocks, split as 8 + 8 + 1
+  * lanes. */
+ while (pos == 0 && nlanes >= 17)
+ {
+ absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0);
+ absorb_lanes64_8(&hd->u.state64[8], lanes + 8 * 8);
+ absorb_lanes64_1(&hd->u.state64[16], lanes + 8 * 16);
+ lanes += 8 * 17;
+ nlanes -= 17;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 13:
+ /* SHA3-384: whole 13-lane blocks, split as 8 + 4 + 1 lanes. */
+ while (pos == 0 && nlanes >= 13)
+ {
+ absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0);
+ absorb_lanes64_4(&hd->u.state64[8], lanes + 8 * 8);
+ absorb_lanes64_1(&hd->u.state64[12], lanes + 8 * 12);
+ lanes += 8 * 13;
+ nlanes -= 13;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+
+ case 9:
+ /* SHA3-512: whole 9-lane blocks, split as 8 + 1 lanes. */
+ while (pos == 0 && nlanes >= 9)
+ {
+ absorb_lanes64_8(&hd->u.state64[0], lanes + 8 * 0);
+ absorb_lanes64_1(&hd->u.state64[8], lanes + 8 * 8);
+ lanes += 8 * 9;
+ nlanes -= 9;
+
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ }
+ break;
+ }
+ /* Lane-by-lane path: used for partial blocks (POS != 0 or fewer
+  * than BLOCKLANES lanes left) and for BLOCKLANES values that have
+  * no fast path in the switch above. */
+ while (nlanes)
+ {
+ hd->u.state64[pos] ^= buf_get_le64(lanes);
+ lanes += 8;
+ nlanes--;
+
+ if (++pos == blocklanes)
+ {
+ burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+ pos = 0;
+ break; /* Block done; return to the outer loop so that any
+         * remaining whole blocks can use the fast path. */
+ }
+ }
+ }
+
+ return burn;
+}