From 49f52c67fb42c0656c8f9af655087f444562ca82 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna
Date: Mon, 10 Aug 2015 22:09:56 +0300
Subject: Optimize OCB offset calculation

* cipher/cipher-internal.h (ocb_get_l): New.
* cipher/cipher-ocb.c (_gcry_cipher_ocb_authenticate)
(ocb_crypt): Use 'ocb_get_l' instead of '_gcry_cipher_ocb_get_l'.
* cipher/camellia-glue.c (get_l): Remove.
(_gcry_camellia_ocb_crypt, _gcry_camellia_ocb_auth): Precalculate
offset array when block count matches parallel operation size; Use
'ocb_get_l' instead of 'get_l'.
* cipher/rijndael-aesni.c (get_l): Add fast path for 75% most common
offsets.
(aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Precalculate
offset array when block count matches parallel operation size.
* cipher/rijndael-ssse3-amd64.c (get_l): Add fast path for 75% most
common offsets.
* cipher/rijndael.c (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth): Use
'ocb_get_l' instead of '_gcry_cipher_ocb_get_l'.
* cipher/serpent.c (get_l): Remove.
(_gcry_serpent_ocb_crypt, _gcry_serpent_ocb_auth): Precalculate
offset array when block count matches parallel operation size; Use
'ocb_get_l' instead of 'get_l'.
* cipher/twofish.c (get_l): Remove.
(_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): Use 'ocb_get_l'
instead of 'get_l'.
--

Patch optimizes OCB offset calculation for generic code and
assembly implementations with parallel block processing.
Benchmark of OCB AES-NI on Intel Haswell:

$ tests/bench-slope --cpu-mhz 3201 cipher aes

Before:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        CTR enc |     0.274 ns/B    3483.9 MiB/s     0.876 c/B
        CTR dec |     0.273 ns/B    3490.0 MiB/s     0.875 c/B
        OCB enc |     0.289 ns/B    3296.1 MiB/s     0.926 c/B
        OCB dec |     0.299 ns/B    3189.9 MiB/s     0.957 c/B
       OCB auth |     0.260 ns/B    3670.0 MiB/s     0.832 c/B

After:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        CTR enc |     0.273 ns/B    3489.4 MiB/s     0.875 c/B
        CTR dec |     0.273 ns/B    3487.5 MiB/s     0.875 c/B
        OCB enc |     0.248 ns/B    3852.8 MiB/s     0.792 c/B
        OCB dec |     0.261 ns/B    3659.5 MiB/s     0.834 c/B
       OCB auth |     0.227 ns/B    4205.5 MiB/s     0.726 c/B

Signed-off-by: Jussi Kivilinna
---
 cipher/rijndael.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

(limited to 'cipher/rijndael.c')

diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 4368c6da..eff59c26 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -1246,13 +1246,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
-          unsigned int ntz = _gcry_ctz64 (i);
-          const unsigned char *l;
-
-          if (ntz < OCB_L_TABLE_SIZE)
-            l = c->u_mode.ocb.L[ntz];
-          else
-            l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1277,13 +1271,7 @@ _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.data_nblocks;
-          unsigned int ntz = _gcry_ctz64 (i);
-          const unsigned char *l;
-
-          if (ntz < OCB_L_TABLE_SIZE)
-            l = c->u_mode.ocb.L[ntz];
-          else
-            l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
@@ -1343,13 +1331,7 @@ _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
       for ( ;nblocks; nblocks-- )
         {
           u64 i = ++c->u_mode.ocb.aad_nblocks;
-          unsigned int ntz = _gcry_ctz64 (i);
-          const unsigned char *l;
-
-          if (ntz < OCB_L_TABLE_SIZE)
-            l = c->u_mode.ocb.L[ntz];
-          else
-            l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+          const unsigned char *l = ocb_get_l(c, l_tmp.x1, i);
 
           /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
           buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
-- 
cgit v1.2.1