summary | refs | log | tree | commit | diff
path: root/cipher/serpent.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2015-08-11 07:22:16 +0300
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2015-08-12 17:41:42 +0300
commit: 24ebf53f1e8a8afa27dcd768339bda70a740bb03 (patch)
tree: 2086fe6cd7e7d5c0cb24181fdaf332946aa3f69c /cipher/serpent.c
parent: e11895da1f4af9782d89e92ba2e6b1a63235b54b (diff)
download: libgcrypt-24ebf53f1e8a8afa27dcd768339bda70a740bb03.tar.gz
Simplify OCB offset calculation for parallel implementations
* cipher/camellia-glue.c (_gcry_camellia_ocb_crypt)
(_gcry_camellia_ocb_auth): Precalculate Ls array always, instead of
just if 'blkn % <parallel blocks> == 0'.
* cipher/serpent.c (_gcry_serpent_ocb_crypt)
(_gcry_serpent_ocb_auth): Ditto.
* cipher/rijndael-aesni.c (get_l): Remove low-bit checks.
(aes_ocb_enc, aes_ocb_dec, _gcry_aes_aesni_ocb_auth): Handle leading
blocks until the block counter is a multiple of 4, so that the parallel
block processing loop can use the 'c->u_mode.ocb.L' array directly.
* tests/basic.c (check_ocb_cipher_largebuf): Rename to...
(check_ocb_cipher_largebuf_split): ...this and add an option to
process the large buffer as two split buffers.
(check_ocb_cipher_largebuf): New.
--
This patch simplifies the source and reduces object size.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/serpent.c')
-rw-r--r-- cipher/serpent.c 370
1 file changed, 150 insertions, 220 deletions
diff --git a/cipher/serpent.c b/cipher/serpent.c
index a47a1b77..fc3afa6b 100644
--- a/cipher/serpent.c
+++ b/cipher/serpent.c
@@ -1250,56 +1250,45 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_avx2 = 0;
const void *Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ const void **l;
int i;
- if (blkn % 16 == 0)
+ if (nblocks >= 16)
{
for (i = 0; i < 16; i += 8)
{
- Ls[i + 0] = c->u_mode.ocb.L[0];
- Ls[i + 1] = c->u_mode.ocb.L[1];
- Ls[i + 2] = c->u_mode.ocb.L[0];
- Ls[i + 3] = c->u_mode.ocb.L[2];
- Ls[i + 4] = c->u_mode.ocb.L[0];
- Ls[i + 5] = c->u_mode.ocb.L[1];
- Ls[i + 6] = c->u_mode.ocb.L[0];
+ Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
}
- Ls[7] = c->u_mode.ocb.L[3];
- }
+ Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
- /* Process data in 16 block chunks. */
- while (nblocks >= 16)
- {
- /* l_tmp will be used only every 65536-th block. */
- if (blkn % 16 == 0)
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
{
+ /* l_tmp will be used only every 65536-th block. */
blkn += 16;
- Ls[15] = ocb_get_l(c, l_tmp, blkn);
+ *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
+
+ if (encrypt)
+ _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 16;
+ outbuf += 16 * sizeof(serpent_block_t);
+ inbuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
}
- else
- {
- for (i = 0; i < 16; i += 4)
- {
- Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
- Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
- Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
- Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
- blkn += 4;
- }
- }
-
- if (encrypt)
- _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
- else
- _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
-
- nblocks -= 16;
- outbuf += 16 * sizeof(serpent_block_t);
- inbuf += 16 * sizeof(serpent_block_t);
- did_use_avx2 = 1;
}
if (did_use_avx2)
@@ -1317,51 +1306,39 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_sse2 = 0;
const void *Ls[8];
- int i;
+ unsigned int n = 8 - (blkn % 8);
+ const void **l;
- if (blkn % 8 == 0)
+ if (nblocks >= 8)
{
- Ls[0] = c->u_mode.ocb.L[0];
- Ls[1] = c->u_mode.ocb.L[1];
- Ls[2] = c->u_mode.ocb.L[0];
- Ls[3] = c->u_mode.ocb.L[2];
- Ls[4] = c->u_mode.ocb.L[0];
- Ls[5] = c->u_mode.ocb.L[1];
- Ls[6] = c->u_mode.ocb.L[0];
- }
-
- /* Process data in 8 block chunks. */
- while (nblocks >= 8)
- {
- /* l_tmp will be used only every 65536-th block. */
- if (blkn % 8 == 0)
+ Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
{
+ /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- Ls[7] = ocb_get_l(c, l_tmp, blkn);
- }
- else
- {
- for (i = 0; i < 8; i += 4)
- {
- Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
- Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
- Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
- Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
- blkn += 4;
- }
+ *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+
+ if (encrypt)
+ _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
}
-
- if (encrypt)
- _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
- else
- _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
-
- nblocks -= 8;
- outbuf += 8 * sizeof(serpent_block_t);
- inbuf += 8 * sizeof(serpent_block_t);
- did_use_sse2 = 1;
}
if (did_use_sse2)
@@ -1380,51 +1357,39 @@ _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
{
int did_use_neon = 0;
const void *Ls[8];
- int i;
+ unsigned int n = 8 - (blkn % 8);
+ const void **l;
- if (blkn % 8 == 0)
+ if (nblocks >= 8)
{
- Ls[0] = c->u_mode.ocb.L[0];
- Ls[1] = c->u_mode.ocb.L[1];
- Ls[2] = c->u_mode.ocb.L[0];
- Ls[3] = c->u_mode.ocb.L[2];
- Ls[4] = c->u_mode.ocb.L[0];
- Ls[5] = c->u_mode.ocb.L[1];
- Ls[6] = c->u_mode.ocb.L[0];
- }
-
- /* Process data in 8 block chunks. */
- while (nblocks >= 8)
- {
- /* l_tmp will be used only every 65536-th block. */
- if (blkn % 8 == 0)
+ Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
{
+ /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- Ls[7] = ocb_get_l(c, l_tmp, blkn);
- }
- else
- {
- for (i = 0; i < 8; i += 4)
- {
- Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
- Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
- Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
- Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
- blkn += 4;
- }
+ *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
+
+ if (encrypt)
+ _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+ else
+ _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+ c->u_ctr.ctr, Ls);
+
+ nblocks -= 8;
+ outbuf += 8 * sizeof(serpent_block_t);
+ inbuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
}
-
- if (encrypt)
- _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
- else
- _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
- c->u_ctr.ctr, Ls);
-
- nblocks -= 8;
- outbuf += 8 * sizeof(serpent_block_t);
- inbuf += 8 * sizeof(serpent_block_t);
- did_use_neon = 1;
}
if (did_use_neon)
@@ -1471,51 +1436,40 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_avx2 = 0;
const void *Ls[16];
+ unsigned int n = 16 - (blkn % 16);
+ const void **l;
int i;
- if (blkn % 16 == 0)
+ if (nblocks >= 16)
{
for (i = 0; i < 16; i += 8)
{
- Ls[i + 0] = c->u_mode.ocb.L[0];
- Ls[i + 1] = c->u_mode.ocb.L[1];
- Ls[i + 2] = c->u_mode.ocb.L[0];
- Ls[i + 3] = c->u_mode.ocb.L[2];
- Ls[i + 4] = c->u_mode.ocb.L[0];
- Ls[i + 5] = c->u_mode.ocb.L[1];
- Ls[i + 6] = c->u_mode.ocb.L[0];
+ Ls[(i + 0 + n) % 16] = c->u_mode.ocb.L[0];
+ Ls[(i + 1 + n) % 16] = c->u_mode.ocb.L[1];
+ Ls[(i + 2 + n) % 16] = c->u_mode.ocb.L[0];
+ Ls[(i + 3 + n) % 16] = c->u_mode.ocb.L[2];
+ Ls[(i + 4 + n) % 16] = c->u_mode.ocb.L[0];
+ Ls[(i + 5 + n) % 16] = c->u_mode.ocb.L[1];
+ Ls[(i + 6 + n) % 16] = c->u_mode.ocb.L[0];
}
- Ls[7] = c->u_mode.ocb.L[3];
- }
+ Ls[(7 + n) % 16] = c->u_mode.ocb.L[3];
+ l = &Ls[(15 + n) % 16];
- /* Process data in 16 block chunks. */
- while (nblocks >= 16)
- {
- /* l_tmp will be used only every 65536-th block. */
- if (blkn % 16 == 0)
+ /* Process data in 16 block chunks. */
+ while (nblocks >= 16)
{
+ /* l_tmp will be used only every 65536-th block. */
blkn += 16;
- Ls[15] = ocb_get_l(c, l_tmp, blkn);
- }
- else
- {
- for (i = 0; i < 16; i += 4)
- {
- Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
- Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
- Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
- Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
- blkn += 4;
- }
- }
+ *l = ocb_get_l(c, l_tmp, blkn - blkn % 16);
- _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum, Ls);
+ _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
- nblocks -= 16;
- abuf += 16 * sizeof(serpent_block_t);
- did_use_avx2 = 1;
+ nblocks -= 16;
+ abuf += 16 * sizeof(serpent_block_t);
+ did_use_avx2 = 1;
+ }
}
if (did_use_avx2)
@@ -1533,46 +1487,34 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_sse2 = 0;
const void *Ls[8];
- int i;
+ unsigned int n = 8 - (blkn % 8);
+ const void **l;
- if (blkn % 8 == 0)
+ if (nblocks >= 8)
{
- Ls[0] = c->u_mode.ocb.L[0];
- Ls[1] = c->u_mode.ocb.L[1];
- Ls[2] = c->u_mode.ocb.L[0];
- Ls[3] = c->u_mode.ocb.L[2];
- Ls[4] = c->u_mode.ocb.L[0];
- Ls[5] = c->u_mode.ocb.L[1];
- Ls[6] = c->u_mode.ocb.L[0];
- }
-
- /* Process data in 8 block chunks. */
- while (nblocks >= 8)
- {
- /* l_tmp will be used only every 65536-th block. */
- if (blkn % 8 == 0)
+ Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
{
+ /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- Ls[7] = ocb_get_l(c, l_tmp, blkn);
- }
- else
- {
- for (i = 0; i < 8; i += 4)
- {
- Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
- Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
- Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
- Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
- blkn += 4;
- }
- }
+ *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
- _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum, Ls);
+ _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
- nblocks -= 8;
- abuf += 8 * sizeof(serpent_block_t);
- did_use_sse2 = 1;
+ nblocks -= 8;
+ abuf += 8 * sizeof(serpent_block_t);
+ did_use_sse2 = 1;
+ }
}
if (did_use_sse2)
@@ -1591,46 +1533,34 @@ _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
{
int did_use_neon = 0;
const void *Ls[8];
- int i;
-
- if (blkn % 8 == 0)
- {
- Ls[0] = c->u_mode.ocb.L[0];
- Ls[1] = c->u_mode.ocb.L[1];
- Ls[2] = c->u_mode.ocb.L[0];
- Ls[3] = c->u_mode.ocb.L[2];
- Ls[4] = c->u_mode.ocb.L[0];
- Ls[5] = c->u_mode.ocb.L[1];
- Ls[6] = c->u_mode.ocb.L[0];
- }
+ unsigned int n = 8 - (blkn % 8);
+ const void **l;
- /* Process data in 8 block chunks. */
- while (nblocks >= 8)
+ if (nblocks >= 8)
{
- /* l_tmp will be used only every 65536-th block. */
- if (blkn % 8 == 0)
+ Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+ l = &Ls[(7 + n) % 8];
+
+ /* Process data in 8 block chunks. */
+ while (nblocks >= 8)
{
+ /* l_tmp will be used only every 65536-th block. */
blkn += 8;
- Ls[7] = ocb_get_l(c, l_tmp, blkn);
- }
- else
- {
- for (i = 0; i < 8; i += 4)
- {
- Ls[i + 0] = ocb_get_l(c, l_tmp, blkn + 1);
- Ls[i + 1] = ocb_get_l(c, l_tmp, blkn + 2);
- Ls[i + 2] = ocb_get_l(c, l_tmp, blkn + 3);
- Ls[i + 3] = ocb_get_l(c, l_tmp, blkn + 4);
- blkn += 4;
- }
- }
+ *l = ocb_get_l(c, l_tmp, blkn - blkn % 8);
- _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
- c->u_mode.ocb.aad_sum, Ls);
+ _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum, Ls);
- nblocks -= 8;
- abuf += 8 * sizeof(serpent_block_t);
- did_use_neon = 1;
+ nblocks -= 8;
+ abuf += 8 * sizeof(serpent_block_t);
+ did_use_neon = 1;
+ }
}
if (did_use_neon)