diff options
author: Jussi Kivilinna <jussi.kivilinna@iki.fi>  2015-07-07 21:52:34 +0300
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi>  2015-07-27 11:47:17 +0300
commit: 7f6804c37c4b41d85fb26aa723b1c41e4a3cf278 (patch)
tree: 586b8d3f5af239e7161e87578c27869ac0b693c9 /cipher
parent: bb088c6b1620504fdc79e89af27c2bf3fb02b4b4 (diff)
download: libgcrypt-7f6804c37c4b41d85fb26aa723b1c41e4a3cf278.tar.gz
Add bulk OCB for Twofish AMD64 implementation
* cipher/cipher.c (_gcry_cipher_open_internal): Setup OCB bulk
functions for Twofish.
* cipher/twofish-amd64.S: Add OCB assembly functions.
* cipher/twofish.c (_gcry_twofish_amd64_ocb_enc)
(_gcry_twofish_amd64_ocb_dec, _gcry_twofish_amd64_ocb_auth): New
prototypes.
(call_sysv_fn5, call_sysv_fn6, twofish_amd64_ocb_enc)
(twofish_amd64_ocb_dec, twofish_amd64_ocb_auth, get_l)
(_gcry_twofish_ocb_crypt, _gcry_twofish_ocb_auth): New.
* src/cipher.h (_gcry_twofish_ocb_crypt)
(_gcry_twofish_ocb_auth): New.
* tests/basic.c (check_ocb_cipher): Add test-vector for Twofish.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- | cipher/cipher.c | 2 | ||||
-rw-r--r-- | cipher/twofish-amd64.S | 310 | ||||
-rw-r--r-- | cipher/twofish.c | 259 |
3 files changed, 570 insertions(+), 1 deletion(-)
diff --git a/cipher/cipher.c b/cipher/cipher.c index 2d2b0ade..8483c5fc 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -561,6 +561,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, h->bulk.cbc_dec = _gcry_twofish_cbc_dec; h->bulk.cfb_dec = _gcry_twofish_cfb_dec; h->bulk.ctr_enc = _gcry_twofish_ctr_enc; + h->bulk.ocb_crypt = _gcry_twofish_ocb_crypt; + h->bulk.ocb_auth = _gcry_twofish_ocb_auth; break; #endif /*USE_TWOFISH*/ diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S index ea88b94e..aa964e03 100644 --- a/cipher/twofish-amd64.S +++ b/cipher/twofish-amd64.S @@ -1,6 +1,6 @@ /* twofish-amd64.S - AMD64 assembly implementation of Twofish cipher * - * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. * @@ -734,5 +734,313 @@ _gcry_twofish_amd64_cfb_dec: ret; ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;) +.align 8 +.globl _gcry_twofish_amd64_ocb_enc +ELF(.type _gcry_twofish_amd64_ocb_enc,@function;) +_gcry_twofish_amd64_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %rdx, RX0; + movq %rcx, RX1; + movq %r8, RX2; + movq %r9, RY0; + movq %rsi, RY1; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* Store Offset_i */ + movq RT0, (0 * 8)(RY1); + movq RT1, (1 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB0, (0 * 8)(RX2); + xor RCD0, (1 * 8)(RX2); + /* PX_i = 
P_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* Store Offset_i */ + movq RT0, (2 * 8)(RY1); + movq RT1, (3 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB1, (0 * 8)(RX2); + xor RCD1, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* Store Offset_i */ + movq RT0, (4 * 8)(RY1); + movq RT1, (5 * 8)(RY1); + /* Checksum_i = Checksum_{i-1} xor P_i */ + xor RAB2, (0 * 8)(RX2); + xor RCD2, (1 * 8)(RX2); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* CX_i = ENCIPHER(K, PX_i) */ + call __twofish_enc_blk3; + + movq (6 * 8)(%rsp), RX1; /*dst*/ + + /* C_i = CX_i xor Offset_i */ + xorq RCD0, (0 * 8)(RX1); + xorq RAB0, (1 * 8)(RX1); + xorq RCD1, (2 * 8)(RX1); + xorq RAB1, (3 * 8)(RX1); + xorq RCD2, (4 * 8)(RX1); + xorq RAB2, (5 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;) + +.align 8 +.globl _gcry_twofish_amd64_ocb_dec +ELF(.type _gcry_twofish_amd64_ocb_dec,@function;) +_gcry_twofish_amd64_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 
8)(%rsp); + + movq %rsi, (6 * 8)(%rsp); + movq %r8, (7 * 8)(%rsp); + movq %rdx, RX0; + movq %rcx, RX1; + movq %r9, RY0; + movq %rsi, RY1; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* Store Offset_i */ + movq RT0, (0 * 8)(RY1); + movq RT1, (1 * 8)(RY1); + /* CX_i = C_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* Store Offset_i */ + movq RT0, (2 * 8)(RY1); + movq RT1, (3 * 8)(RY1); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* Store Offset_i */ + movq RT0, (4 * 8)(RY1); + movq RT1, (5 * 8)(RY1); + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* PX_i = DECIPHER(K, CX_i) */ + call __twofish_dec_blk3; + + movq (7 * 8)(%rsp), RX2; /*checksum*/ + movq (6 * 8)(%rsp), RX1; /*dst*/ + + /* Load checksum */ + movq (0 * 8)(RX2), RT0; + movq (1 * 8)(RX2), RT1; + + /* P_i = PX_i xor Offset_i */ + xorq RCD0, (0 * 8)(RX1); + xorq RAB0, (1 * 8)(RX1); + xorq RCD1, (2 * 8)(RX1); + xorq RAB1, (3 * 8)(RX1); + xorq RCD2, (4 * 8)(RX1); + xorq RAB2, (5 * 8)(RX1); + + /* Checksum_i = Checksum_{i-1} xor P_i */ + xorq (0 * 8)(RX1), RT0; + xorq (1 * 8)(RX1), RT1; + xorq (2 * 8)(RX1), RT0; + xorq (3 * 8)(RX1), RT1; + xorq (4 * 8)(RX1), RT0; + xorq (5 * 8)(RX1), RT1; + + /* Store checksum */ + movq RT0, (0 * 8)(RX2); + movq RT1, (1 * 8)(RX2); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 
* 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;) + +.align 8 +.globl _gcry_twofish_amd64_ocb_auth +ELF(.type _gcry_twofish_amd64_ocb_auth,@function;) +_gcry_twofish_amd64_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (3 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[3]) + */ + subq $(8 * 8), %rsp; + movq %rbp, (0 * 8)(%rsp); + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rcx, (6 * 8)(%rsp); + movq %rsi, RX0; + movq %rdx, RX1; + movq %r8, RY0; + + /* Load offset */ + movq (0 * 8)(RX1), RT0; + movq (1 * 8)(RX1), RT1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq (RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (0 * 8)(RX0), RAB0; + movq (1 * 8)(RX0), RCD0; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB0; + xorq RT1, RCD0; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 8(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (2 * 8)(RX0), RAB1; + movq (3 * 8)(RX0), RCD1; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB1; + xorq RT1, RCD1; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + movq 16(RY0), RY2; + xorq (0 * 8)(RY2), RT0; + xorq (1 * 8)(RY2), RT1; + movq (4 * 8)(RX0), RAB2; + movq (5 * 8)(RX0), RCD2; + /* PX_i = P_i xor Offset_i */ + xorq RT0, RAB2; + xorq RT1, RCD2; + + /* Store offset */ + movq RT0, (0 * 8)(RX1); + movq RT1, (1 * 8)(RX1); + + /* C_i = ENCIPHER(K, PX_i) */ + call __twofish_enc_blk3; + + movq (6 * 8)(%rsp), RX1; /*checksum*/ + + /* Checksum_i = C_i xor Checksum_i */ + xorq RCD0, RCD1; + xorq RAB0, RAB1; + xorq RCD1, RCD2; + xorq RAB1, RAB2; + xorq RCD2, (0 * 8)(RX1); + xorq RAB2, (1 * 8)(RX1); + + movq (0 * 8)(%rsp), %rbp; + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 
8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + addq $(8 * 8), %rsp; + + ret; +ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;) + #endif /*USE_TWOFISH*/ #endif /*__x86_64*/ diff --git a/cipher/twofish.c b/cipher/twofish.c index ce83fadf..9b9c35f5 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -45,6 +45,7 @@ #include "g10lib.h" #include "cipher.h" #include "bufhelp.h" +#include "cipher-internal.h" #include "cipher-selftest.h" @@ -755,6 +756,18 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv); +extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, + const byte *in, byte *offset, + byte *checksum, const void *Ls[3]); + +extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, + const byte *in, byte *offset, + byte *checksum, const void *Ls[3]); + +extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx, + const byte *abuf, byte *offset, + byte *checksum, const void *Ls[3]); + #ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS static inline void call_sysv_fn (const void *fn, const void *arg1, const void *arg2, @@ -771,6 +784,43 @@ call_sysv_fn (const void *fn, const void *arg1, const void *arg2, : : "cc", "memory", "r8", "r9", "r10", "r11"); } + +static inline void +call_sysv_fn5 (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4, const void *arg5) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. 
*/ + asm volatile ("movq %[arg5], %%r8\n\t" + "callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : [arg5] "g" (arg5) + : "cc", "memory", "r8", "r9", "r10", "r11"); +} + +static inline void +call_sysv_fn6 (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4, const void *arg5, + const void *arg6) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + asm volatile ("movq %[arg5], %%r8\n\t" + "movq %[arg6], %%r9\n\t" + "callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : [arg5] "g" (arg5), + [arg6] "g" (arg6) + : "cc", "memory", "r8", "r9", "r10", "r11"); +} #endif static inline void @@ -826,6 +876,39 @@ twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, #endif } +static inline void +twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in, + byte *offset, byte *checksum, const void *Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn6(_gcry_twofish_amd64_ocb_enc, ctx, out, in, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls); +#endif +} + +static inline void +twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in, + byte *offset, byte *checksum, const void *Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn6(_gcry_twofish_amd64_ocb_dec, ctx, out, in, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls); +#endif +} + +static inline void +twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf, + byte *offset, byte *checksum, const void *Ls[3]) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn5(_gcry_twofish_amd64_ocb_auth, ctx, abuf, offset, checksum, Ls); +#else + _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls); +#endif +} + #elif 
defined(USE_ARM_ASM) /* Assembly implementations of Twofish. */ @@ -1188,6 +1271,182 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, _gcry_burn_stack(burn_stack_depth); } +static inline const unsigned char * +get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i) +{ + unsigned int ntz = _gcry_ctz64 (i); + + if (ntz < OCB_L_TABLE_SIZE) + return c->u_mode.ocb.L[ntz]; + else + return _gcry_cipher_ocb_get_l (c, l_tmp, i); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +void +_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + TWOFISH_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char l_tmp[TWOFISH_BLOCKSIZE]; + const unsigned char *l; + unsigned int burn, burn_stack_depth = 0; + u64 blkn = c->u_mode.ocb.data_nblocks; + +#ifdef USE_AMD64_ASM + { + const void *Ls[3]; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + /* l_tmp will be used only every 65536-th block. */ + Ls[0] = get_l(c, l_tmp, blkn + 1); + Ls[1] = get_l(c, l_tmp, blkn + 2); + Ls[2] = get_l(c, l_tmp, blkn + 3); + blkn += 3; + + if (encrypt) + twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, + Ls); + else + twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr, + Ls); + + nblocks -= 3; + outbuf += 3 * TWOFISH_BLOCKSIZE; + inbuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + } + + /* Use generic code to handle smaller chunks... 
*/ + } +#endif + + if (encrypt) + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE); + buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + burn = twofish_encrypt(ctx, l_tmp, l_tmp); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE); + + inbuf += TWOFISH_BLOCKSIZE; + outbuf += TWOFISH_BLOCKSIZE; + } + } + else + { + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, TWOFISH_BLOCKSIZE); + buf_cpy (l_tmp, inbuf, TWOFISH_BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + burn = twofish_decrypt(ctx, l_tmp, l_tmp); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + buf_xor_1 (l_tmp, c->u_iv.iv, TWOFISH_BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp, TWOFISH_BLOCKSIZE); + buf_cpy (outbuf, l_tmp, TWOFISH_BLOCKSIZE); + + inbuf += TWOFISH_BLOCKSIZE; + outbuf += TWOFISH_BLOCKSIZE; + } + } + + c->u_mode.ocb.data_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + +/* Bulk authentication of complete blocks in OCB mode. 
*/ +void +_gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + TWOFISH_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + unsigned char l_tmp[TWOFISH_BLOCKSIZE]; + const unsigned char *l; + unsigned int burn, burn_stack_depth = 0; + u64 blkn = c->u_mode.ocb.aad_nblocks; + +#ifdef USE_AMD64_ASM + { + const void *Ls[3]; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + /* l_tmp will be used only every 65536-th block. */ + Ls[0] = get_l(c, l_tmp, blkn + 1); + Ls[1] = get_l(c, l_tmp, blkn + 2); + Ls[2] = get_l(c, l_tmp, blkn + 3); + blkn += 3; + + twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 3; + abuf += 3 * TWOFISH_BLOCKSIZE; + + burn = 8 * sizeof(void*); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for (; nblocks; nblocks--) + { + l = get_l(c, l_tmp, ++blkn); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_mode.ocb.aad_offset, l, TWOFISH_BLOCKSIZE); + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, TWOFISH_BLOCKSIZE); + burn = twofish_encrypt(ctx, l_tmp, l_tmp); + if (burn > burn_stack_depth) + burn_stack_depth = burn; + buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, TWOFISH_BLOCKSIZE); + + abuf += TWOFISH_BLOCKSIZE; + } + + c->u_mode.ocb.aad_nblocks = blkn; + + wipememory(&l_tmp, sizeof(l_tmp)); + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +} + /* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR |