path: root/cipher/rijndael-aesni.c
author    Jussi Kivilinna <jussi.kivilinna@iki.fi>  2015-04-18 17:41:34 +0300
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>  2015-04-18 17:41:34 +0300
commit    305cc878d395475c46b4ef52f4764bd0c85bf8ac (patch)
tree      fd146e81575e8b0e68ddc51237c7acb00df79c0b /cipher/rijndael-aesni.c
parent    fe38d3815b4cd203cd529949e244aca80d32897f (diff)
download  libgcrypt-305cc878d395475c46b4ef52f4764bd0c85bf8ac.tar.gz
Add OCB bulk crypt/auth functions for AES/AES-NI
* cipher/cipher-internal.h (gcry_cipher_handle): Add bulk.ocb_crypt and
bulk.ocb_auth.
(_gcry_cipher_ocb_get_l): New prototype.
* cipher/cipher-ocb.c (get_l): Rename to ...
(_gcry_cipher_ocb_get_l): ... this.
(_gcry_cipher_ocb_authenticate, ocb_crypt): Use bulk function when
available.
* cipher/cipher.c (_gcry_cipher_open_internal): Setup OCB bulk functions
for AES.
* cipher/rijndael-aesni.c (get_l, aesni_ocb_enc, aesni_ocb_dec)
(_gcry_aes_aesni_ocb_crypt, _gcry_aes_aesni_ocb_auth): New.
* cipher/rijndael.c [USE_AESNI] (_gcry_aes_aesni_ocb_crypt)
(_gcry_aes_aesni_ocb_auth): New prototypes.
(_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth): New.
* src/cipher.h (_gcry_aes_ocb_crypt, _gcry_aes_ocb_auth): New
prototypes.
* tests/basic.c (check_ocb_cipher_largebuf): New.
(check_ocb_cipher): Add large buffer encryption/decryption test.
--

Patch adds bulk encryption/decryption/authentication code for AES-NI
accelerated AES.

Benchmark on Intel i5-4570 (3200 MHz, turbo off):

Before:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        OCB enc |      2.12 ns/B     449.7 MiB/s      6.79 c/B
        OCB dec |      2.12 ns/B     449.6 MiB/s      6.79 c/B
       OCB auth |      2.07 ns/B     459.9 MiB/s      6.64 c/B

After:
 AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
        OCB enc |     0.292 ns/B    3262.5 MiB/s     0.935 c/B
        OCB dec |     0.297 ns/B    3212.2 MiB/s     0.950 c/B
       OCB auth |     0.260 ns/B    3666.1 MiB/s     0.832 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
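For orientation, the per-block recurrences that the new bulk functions
implement (and that the inline comments in the diff below restate) can be
written in portable C roughly as follows. This is a minimal sketch, not
libgcrypt code: block_xor() and encipher() are hypothetical helpers standing
in for the SSE XOR sequences and the do_aesni_enc()/do_aesni_enc_vec4()
calls in the patch.

    #define BLOCKSIZE 16

    /* Hypothetical stand-in for the raw AES block encryption that the
       patch performs with AES-NI (do_aesni_enc / do_aesni_enc_vec4). */
    extern void encipher (unsigned char dst[BLOCKSIZE],
                          const unsigned char src[BLOCKSIZE]);

    static void
    block_xor (unsigned char *dst, const unsigned char *a,
               const unsigned char *b)
    {
      int i;
      for (i = 0; i < BLOCKSIZE; i++)
        dst[i] = a[i] ^ b[i];
    }

    /* One step of the OCB encryption recurrence from the inline comments:
         Offset_i   = Offset_{i-1} xor L_{ntz(i)}
         Checksum_i = Checksum_{i-1} xor P_i
         C_i        = Offset_i xor ENCIPHER(K, P_i xor Offset_i)      */
    static void
    ocb_enc_block (unsigned char *c_i, const unsigned char *p_i,
                   const unsigned char *l_ntz_i,
                   unsigned char *offset, unsigned char *checksum)
    {
      unsigned char tmp[BLOCKSIZE];

      block_xor (offset, offset, l_ntz_i);   /* advance Offset     */
      block_xor (checksum, checksum, p_i);   /* accumulate Checksum */
      block_xor (tmp, p_i, offset);          /* whiten plaintext   */
      encipher (tmp, tmp);
      block_xor (c_i, tmp, offset);          /* unwhiten -> C_i    */
    }

The AES-NI version keeps Offset cached in xmm5 and Checksum in xmm6 across
the whole buffer and pipelines four blocks per loop iteration, which is
where the roughly 7x speedup in the benchmark above comes from.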
Diffstat (limited to 'cipher/rijndael-aesni.c')
-rw-r--r--  cipher/rijndael-aesni.c  483
1 file changed, 483 insertions(+), 0 deletions(-)
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 3c367cec..9a816021 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -29,6 +29,7 @@
#include "bufhelp.h"
#include "cipher-selftest.h"
#include "rijndael-internal.h"
+#include "./cipher-internal.h"
#ifdef USE_AESNI
@@ -1251,4 +1252,486 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
aesni_cleanup_2_6 ();
}
+
+static inline const unsigned char *
+get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
+ unsigned char *ctr)
+{
+ const unsigned char *l;
+ unsigned int ntz;
+
+ if (i & 0xffffffffU)
+ {
+ asm ("rep;bsf %k[low], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [low] "r" (i & 0xffffffffU)
+ : "cc");
+ }
+ else
+ {
+ if (OCB_L_TABLE_SIZE < 32)
+ {
+ ntz = 32;
+ }
+ else if (i)
+ {
+ asm ("rep;bsf %k[high], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [high] "r" (i >> 32)
+ : "cc");
+ ntz += 32;
+ }
+ else
+ {
+ ntz = 64;
+ }
+ }
+
+ if (ntz < OCB_L_TABLE_SIZE)
+ {
+ l = c->u_mode.ocb.L[ntz];
+ }
+ else
+ {
+ /* Store Offset & Checksum before calling external function */
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*iv),
+ [ctr] "=m" (*ctr)
+ :
+ : "memory" );
+
+ l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
+
+ /* Restore Offset & Checksum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*iv),
+ [ctr] "m" (*ctr)
+ : "memory" );
+ }
+
+ return l;
+}
+
+
+static void
+aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+
+ aesni_prepare ();
+
+ /* Preload Offset and Checksum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv),
+ [ctr] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ for ( ;nblocks > 3 ; nblocks -= 4 )
+ {
+ const unsigned char *l[4];
+
+ /* l_tmp will be used only every 65536-th block. */
+ l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm5, %[outbuf0]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ : [l0] "m" (*l[0]),
+ [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm0\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "movdqu %%xmm5, %[outbuf1]\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [l1] "m" (*l[1]),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l2], %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm5, %[outbuf2]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+ : [l2] "m" (*l[2]),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [l3] "m" (*l[3]),
+ [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ do_aesni_enc_vec4 (ctx);
+
+ asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %[outbuf1],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm2\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ "movdqu %[outbuf2],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+ [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_iv.iv),
+ [ctr] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_6 ();
+
+ wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
+static void
+aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+
+ aesni_prepare ();
+
+ /* Preload Offset and Checksum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv),
+ [ctr] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ for ( ;nblocks > 3 ; nblocks -= 4 )
+ {
+ const unsigned char *l[4];
+
+ /* l_tmp will be used only every 65536-th block. */
+ l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm5, %[outbuf0]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ : [l0] "m" (*l[0]),
+ [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm0\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "movdqu %%xmm5, %[outbuf1]\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [l1] "m" (*l[1]),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l2], %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm5, %[outbuf2]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+ : [l2] "m" (*l[2]),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [l3] "m" (*l[3]),
+ [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ do_aesni_dec_vec4 (ctx);
+
+ asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %[outbuf1],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm2\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ "movdqu %[outbuf2],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ "pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm4, %%xmm6\n\t"
+ : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+ [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ do_aesni_dec (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_iv.iv),
+ [ctr] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_6 ();
+
+ wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
+void
+_gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+ if (encrypt)
+ aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
+ else
+ aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+}
+
+
+void
+_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks)
+{
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ u64 n = c->u_mode.ocb.aad_nblocks;
+
+ aesni_prepare ();
+
+ /* Preload Offset and Sum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "m" (*c->u_mode.ocb.aad_sum)
+ : "memory" );
+
+ for ( ;nblocks > 3 ; nblocks -= 4 )
+ {
+ const unsigned char *l[4];
+
+ /* l_tmp will be used only every 65536-th block. */
+ l[0] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+ l[1] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+ l[2] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+ l[3] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[abuf0], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ :
+ : [l0] "m" (*l[0]),
+ [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm0\n\t"
+ "movdqu %[abuf1], %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ :
+ : [l1] "m" (*l[1]),
+ [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l2], %%xmm0\n\t"
+ "movdqu %[abuf2], %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ :
+ : [l2] "m" (*l[2]),
+ [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ "movdqu %[abuf3], %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [l3] "m" (*l[3]),
+ [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ do_aesni_enc_vec4 (ctx);
+
+ asm volatile ("pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm4, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += 4*BLOCKSIZE;
+ }
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[abuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [abuf] "m" (*abuf)
+ : "memory" );
+
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm0, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += BLOCKSIZE;
+ }
+
+ c->u_mode.ocb.aad_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "=m" (*c->u_mode.ocb.aad_sum)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_6 ();
+
+ wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
#endif /* USE_AESNI */
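
A note on the get_l() helper above: ntz(i) is computed with "rep;bsf",
which executes as TZCNT on CPUs that support it and as plain BSF otherwise;
for the non-zero inputs the bulk loops guarantee (the block counter is
pre-incremented before every call), both return the trailing-zero count.
A portable sketch of the same computation, assuming a GCC/Clang-style
builtin, might look like:

    /* Portable equivalent of the "rep;bsf"-based ntz(i) in get_l() above.
       Valid only for i != 0, which holds because the callers pass a
       pre-incremented 1-based block counter. */
    static unsigned int
    ntz64 (unsigned long long i)
    {
      return (unsigned int) __builtin_ctzll (i);
    }

When ntz(i) falls inside the precomputed table, get_l() simply returns
c->u_mode.ocb.L[ntz]. Otherwise it must fall back to the generic
_gcry_cipher_ocb_get_l(), and because that external C function may clobber
SSE state, the cached Offset (xmm5) and Checksum (xmm6) are spilled to
memory before the call and reloaded afterwards, exactly as the
"Store Offset & Checksum" / "Restore Offset & Checksum" comments indicate.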