summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- cipher/cipher-internal.h | 5
-rw-r--r-- cipher/cipher-ocb.c | 84
-rw-r--r-- cipher/cipher.c | 2
-rw-r--r-- cipher/rijndael-aesni.c | 483
-rw-r--r-- cipher/rijndael.c | 161
-rw-r--r-- src/cipher.h | 4
-rw-r--r-- tests/basic.c | 174
7 files changed, 884 insertions, 29 deletions
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index 50b03243..e20ea562 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -128,6 +128,9 @@ struct gcry_cipher_handle
void (*ctr_enc)(void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
size_t nblocks);
+ void (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt);
+ void (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks);
} bulk;
@@ -440,6 +443,8 @@ gcry_err_code_t _gcry_cipher_ocb_get_tag
gcry_err_code_t _gcry_cipher_ocb_check_tag
/* */ (gcry_cipher_hd_t c,
const unsigned char *intag, size_t taglen);
+const unsigned char *_gcry_cipher_ocb_get_l
+/* */ (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n);
#endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c
index 62e79bbd..bc6fd87f 100644
--- a/cipher/cipher-ocb.c
+++ b/cipher/cipher-ocb.c
@@ -115,8 +115,8 @@ bit_copy (unsigned char *d, const unsigned char *s,
every 65536-th block. L_TMP is a helper buffer of size
OCB_BLOCK_LEN which is used to hold the computation if not taken
from the table. */
-static const unsigned char *
-get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
+const unsigned char *
+_gcry_cipher_ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n)
{
int ntz = _gcry_ctz64 (n);
@@ -257,6 +257,15 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
if (!abuflen)
return 0;
+ /* Use a bulk method if available. */
+ if (abuflen >= OCB_BLOCK_LEN && c->bulk.ocb_auth)
+ {
+ size_t nblks = abuflen / OCB_BLOCK_LEN;
+ c->bulk.ocb_auth (c, abuf, nblks);
+ abuf += nblks * OCB_BLOCK_LEN;
+ abuflen -= nblks * OCB_BLOCK_LEN;
+ }
+
/* Hash all full blocks. */
while (abuflen >= OCB_BLOCK_LEN)
{
@@ -264,7 +273,8 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
/* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
buf_xor_1 (c->u_mode.ocb.aad_offset,
- get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks), OCB_BLOCK_LEN);
+ _gcry_cipher_ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks),
+ OCB_BLOCK_LEN);
/* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN);
c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
@@ -341,40 +351,56 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt,
else if ((inbuflen % OCB_BLOCK_LEN))
return GPG_ERR_INV_LENGTH; /* We support only full blocks for now. */
- if (encrypt)
+ /* Use a bulk method if available. */
+ if (nblks && c->bulk.ocb_crypt)
{
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_checksum (c->u_ctr.ctr, inbuf, nblks);
+ c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+ inbuf += nblks * OCB_BLOCK_LEN;
+ outbuf += nblks * OCB_BLOCK_LEN;
+ inbuflen -= nblks * OCB_BLOCK_LEN;
+ outbuflen -= nblks * OCB_BLOCK_LEN;
+ nblks = 0;
}
- /* Encrypt all full blocks. */
- while (inbuflen >= OCB_BLOCK_LEN)
+ if (nblks)
{
- c->u_mode.ocb.data_nblocks++;
+ gcry_cipher_encrypt_t crypt_fn =
+ encrypt ? c->spec->encrypt : c->spec->decrypt;
- /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
- buf_xor_1 (c->u_iv.iv,
- get_l (c, l_tmp, c->u_mode.ocb.data_nblocks), OCB_BLOCK_LEN);
- /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
- buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
if (encrypt)
- nburn = c->spec->encrypt (&c->context.c, outbuf, outbuf);
- else
- nburn = c->spec->decrypt (&c->context.c, outbuf, outbuf);
- burn = nburn > burn ? nburn : burn;
- buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, inbuf, nblks);
+ }
- inbuf += OCB_BLOCK_LEN;
- inbuflen -= OCB_BLOCK_LEN;
- outbuf += OCB_BLOCK_LEN;
- outbuflen =- OCB_BLOCK_LEN;
- }
+      /* Process all full blocks one at a time with the generic
+         single-block primitive; CRYPT_FN is the cipher's encrypt or
+         decrypt function depending on ENCRYPT.  */
+      while (inbuflen >= OCB_BLOCK_LEN)
+        {
+          c->u_mode.ocb.data_nblocks++;
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_iv.iv,
+                     _gcry_cipher_ocb_get_l (c, l_tmp,
+                                             c->u_mode.ocb.data_nblocks),
+                     OCB_BLOCK_LEN);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)
+             (respectively P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)
+             when decrypting).  */
+          buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+          nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+          burn = nburn > burn ? nburn : burn;
+          buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+          inbuf += OCB_BLOCK_LEN;
+          inbuflen -= OCB_BLOCK_LEN;
+          outbuf += OCB_BLOCK_LEN;
+          /* One output block has been consumed.  This must be "-=": the
+             former "=-" assigned the negated block length to OUTBUFLEN
+             instead of decrementing it.  */
+          outbuflen -= OCB_BLOCK_LEN;
+        }
- if (!encrypt)
- {
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);
- }
+ if (!encrypt)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks);
+ }
+ }
/* Encrypt final partial block. Note that we expect INBUFLEN to be
shorter than OCB_BLOCK_LEN (see above). */
diff --git a/cipher/cipher.c b/cipher/cipher.c
index 0a13fe61..6e1173f5 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -510,6 +510,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
h->bulk.cbc_enc = _gcry_aes_cbc_enc;
h->bulk.cbc_dec = _gcry_aes_cbc_dec;
h->bulk.ctr_enc = _gcry_aes_ctr_enc;
+ h->bulk.ocb_crypt = _gcry_aes_ocb_crypt;
+ h->bulk.ocb_auth = _gcry_aes_ocb_auth;
break;
#endif /*USE_AES*/
#ifdef USE_BLOWFISH
diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c
index 3c367cec..9a816021 100644
--- a/cipher/rijndael-aesni.c
+++ b/cipher/rijndael-aesni.c
@@ -29,6 +29,7 @@
#include "bufhelp.h"
#include "cipher-selftest.h"
#include "rijndael-internal.h"
+#include "./cipher-internal.h"
#ifdef USE_AESNI
@@ -1251,4 +1252,486 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf,
aesni_cleanup_2_6 ();
}
+
+/* Return a pointer to the OCB L value to be used for block number I.
+ *
+ * The number of trailing zero bits of I (NTZ) is determined with a bsf
+ * instruction on the low 32 bits of I or, if those are all zero, on the
+ * high 32 bits. If NTZ is below OCB_L_TABLE_SIZE the value is taken
+ * directly from the table c->u_mode.ocb.L. Otherwise the generic
+ * _gcry_cipher_ocb_get_l is called with L_TMP as its helper buffer.
+ *
+ * The callers in this file keep their running Offset in xmm5 and their
+ * running Checksum/Sum in xmm6. Around the call to the external function
+ * these two registers are written to *IV and *CTR and reloaded afterwards.
+ * NOTE(review): presumably because the external function may clobber SSE
+ * registers - confirm.
+ * NOTE(review): the "rep" prefix on bsf presumably lets newer CPUs decode
+ * the instruction as tzcnt - confirm. */
+static inline const unsigned char *
+get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv,
+ unsigned char *ctr)
+{
+ const unsigned char *l;
+ unsigned int ntz;
+
+ if (i & 0xffffffffU)
+ {
+ /* Low half is non-zero: count its trailing zeros. */
+ asm ("rep;bsf %k[low], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [low] "r" (i & 0xffffffffU)
+ : "cc");
+ }
+ else
+ {
+ if (OCB_L_TABLE_SIZE < 32)
+ {
+ /* The exact count is not needed here: NTZ is only compared against
+ OCB_L_TABLE_SIZE below, so any value >= the table size selects
+ the slow path, which works from I itself. */
+ ntz = 32;
+ }
+ else if (i)
+ {
+ /* Low half is zero but I is not: scan the high half. */
+ asm ("rep;bsf %k[high], %k[ntz]\n\t"
+ : [ntz] "=r" (ntz)
+ : [high] "r" (i >> 32)
+ : "cc");
+ ntz += 32;
+ }
+ else
+ {
+ /* I == 0. */
+ ntz = 64;
+ }
+ }
+
+ if (ntz < OCB_L_TABLE_SIZE)
+ {
+ l = c->u_mode.ocb.L[ntz];
+ }
+ else
+ {
+ /* Store Offset & Checksum before calling external function */
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*iv),
+ [ctr] "=m" (*ctr)
+ :
+ : "memory" );
+
+ l = _gcry_cipher_ocb_get_l (c, l_tmp, i);
+
+ /* Restore Offset & Checksum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*iv),
+ [ctr] "m" (*ctr)
+ : "memory" );
+ }
+
+ return l;
+}
+
+
+/* OCB encryption of NBLOCKS full blocks from INBUF_ARG to OUTBUF_ARG.
+ *
+ * For the whole call the running Offset lives in xmm5 and the running
+ * Checksum in xmm6. Both are loaded from c->u_iv.iv and c->u_ctr.ctr on
+ * entry and written back on exit, together with the advanced block
+ * counter c->u_mode.ocb.data_nblocks. Blocks are processed four at a time
+ * while at least four remain, then one at a time. */
+static void
+aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+
+ aesni_prepare ();
+
+ /* Preload Offset and Checksum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv),
+ [ctr] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ for ( ;nblocks > 3 ; nblocks -= 4 )
+ {
+ const unsigned char *l[4];
+
+ /* l_tmp will be used only every 65536-th block. */
+ l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ /* xmm5 is advanced for every block, so the offsets of blocks 0..2 are
+ parked in the corresponding output blocks and XORed back in after
+ the cipher call. Block 3 uses the offset still held in xmm5. The
+ masked plaintext blocks end up in xmm1..xmm4. */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm5, %[outbuf0]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ : [l0] "m" (*l[0]),
+ [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm0\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "movdqu %%xmm5, %[outbuf1]\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [l1] "m" (*l[1]),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l2], %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm5, %[outbuf2]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+ : [l2] "m" (*l[2]),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm4, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [l3] "m" (*l[3]),
+ [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ /* NOTE(review): presumably encrypts xmm1..xmm4 in place; the helper is
+ defined outside this hunk - confirm. */
+ do_aesni_enc_vec4 (ctx);
+
+ /* Apply the saved offsets to the cipher output and store the
+ ciphertext blocks. */
+ asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %[outbuf1],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm2\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ "movdqu %[outbuf2],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+ [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+ /* Remaining 0..3 blocks, one at a time; the block travels in xmm0. */
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ /* NOTE(review): presumably encrypts xmm0 in place - confirm. */
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ /* Write the block counter, Offset and Checksum back to the handle. */
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_iv.iv),
+ [ctr] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_6 ();
+
+ wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
+/* OCB decryption of NBLOCKS full blocks from INBUF_ARG to OUTBUF_ARG.
+ *
+ * For the whole call the running Offset lives in xmm5 and the running
+ * Checksum in xmm6. Both are loaded from c->u_iv.iv and c->u_ctr.ctr on
+ * entry and written back on exit, together with the advanced block
+ * counter c->u_mode.ocb.data_nblocks. Unlike encryption, the Checksum is
+ * updated after the cipher call, from the recovered plaintext. Blocks are
+ * processed four at a time while at least four remain, then one at a
+ * time. */
+static void
+aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ u64 n = c->u_mode.ocb.data_nblocks;
+
+ aesni_prepare ();
+
+ /* Preload Offset and Checksum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_iv.iv),
+ [ctr] "m" (*c->u_ctr.ctr)
+ : "memory" );
+
+ for ( ;nblocks > 3 ; nblocks -= 4 )
+ {
+ const unsigned char *l[4];
+
+ /* l_tmp will be used only every 65536-th block. */
+ l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+ l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ /* xmm5 is advanced for every block, so the offsets of blocks 0..2 are
+ parked in the corresponding output blocks and XORed back in after
+ the cipher call. Block 3 uses the offset still held in xmm5. The
+ masked ciphertext blocks end up in xmm1..xmm4. */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[inbuf0], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ "movdqu %%xmm5, %[outbuf0]\n\t"
+ : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE))
+ : [l0] "m" (*l[0]),
+ [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm0\n\t"
+ "movdqu %[inbuf1], %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ "movdqu %%xmm5, %[outbuf1]\n\t"
+ : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+ : [l1] "m" (*l[1]),
+ [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l2], %%xmm0\n\t"
+ "movdqu %[inbuf2], %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "movdqu %%xmm5, %[outbuf2]\n\t"
+ : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE))
+ : [l2] "m" (*l[2]),
+ [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ "movdqu %[inbuf3], %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [l3] "m" (*l[3]),
+ [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ /* NOTE(review): presumably decrypts xmm1..xmm4 in place; the helper is
+ defined outside this hunk - confirm. */
+ do_aesni_dec_vec4 (ctx);
+
+ /* Apply the saved offsets, store the plaintext blocks and fold the
+ four plaintext blocks into the Checksum (xmm6). */
+ asm volatile ("movdqu %[outbuf0],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm1\n\t"
+ "movdqu %%xmm1, %[outbuf0]\n\t"
+ "movdqu %[outbuf1],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm2\n\t"
+ "movdqu %%xmm2, %[outbuf1]\n\t"
+ "movdqu %[outbuf2],%%xmm0\n\t"
+ "pxor %%xmm0, %%xmm3\n\t"
+ "movdqu %%xmm3, %[outbuf2]\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ "movdqu %%xmm4, %[outbuf3]\n\t"
+ "pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm4, %%xmm6\n\t"
+ : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)),
+ [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)),
+ [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)),
+ [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+ :
+ : "memory" );
+
+ outbuf += 4*BLOCKSIZE;
+ inbuf += 4*BLOCKSIZE;
+ }
+ /* Remaining 0..3 blocks, one at a time; the block travels in xmm0. */
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[inbuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [inbuf] "m" (*inbuf)
+ : "memory" );
+
+ /* NOTE(review): presumably decrypts xmm0 in place - confirm. */
+ do_aesni_dec (ctx);
+
+ asm volatile ("pxor %%xmm5, %%xmm0\n\t"
+ "pxor %%xmm0, %%xmm6\n\t"
+ "movdqu %%xmm0, %[outbuf]\n\t"
+ : [outbuf] "=m" (*outbuf)
+ :
+ : "memory" );
+
+ inbuf += BLOCKSIZE;
+ outbuf += BLOCKSIZE;
+ }
+
+ /* Write the block counter, Offset and Checksum back to the handle. */
+ c->u_mode.ocb.data_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_iv.iv),
+ [ctr] "=m" (*c->u_ctr.ctr)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_6 ();
+
+ wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
+/* AES-NI bulk OCB entry point: process NBLOCKS full blocks from INBUF_ARG
+   to OUTBUF_ARG, encrypting when ENCRYPT is non-zero and decrypting
+   otherwise.  */
+void
+_gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                           const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  if (!encrypt)
+    {
+      aesni_ocb_dec (c, outbuf_arg, inbuf_arg, nblocks);
+      return;
+    }
+
+  aesni_ocb_enc (c, outbuf_arg, inbuf_arg, nblocks);
+}
+
+
+/* OCB authentication (AAD hashing) of NBLOCKS full blocks from ABUF_ARG.
+ *
+ * For the whole call the running AAD Offset lives in xmm5 and the running
+ * AAD Sum in xmm6. Both are loaded from c->u_mode.ocb.aad_offset and
+ * c->u_mode.ocb.aad_sum on entry and written back on exit, together with
+ * the advanced block counter c->u_mode.ocb.aad_nblocks. The asm operand
+ * names [iv] and [ctr] refer to these AAD fields here, not to c->u_iv or
+ * c->u_ctr. Blocks are processed four at a time while at least four
+ * remain, then one at a time. Nothing is written to an output buffer. */
+void
+_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks)
+{
+ union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ u64 n = c->u_mode.ocb.aad_nblocks;
+
+ aesni_prepare ();
+
+ /* Preload Offset and Sum */
+ asm volatile ("movdqu %[iv], %%xmm5\n\t"
+ "movdqu %[ctr], %%xmm6\n\t"
+ : /* No output */
+ : [iv] "m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "m" (*c->u_mode.ocb.aad_sum)
+ : "memory" );
+
+ for ( ;nblocks > 3 ; nblocks -= 4 )
+ {
+ const unsigned char *l[4];
+
+ /* l_tmp will be used only every 65536-th block. */
+ l[0] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+ l[1] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+ l[2] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+ l[3] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ /* The four masked AAD blocks are built in xmm1..xmm4. */
+ asm volatile ("movdqu %[l0], %%xmm0\n\t"
+ "movdqu %[abuf0], %%xmm1\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm1\n\t"
+ :
+ : [l0] "m" (*l[0]),
+ [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l1], %%xmm0\n\t"
+ "movdqu %[abuf1], %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm2\n\t"
+ :
+ : [l1] "m" (*l[1]),
+ [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l2], %%xmm0\n\t"
+ "movdqu %[abuf2], %%xmm3\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ :
+ : [l2] "m" (*l[2]),
+ [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
+ : "memory" );
+ asm volatile ("movdqu %[l3], %%xmm0\n\t"
+ "movdqu %[abuf3], %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm4\n\t"
+ :
+ : [l3] "m" (*l[3]),
+ [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+ : "memory" );
+
+ /* NOTE(review): presumably encrypts xmm1..xmm4 in place; the helper is
+ defined outside this hunk - confirm. */
+ do_aesni_enc_vec4 (ctx);
+
+ /* Fold the four cipher outputs into the Sum. */
+ asm volatile ("pxor %%xmm1, %%xmm6\n\t"
+ "pxor %%xmm2, %%xmm6\n\t"
+ "pxor %%xmm3, %%xmm6\n\t"
+ "pxor %%xmm4, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += 4*BLOCKSIZE;
+ }
+ /* Remaining 0..3 blocks, one at a time; the block travels in xmm0. */
+ for ( ;nblocks; nblocks-- )
+ {
+ const unsigned char *l;
+
+ l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset,
+ c->u_mode.ocb.aad_sum);
+
+ /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+ /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+ asm volatile ("movdqu %[l], %%xmm1\n\t"
+ "movdqu %[abuf], %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm0\n\t"
+ :
+ : [l] "m" (*l),
+ [abuf] "m" (*abuf)
+ : "memory" );
+
+ /* NOTE(review): presumably encrypts xmm0 in place - confirm. */
+ do_aesni_enc (ctx);
+
+ asm volatile ("pxor %%xmm0, %%xmm6\n\t"
+ :
+ :
+ : "memory" );
+
+ abuf += BLOCKSIZE;
+ }
+
+ /* Write the block counter, Offset and Sum back to the handle. */
+ c->u_mode.ocb.aad_nblocks = n;
+ asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+ "movdqu %%xmm6, %[ctr]\n\t"
+ : [iv] "=m" (*c->u_mode.ocb.aad_offset),
+ [ctr] "=m" (*c->u_mode.ocb.aad_sum)
+ :
+ : "memory" );
+
+ aesni_cleanup ();
+ aesni_cleanup_2_6 ();
+
+ wipememory(&l_tmp, sizeof(l_tmp));
+}
+
+
#endif /* USE_AESNI */
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index a481e6f6..ade41c9d 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -48,6 +48,7 @@
#include "bufhelp.h"
#include "cipher-selftest.h"
#include "rijndael-internal.h"
+#include "./cipher-internal.h"
#ifdef USE_AMD64_ASM
@@ -97,6 +98,11 @@ extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx,
unsigned char *outbuf,
const unsigned char *inbuf,
unsigned char *iv, size_t nblocks);
+extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt);
+extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
#endif
#ifdef USE_SSSE3
@@ -1150,6 +1156,161 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv,
+/* Bulk encryption/decryption of complete blocks in OCB mode.
+
+   C is the cipher handle; its OCB state (the block counter
+   u_mode.ocb.data_nblocks, the running Offset in u_iv.iv and the running
+   Checksum in u_ctr.ctr) is advanced by NBLOCKS blocks.  INBUF_ARG and
+   OUTBUF_ARG each cover NBLOCKS blocks of BLOCKSIZE bytes.  A non-zero
+   ENCRYPT selects encryption, zero selects decryption.
+
+   When AES-NI is compiled in and enabled for the context the work is
+   handed to _gcry_aes_aesni_ocb_crypt; otherwise the blocks are processed
+   one at a time with the context's generic encrypt/decrypt function.  */
+void
+_gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int burn_depth = 0;
+
+  if (encrypt)
+    {
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+    }
+  else
+    {
+      check_decryption_preparation (ctx);
+
+      if (ctx->prefetch_dec_fn)
+        ctx->prefetch_dec_fn();
+    }
+
+  if (0)
+    ;
+#ifdef USE_AESNI
+  else if (ctx->use_aesni)
+    {
+      _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt);
+      burn_depth = 0;
+    }
+#endif /*USE_AESNI*/
+  else if (encrypt)
+    {
+      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+      rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+      for ( ;nblocks; nblocks-- )
+        {
+          u64 i = ++c->u_mode.ocb.data_nblocks;
+          unsigned int ntz = _gcry_ctz64 (i);
+          const unsigned char *l;
+
+          /* L_TMP doubles as scratch for the L computation; L is consumed
+             by the Offset update before L_TMP is reused for the block.  */
+          if (ntz < OCB_L_TABLE_SIZE)
+            l = c->u_mode.ocb.L[ntz];
+          else
+            l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          /* Checksum_i = Checksum_{i-1} xor P_i */
+          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+          inbuf += BLOCKSIZE;
+          outbuf += BLOCKSIZE;
+        }
+
+      /* Do not leave block data behind in the local scratch buffer; this
+         matches _gcry_aes_ocb_auth and the AES-NI implementations.  */
+      wipememory(&l_tmp, sizeof(l_tmp));
+    }
+  else
+    {
+      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+      rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
+
+      for ( ;nblocks; nblocks-- )
+        {
+          u64 i = ++c->u_mode.ocb.data_nblocks;
+          unsigned int ntz = _gcry_ctz64 (i);
+          const unsigned char *l;
+
+          if (ntz < OCB_L_TABLE_SIZE)
+            l = c->u_mode.ocb.L[ntz];
+          else
+            l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+          buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          /* Checksum_i = Checksum_{i-1} xor P_i */
+          buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+          inbuf += BLOCKSIZE;
+          outbuf += BLOCKSIZE;
+        }
+
+      /* L_TMP still holds the last plaintext block at this point; wipe
+         it so that it does not linger on the stack.  */
+      wipememory(&l_tmp, sizeof(l_tmp));
+    }
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+/* Bulk authentication of complete blocks in OCB mode.
+
+   Hashes NBLOCKS blocks of BLOCKSIZE bytes from ABUF_ARG into the AAD
+   state of C (u_mode.ocb.aad_nblocks, aad_offset and aad_sum).  The
+   AES-NI implementation is used when compiled in and enabled for the
+   context; otherwise the context's generic encrypt function is applied
+   block by block.  */
+void
+_gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  unsigned int burn_depth = 0;
+
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+#ifdef USE_AESNI
+  if (ctx->use_aesni)
+    {
+      /* The AES-NI path requests no stack burning.  */
+      _gcry_aes_aesni_ocb_auth (c, abuf, nblocks);
+      return;
+    }
+#endif /*USE_AESNI*/
+
+  {
+    union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } blk;
+    rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+    while (nblocks)
+      {
+        u64 idx = ++c->u_mode.ocb.aad_nblocks;
+        unsigned int tz = _gcry_ctz64 (idx);
+        const unsigned char *l;
+
+        /* BLK serves as scratch for the L computation as well; L is
+           consumed by the Offset update before BLK is overwritten.  */
+        l = (tz < OCB_L_TABLE_SIZE)
+            ? c->u_mode.ocb.L[tz]
+            : _gcry_cipher_ocb_get_l (c, blk.x1, idx);
+
+        /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+        buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
+        /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+        buf_xor (blk.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE);
+        burn_depth = encrypt_fn (ctx, blk.x1, blk.x1);
+        buf_xor_1 (c->u_mode.ocb.aad_sum, blk.x1, BLOCKSIZE);
+
+        abuf += BLOCKSIZE;
+        nblocks--;
+      }
+
+    wipememory(&blk, sizeof(blk));
+  }
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+
/* Run the self-tests for AES 128. Returns NULL on success. */
static const char*
selftest_basic_128 (void)
diff --git a/src/cipher.h b/src/cipher.h
index f4f6cc4c..7ad0b2cd 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -135,6 +135,10 @@ void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
void *outbuf_arg, const void *inbuf_arg,
size_t nblocks);
+void _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks, int encrypt);
+void _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks);
/*-- blowfish.c --*/
void _gcry_blowfish_cfb_dec (void *context, unsigned char *iv,
diff --git a/tests/basic.c b/tests/basic.c
index 6ebc0568..1175b386 100644
--- a/tests/basic.c
+++ b/tests/basic.c
@@ -3153,6 +3153,172 @@ do_check_ocb_cipher (int inplace)
static void
+check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect)
+{
+  /* Large-buffer OCB round trip for cipher ALGO with a KEYLEN byte key:
+     a buffer of 2 MiB + 32 bytes filled with 'a' is authenticated and
+     encrypted with one handle, then authenticated and decrypted in place
+     with a second handle.  The tag produced on both sides must equal the
+     16 byte TAGEXPECT and the decrypted data must equal the input.
+     Failures are reported through fail().  */
+  static const unsigned char key[32] =
+    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+    "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";
+  static const unsigned char nonce[12] =
+    "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x00\x01\x02\x03";
+  const size_t buflen = 1024 * 1024 * 2 + 32;
+  unsigned char *inbuf;
+  unsigned char *outbuf;
+  gpg_error_t err = 0;
+  /* Start with NULL handles so that the common exit path can tell which
+     of them were actually opened.  */
+  gcry_cipher_hd_t hde = NULL;
+  gcry_cipher_hd_t hdd = NULL;
+  /* Zero-filled so that the tag comparisons stay well defined even if
+     gcry_cipher_gettag fails.  */
+  unsigned char tag[16] = { 0 };
+  size_t i;
+
+  inbuf = xmalloc(buflen);
+  if (!inbuf)
+    {
+      fail ("out-of-memory\n");
+      return;
+    }
+  outbuf = xmalloc(buflen);
+  if (!outbuf)
+    {
+      fail ("out-of-memory\n");
+      xfree(inbuf);
+      return;
+    }
+
+  for (i = 0; i < buflen; i++)
+    inbuf[i] = 'a';
+
+  err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0);
+  if (!err)
+    err = gcry_cipher_open (&hdd, algo, GCRY_CIPHER_MODE_OCB, 0);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_open failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      /* HDE may already be open here; the exit path closes it.  */
+      goto out;
+    }
+
+  err = gcry_cipher_setkey (hde, key, keylen);
+  if (!err)
+    err = gcry_cipher_setkey (hdd, key, keylen);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_setkey failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      goto out;
+    }
+
+  err = gcry_cipher_setiv (hde, nonce, 12);
+  if (!err)
+    err = gcry_cipher_setiv (hdd, nonce, 12);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_setiv failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      goto out;
+    }
+
+  err = gcry_cipher_authenticate (hde, inbuf, buflen);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      goto out;
+    }
+
+  err = gcry_cipher_final (hde);
+  if (!err)
+    {
+      err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen);
+    }
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_encrypt failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      goto out;
+    }
+
+  /* Check that the tag matches. */
+  err = gcry_cipher_gettag (hde, tag, 16);
+  if (err)
+    {
+      fail ("cipher_ocb, gcry_cipher_gettag failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+    }
+  if (memcmp (tagexpect, tag, 16))
+    {
+      mismatch (tagexpect, 16, tag, 16);
+      fail ("cipher-ocb, encrypt tag mismatch (large, algo %d)\n", algo);
+    }
+
+  err = gcry_cipher_authenticate (hdd, inbuf, buflen);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+      goto out;
+    }
+
+  /* Now for the decryption (in place, on the ciphertext in OUTBUF). */
+  err = gcry_cipher_final (hdd);
+  if (!err)
+    {
+      err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0);
+    }
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_decrypt (large, algo %d) failed: %s\n",
+            algo, gpg_strerror (err));
+      goto out;
+    }
+
+  /* We still have TAG from the encryption. */
+  err = gcry_cipher_checktag (hdd, tag, 16);
+  if (err)
+    {
+      fail ("cipher-ocb, gcry_cipher_checktag failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+    }
+
+  /* Check that the decrypt output matches the original plaintext.  The
+     buffers are not dumped on mismatch because of their size. */
+  if (memcmp (inbuf, outbuf, buflen))
+    {
+      fail ("cipher-ocb, decrypt data mismatch (large, algo %d)\n", algo);
+    }
+
+  /* Check that gettag also works for decryption. */
+  err = gcry_cipher_gettag (hdd, tag, 16);
+  if (err)
+    {
+      fail ("cipher_ocb, decrypt gettag failed (large, algo %d): %s\n",
+            algo, gpg_strerror (err));
+    }
+  if (memcmp (tagexpect, tag, 16))
+    {
+      mismatch (tagexpect, 16, tag, 16);
+      fail ("cipher-ocb, decrypt tag mismatch (large, algo %d)\n", algo);
+    }
+
+out:
+  /* Single exit path: release whichever handles were opened, then the
+     buffers. */
+  if (hde)
+    gcry_cipher_close (hde);
+  if (hdd)
+    gcry_cipher_close (hdd);
+  xfree(outbuf);
+  xfree(inbuf);
+}
+
+
+static void
check_ocb_cipher (void)
{
/* Check OCB cipher with separate destination and source buffers for
@@ -3161,6 +3327,14 @@ check_ocb_cipher (void)
/* Check OCB cipher with inplace encrypt/decrypt. */
do_check_ocb_cipher(1);
+
+ /* Check large buffer encryption/decryption. */
+ check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16,
+ "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8"
+ "\x33\xfd\x7a\x4f\x42\x60\x5d\x20");
+ check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32,
+ "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d"
+ "\xfe\x96\x67\xc9\xc8\x41\x03\x51");
}