diff options
-rw-r--r-- | cipher/cipher-internal.h | 5 | ||||
-rw-r--r-- | cipher/cipher-ocb.c | 84 | ||||
-rw-r--r-- | cipher/cipher.c | 2 | ||||
-rw-r--r-- | cipher/rijndael-aesni.c | 483 | ||||
-rw-r--r-- | cipher/rijndael.c | 161 | ||||
-rw-r--r-- | src/cipher.h | 4 | ||||
-rw-r--r-- | tests/basic.c | 174 |
7 files changed, 884 insertions, 29 deletions
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h index 50b03243..e20ea562 100644 --- a/cipher/cipher-internal.h +++ b/cipher/cipher-internal.h @@ -128,6 +128,9 @@ struct gcry_cipher_handle void (*ctr_enc)(void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); + void (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt); + void (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); } bulk; @@ -440,6 +443,8 @@ gcry_err_code_t _gcry_cipher_ocb_get_tag gcry_err_code_t _gcry_cipher_ocb_check_tag /* */ (gcry_cipher_hd_t c, const unsigned char *intag, size_t taglen); +const unsigned char *_gcry_cipher_ocb_get_l +/* */ (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n); #endif /*G10_CIPHER_INTERNAL_H*/ diff --git a/cipher/cipher-ocb.c b/cipher/cipher-ocb.c index 62e79bbd..bc6fd87f 100644 --- a/cipher/cipher-ocb.c +++ b/cipher/cipher-ocb.c @@ -115,8 +115,8 @@ bit_copy (unsigned char *d, const unsigned char *s, every 65536-th block. L_TMP is a helper buffer of size OCB_BLOCK_LEN which is used to hold the computation if not taken from the table. */ -static const unsigned char * -get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n) +const unsigned char * +_gcry_cipher_ocb_get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 n) { int ntz = _gcry_ctz64 (n); @@ -257,6 +257,15 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, if (!abuflen) return 0; + /* Use a bulk method if available. */ + if (abuflen >= OCB_BLOCK_LEN && c->bulk.ocb_auth) + { + size_t nblks = abuflen / OCB_BLOCK_LEN; + c->bulk.ocb_auth (c, abuf, nblks); + abuf += nblks * OCB_BLOCK_LEN; + abuflen -= nblks * OCB_BLOCK_LEN; + } + /* Hash all full blocks. */ while (abuflen >= OCB_BLOCK_LEN) { @@ -264,7 +273,8 @@ _gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ buf_xor_1 (c->u_mode.ocb.aad_offset, - get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks), OCB_BLOCK_LEN); + _gcry_cipher_ocb_get_l (c, l_tmp, c->u_mode.ocb.aad_nblocks), + OCB_BLOCK_LEN); /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ buf_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf, OCB_BLOCK_LEN); c->spec->encrypt (&c->context.c, l_tmp, l_tmp); @@ -341,40 +351,56 @@ ocb_crypt (gcry_cipher_hd_t c, int encrypt, else if ((inbuflen % OCB_BLOCK_LEN)) return GPG_ERR_INV_LENGTH; /* We support only full blocks for now. */ - if (encrypt) + /* Use a bulk method if available. */ + if (nblks && c->bulk.ocb_crypt) { - /* Checksum_i = Checksum_{i-1} xor P_i */ - ocb_checksum (c->u_ctr.ctr, inbuf, nblks); + c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt); + inbuf += nblks * OCB_BLOCK_LEN; + outbuf += nblks * OCB_BLOCK_LEN; + inbuflen -= nblks * OCB_BLOCK_LEN; + outbuflen -= nblks * OCB_BLOCK_LEN; + nblks = 0; } - /* Encrypt all full blocks. */ - while (inbuflen >= OCB_BLOCK_LEN) + if (nblks) { - c->u_mode.ocb.data_nblocks++; + gcry_cipher_encrypt_t crypt_fn = + encrypt ? c->spec->encrypt : c->spec->decrypt; - /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ - buf_xor_1 (c->u_iv.iv, - get_l (c, l_tmp, c->u_mode.ocb.data_nblocks), OCB_BLOCK_LEN); - /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ - buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN); if (encrypt) - nburn = c->spec->encrypt (&c->context.c, outbuf, outbuf); - else - nburn = c->spec->decrypt (&c->context.c, outbuf, outbuf); - burn = nburn > burn ? nburn : burn; - buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN); + { + /* Checksum_i = Checksum_{i-1} xor P_i */ + ocb_checksum (c->u_ctr.ctr, inbuf, nblks); + } - inbuf += OCB_BLOCK_LEN; - inbuflen -= OCB_BLOCK_LEN; - outbuf += OCB_BLOCK_LEN; - outbuflen =- OCB_BLOCK_LEN; - } + /* Encrypt all full blocks. */ + while (inbuflen >= OCB_BLOCK_LEN) + { + c->u_mode.ocb.data_nblocks++; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, + _gcry_cipher_ocb_get_l (c, l_tmp, + c->u_mode.ocb.data_nblocks), + OCB_BLOCK_LEN); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN); + nburn = crypt_fn (&c->context.c, outbuf, outbuf); + burn = nburn > burn ? nburn : burn; + buf_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN); + + inbuf += OCB_BLOCK_LEN; + inbuflen -= OCB_BLOCK_LEN; + outbuf += OCB_BLOCK_LEN; + outbuflen =- OCB_BLOCK_LEN; + } - if (!encrypt) - { - /* Checksum_i = Checksum_{i-1} xor P_i */ - ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks); - } + if (!encrypt) + { + /* Checksum_i = Checksum_{i-1} xor P_i */ + ocb_checksum (c->u_ctr.ctr, outbuf - nblks * OCB_BLOCK_LEN, nblks); + } + } /* Encrypt final partial block. Note that we expect INBUFLEN to be shorter than OCB_BLOCK_LEN (see above). */ diff --git a/cipher/cipher.c b/cipher/cipher.c index 0a13fe61..6e1173f5 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -510,6 +510,8 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle, h->bulk.cbc_enc = _gcry_aes_cbc_enc; h->bulk.cbc_dec = _gcry_aes_cbc_dec; h->bulk.ctr_enc = _gcry_aes_ctr_enc; + h->bulk.ocb_crypt = _gcry_aes_ocb_crypt; + h->bulk.ocb_auth = _gcry_aes_ocb_auth; break; #endif /*USE_AES*/ #ifdef USE_BLOWFISH diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index 3c367cec..9a816021 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -29,6 +29,7 @@ #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" +#include "./cipher-internal.h" #ifdef USE_AESNI @@ -1251,4 +1252,486 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, aesni_cleanup_2_6 (); } + +static inline const unsigned char * +get_l (gcry_cipher_hd_t c, unsigned char *l_tmp, u64 i, unsigned char *iv, + unsigned char *ctr) +{ + const unsigned char *l; + unsigned int ntz; + + if (i & 0xffffffffU) + { + asm ("rep;bsf %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" (i & 0xffffffffU) + : "cc"); + } + else + { + if (OCB_L_TABLE_SIZE < 32) + { + ntz = 32; + } + else if (i) + { + asm ("rep;bsf %k[high], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [high] "r" (i >> 32) + : "cc"); + ntz += 32; + } + else + { + ntz = 64; + } + } + + if (ntz < OCB_L_TABLE_SIZE) + { + l = c->u_mode.ocb.L[ntz]; + } + else + { + /* Store Offset & Checksum before calling external function */ + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*iv), + [ctr] "=m" (*ctr) + : + : "memory" ); + + l = _gcry_cipher_ocb_get_l (c, l_tmp, i); + + /* Restore Offset & Checksum */ + asm volatile ("movdqu %[iv], %%xmm5\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*iv), + [ctr] "m" (*ctr) + : "memory" ); + } + + return l; +} + + +static void +aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + + aesni_prepare (); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm5\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + const unsigned char *l[4]; + + /* l_tmp will be used only every 65536-th block. */ + l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l0], %%xmm0\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0] "m" (*l[0]), + [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l1], %%xmm0\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [l1] "m" (*l[1]), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l2], %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) + : [l2] "m" (*l[2]), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l3], %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm4\n\t" + : + : [l3] "m" (*l[3]), + [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : "memory" ); + + do_aesni_enc_vec4 (ctx); + + asm volatile ("movdqu %[outbuf0],%%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf1],%%xmm0\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %[outbuf2],%%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), + [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), + [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) + : + : "memory" ); + + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm5, %%xmm0\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); + + wipememory(&l_tmp, sizeof(l_tmp)); +} + + +static void +aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + u64 n = c->u_mode.ocb.data_nblocks; + + aesni_prepare (); + + /* Preload Offset and Checksum */ + asm volatile ("movdqu %[iv], %%xmm5\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_iv.iv), + [ctr] "m" (*c->u_ctr.ctr) + : "memory" ); + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + const unsigned char *l[4]; + + /* l_tmp will be used only every 65536-th block. */ + l[0] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + l[1] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + l[2] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + l[3] = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + asm volatile ("movdqu %[l0], %%xmm0\n\t" + "movdqu %[inbuf0], %%xmm1\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm5, %[outbuf0]\n\t" + : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)) + : [l0] "m" (*l[0]), + [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l1], %%xmm0\n\t" + "movdqu %[inbuf1], %%xmm2\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "movdqu %%xmm5, %[outbuf1]\n\t" + : [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE)) + : [l1] "m" (*l[1]), + [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l2], %%xmm0\n\t" + "movdqu %[inbuf2], %%xmm3\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm5, %[outbuf2]\n\t" + : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)) + : [l2] "m" (*l[2]), + [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l3], %%xmm0\n\t" + "movdqu %[inbuf3], %%xmm4\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm4\n\t" + : + : [l3] "m" (*l[3]), + [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)) + : "memory" ); + + do_aesni_dec_vec4 (ctx); + + asm volatile ("movdqu %[outbuf0],%%xmm0\n\t" + "pxor %%xmm0, %%xmm1\n\t" + "movdqu %%xmm1, %[outbuf0]\n\t" + "movdqu %[outbuf1],%%xmm0\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "movdqu %%xmm2, %[outbuf1]\n\t" + "movdqu %[outbuf2],%%xmm0\n\t" + "pxor %%xmm0, %%xmm3\n\t" + "movdqu %%xmm3, %[outbuf2]\n\t" + "pxor %%xmm5, %%xmm4\n\t" + "movdqu %%xmm4, %[outbuf3]\n\t" + "pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "pxor %%xmm4, %%xmm6\n\t" + : [outbuf0] "+m" (*(outbuf + 0 * BLOCKSIZE)), + [outbuf1] "+m" (*(outbuf + 1 * BLOCKSIZE)), + [outbuf2] "+m" (*(outbuf + 2 * BLOCKSIZE)), + [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)) + : + : "memory" ); + + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = get_l(c, l_tmp.x1, ++n, c->u_iv.iv, c->u_ctr.ctr); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [inbuf] "m" (*inbuf) + : "memory" ); + + do_aesni_dec (ctx); + + asm volatile ("pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm0, %%xmm6\n\t" + "movdqu %%xmm0, %[outbuf]\n\t" + : [outbuf] "=m" (*outbuf) + : + : "memory" ); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + c->u_mode.ocb.data_nblocks = n; + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_iv.iv), + [ctr] "=m" (*c->u_ctr.ctr) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); + + wipememory(&l_tmp, sizeof(l_tmp)); +} + + +void +_gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + if (encrypt) + aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks); + else + aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks); +} + + +void +_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + RIJNDAEL_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + u64 n = c->u_mode.ocb.aad_nblocks; + + aesni_prepare (); + + /* Preload Offset and Sum */ + asm volatile ("movdqu %[iv], %%xmm5\n\t" + "movdqu %[ctr], %%xmm6\n\t" + : /* No output */ + : [iv] "m" (*c->u_mode.ocb.aad_offset), + [ctr] "m" (*c->u_mode.ocb.aad_sum) + : "memory" ); + + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + const unsigned char *l[4]; + + /* l_tmp will be used only every 65536-th block. */ + l[0] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum); + l[1] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum); + l[2] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum); + l[3] = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + asm volatile ("movdqu %[l0], %%xmm0\n\t" + "movdqu %[abuf0], %%xmm1\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm1\n\t" + : + : [l0] "m" (*l[0]), + [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l1], %%xmm0\n\t" + "movdqu %[abuf1], %%xmm2\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm2\n\t" + : + : [l1] "m" (*l[1]), + [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l2], %%xmm0\n\t" + "movdqu %[abuf2], %%xmm3\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + : + : [l2] "m" (*l[2]), + [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)) + : "memory" ); + asm volatile ("movdqu %[l3], %%xmm0\n\t" + "movdqu %[abuf3], %%xmm4\n\t" + "pxor %%xmm0, %%xmm5\n\t" + "pxor %%xmm5, %%xmm4\n\t" + : + : [l3] "m" (*l[3]), + [abuf3] "m" (*(abuf + 3 * BLOCKSIZE)) + : "memory" ); + + do_aesni_enc_vec4 (ctx); + + asm volatile ("pxor %%xmm1, %%xmm6\n\t" + "pxor %%xmm2, %%xmm6\n\t" + "pxor %%xmm3, %%xmm6\n\t" + "pxor %%xmm4, %%xmm6\n\t" + : + : + : "memory" ); + + abuf += 4*BLOCKSIZE; + } + for ( ;nblocks; nblocks-- ) + { + const unsigned char *l; + + l = get_l(c, l_tmp.x1, ++n, c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + asm volatile ("movdqu %[l], %%xmm1\n\t" + "movdqu %[abuf], %%xmm0\n\t" + "pxor %%xmm1, %%xmm5\n\t" + "pxor %%xmm5, %%xmm0\n\t" + : + : [l] "m" (*l), + [abuf] "m" (*abuf) + : "memory" ); + + do_aesni_enc (ctx); + + asm volatile ("pxor %%xmm0, %%xmm6\n\t" + : + : + : "memory" ); + + abuf += BLOCKSIZE; + } + + c->u_mode.ocb.aad_nblocks = n; + asm volatile ("movdqu %%xmm5, %[iv]\n\t" + "movdqu %%xmm6, %[ctr]\n\t" + : [iv] "=m" (*c->u_mode.ocb.aad_offset), + [ctr] "=m" (*c->u_mode.ocb.aad_sum) + : + : "memory" ); + + aesni_cleanup (); + aesni_cleanup_2_6 (); + + wipememory(&l_tmp, sizeof(l_tmp)); +} + + #endif /* USE_AESNI */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index a481e6f6..ade41c9d 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -48,6 +48,7 @@ #include "bufhelp.h" #include "cipher-selftest.h" #include "rijndael-internal.h" +#include "./cipher-internal.h" #ifdef USE_AMD64_ASM @@ -97,6 +98,11 @@ extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks); +extern void _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); +extern void _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); #endif #ifdef USE_SSSE3 @@ -1150,6 +1156,161 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv, +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +void +_gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned int burn_depth = 0; + + if (encrypt) + { + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + } + else + { + check_decryption_preparation (ctx); + + if (ctx->prefetch_dec_fn) + ctx->prefetch_dec_fn(); + } + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_ocb_crypt (c, outbuf, inbuf, nblocks, encrypt); + burn_depth = 0; + } +#endif /*USE_AESNI*/ + else if (encrypt) + { + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + u64 i = ++c->u_mode.ocb.data_nblocks; + unsigned int ntz = _gcry_ctz64 (i); + const unsigned char *l; + + if (ntz < OCB_L_TABLE_SIZE) + l = c->u_mode.ocb.L[ntz]; + else + l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); + buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + } + else + { + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + u64 i = ++c->u_mode.ocb.data_nblocks; + unsigned int ntz = _gcry_ctz64 (i); + const unsigned char *l; + + if (ntz < OCB_L_TABLE_SIZE) + l = c->u_mode.ocb.L[ntz]; + else + l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_iv.iv, l, BLOCKSIZE); + buf_cpy (l_tmp.x1, inbuf, BLOCKSIZE); + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1); + buf_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE); + /* Checksum_i = Checksum_{i-1} xor P_i */ + buf_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE); + buf_cpy (outbuf, l_tmp.x1, BLOCKSIZE); + + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); +} + + +/* Bulk authentication of complete blocks in OCB mode. */ +void +_gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks) +{ + RIJNDAEL_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + unsigned int burn_depth = 0; + + if (ctx->prefetch_enc_fn) + ctx->prefetch_enc_fn(); + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + _gcry_aes_aesni_ocb_auth (c, abuf, nblocks); + burn_depth = 0; + } +#endif /*USE_AESNI*/ + else + { + union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp; + rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; + + for ( ;nblocks; nblocks-- ) + { + u64 i = ++c->u_mode.ocb.aad_nblocks; + unsigned int ntz = _gcry_ctz64 (i); + const unsigned char *l; + + if (ntz < OCB_L_TABLE_SIZE) + l = c->u_mode.ocb.L[ntz]; + else + l = _gcry_cipher_ocb_get_l (c, l_tmp.x1, i); + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + buf_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE); + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + buf_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf, BLOCKSIZE); + burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1); + buf_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE); + + abuf += BLOCKSIZE; + } + + wipememory(&l_tmp, sizeof(l_tmp)); + } + + if (burn_depth) + _gcry_burn_stack (burn_depth + 4 * sizeof(void *)); +} + + + /* Run the self-tests for AES 128. Returns NULL on success. */ static const char* selftest_basic_128 (void) diff --git a/src/cipher.h b/src/cipher.h index f4f6cc4c..7ad0b2cd 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -135,6 +135,10 @@ void _gcry_aes_cbc_dec (void *context, unsigned char *iv, void _gcry_aes_ctr_enc (void *context, unsigned char *ctr, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); +void _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt); +void _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks); /*-- blowfish.c --*/ void _gcry_blowfish_cfb_dec (void *context, unsigned char *iv, diff --git a/tests/basic.c b/tests/basic.c index 6ebc0568..1175b386 100644 --- a/tests/basic.c +++ b/tests/basic.c @@ -3153,6 +3153,172 @@ do_check_ocb_cipher (int inplace) static void +check_ocb_cipher_largebuf (int algo, int keylen, const char *tagexpect) +{ + static const unsigned char key[32] = + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"; + static const unsigned char nonce[12] = + "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F\x00\x01\x02\x03"; + const size_t buflen = 1024 * 1024 * 2 + 32; + unsigned char *inbuf; + unsigned char *outbuf; + gpg_error_t err = 0; + gcry_cipher_hd_t hde, hdd; + unsigned char tag[16]; + int i; + + inbuf = xmalloc(buflen); + if (!inbuf) + { + fail ("out-of-memory\n"); + return; + } + outbuf = xmalloc(buflen); + if (!outbuf) + { + fail ("out-of-memory\n"); + xfree(inbuf); + return; + } + + for (i = 0; i < buflen; i++) + inbuf[i] = 'a'; + + err = gcry_cipher_open (&hde, algo, GCRY_CIPHER_MODE_OCB, 0); + if (!err) + err = gcry_cipher_open (&hdd, algo, GCRY_CIPHER_MODE_OCB, 0); + if (err) + { + fail ("cipher-ocb, gcry_cipher_open failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + goto out_free; + } + + err = gcry_cipher_setkey (hde, key, keylen); + if (!err) + err = gcry_cipher_setkey (hdd, key, keylen); + if (err) + { + fail ("cipher-ocb, gcry_cipher_setkey failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + goto out_free; + } + + err = gcry_cipher_setiv (hde, nonce, 12); + if (!err) + err = gcry_cipher_setiv (hdd, nonce, 12); + if (err) + { + fail ("cipher-ocb, gcry_cipher_setiv failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + goto out_free; + } + + err = gcry_cipher_authenticate (hde, inbuf, buflen); + if (err) + { + fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + goto out_free; + } + + err = gcry_cipher_final (hde); + if (!err) + { + err = gcry_cipher_encrypt (hde, outbuf, buflen, inbuf, buflen); + } + if (err) + { + fail ("cipher-ocb, gcry_cipher_encrypt failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + goto out_free; + } + + /* Check that the tag matches. */ + err = gcry_cipher_gettag (hde, tag, 16); + if (err) + { + fail ("cipher_ocb, gcry_cipher_gettag failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + } + if (memcmp (tagexpect, tag, 16)) + { + mismatch (tagexpect, 16, tag, 16); + fail ("cipher-ocb, encrypt tag mismatch (large, algo %d)\n", algo); + } + + err = gcry_cipher_authenticate (hdd, inbuf, buflen); + if (err) + { + fail ("cipher-ocb, gcry_cipher_authenticate failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + goto out_free; + } + + /* Now for the decryption. */ + err = gcry_cipher_final (hdd); + if (!err) + { + err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0); + } + if (err) + { + fail ("cipher-ocb, gcry_cipher_decrypt (large, algo %d) failed: %s\n", + algo, gpg_strerror (err)); + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + goto out_free; + } + + /* We still have TAG from the encryption. */ + err = gcry_cipher_checktag (hdd, tag, 16); + if (err) + { + fail ("cipher-ocb, gcry_cipher_checktag failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + } + + /* Check that the decrypt output matches the original plaintext. */ + if (memcmp (inbuf, outbuf, buflen)) + { + /*mismatch (inbuf, buflen, outbuf, buflen);*/ + fail ("cipher-ocb, decrypt data mismatch (large, algo %d)\n", algo); + } + + /* Check that gettag also works for decryption. */ + err = gcry_cipher_gettag (hdd, tag, 16); + if (err) + { + fail ("cipher_ocb, decrypt gettag failed (large, algo %d): %s\n", + algo, gpg_strerror (err)); + } + if (memcmp (tagexpect, tag, 16)) + { + mismatch (tagexpect, 16, tag, 16); + fail ("cipher-ocb, decrypt tag mismatch (large, algo %d)\n", algo); + } + + gcry_cipher_close (hde); + gcry_cipher_close (hdd); + +out_free: + xfree(outbuf); + xfree(inbuf); +} + + +static void check_ocb_cipher (void) { /* Check OCB cipher with separate destination and source buffers for @@ -3161,6 +3327,14 @@ check_ocb_cipher (void) /* Check OCB cipher with inplace encrypt/decrypt. */ do_check_ocb_cipher(1); + + /* Check large buffer encryption/decryption. */ + check_ocb_cipher_largebuf(GCRY_CIPHER_AES, 16, + "\xf5\xf3\x12\x7d\x58\x2d\x96\xe8" + "\x33\xfd\x7a\x4f\x42\x60\x5d\x20"); + check_ocb_cipher_largebuf(GCRY_CIPHER_AES256, 32, + "\xfa\x26\xa5\xbf\xf6\x7d\x3a\x8d" + "\xfe\x96\x67\xc9\xc8\x41\x03\x51"); } |