diff options
author | Werner Koch <wk@gnupg.org> | 2011-02-16 17:17:49 +0100 |
---|---|---|
committer | Werner Koch <wk@gnupg.org> | 2011-02-16 17:17:49 +0100 |
commit | d9795cfdd758e2aa22e7ab8a6790e2915d1f5334 (patch) | |
tree | 3529ce3d48631e33b56520e0bd876edebc334ef1 /cipher/rijndael.c | |
parent | b825c5db17292988d261fefdc83cbc43d97d4b02 (diff) | |
download | libgcrypt-d9795cfdd758e2aa22e7ab8a6790e2915d1f5334.tar.gz |
Improved AES-CFB performance using AES-NI insn.
There is also a new regression test which tests the bulk
encryption methods we have for a few ciphers (namely AES). A bug in
them could have slipped through because we only did encrypt-decrypt
tests but didn't compare them to fixed vectors.
Benchmarks using gcc 4.4 show a 7-fold speed improvement for CFB
encryption and a 14-fold improvement for decryption. This is a bit strange; someone
should check the code to see why we have this difference.
Without AESNI (undef USE_AESNI in rijndael.c):
$ ./benchmark --cipher-repetitions 100 --alignment 16 cipher aes aes256
Running each test 100 times.
ECB/Stream CBC CFB OFB CTR
--------------- --------------- --------------- --------------- ---------------
AES 1370ms 1430ms 1140ms 1190ms 1120ms 1130ms 1520ms 1540ms 1780ms 1770ms
AES256 1780ms 1850ms 1530ms 1610ms 1540ms 1530ms 1930ms 1960ms 2180ms 2180ms
With AESNI:
$ ./benchmark --cipher-repetitions 100 --alignment 16 cipher aes aes256
Running each test 100 times.
ECB/Stream CBC CFB OFB CTR
--------------- --------------- --------------- --------------- ---------------
AES 80ms 100ms 240ms 220ms 140ms 70ms 300ms 290ms 490ms 510ms
AES256 130ms 130ms 290ms 270ms 200ms 100ms 340ms 340ms 470ms 470ms
$ ./benchmark --cipher-repetitions 100 --alignment 0 cipher aes aes256
Running each test 100 times.
ECB/Stream CBC CFB OFB CTR
--------------- --------------- --------------- --------------- ---------------
AES 80ms 90ms 240ms 230ms 150ms 80ms 290ms 300ms 500ms 530ms
AES256 130ms 130ms 290ms 260ms 190ms 110ms 340ms 340ms 470ms 490ms
Diffstat (limited to 'cipher/rijndael.c')
-rw-r--r-- | cipher/rijndael.c | 97 |
1 files changed, 83 insertions, 14 deletions
diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 50fb3930..4c498470 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -220,7 +220,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) KC = 6; if (0) - ; + { + ; + } #ifdef USE_AESNI else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) { @@ -234,7 +236,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) KC = 8; if (0) - ; + { + ; + } #ifdef USE_AESNI else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) { @@ -778,6 +782,77 @@ do_aesni_dec_aligned (const RIJNDAEL_context *ctx, #undef aesdeclast_xmm1_xmm0 } + +/* Perform a CFB encryption or decryption round using the + initialization vector IV and the input block A. Write the result + to the output block B and update IV. IV needs to be 16 byte + aligned. */ +static void +do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag, + unsigned char *iv, unsigned char *b, const unsigned char *a) +{ +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" + asm volatile ("movdqa %[iv], %%xmm0\n\t" /* xmm0 := IV */ + "movl %[key], %%esi\n\t" /* esi := keyschenc */ + "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "movdqa 0x10(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x20(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x30(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x40(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x50(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x60(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x70(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x80(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x90(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xa0(%%esi), %%xmm1\n\t" + "cmp $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xb0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xc0(%%esi), 
%%xmm1\n\t" + "cmp $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xd0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xe0(%%esi), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + "movdqu %[src], %%xmm1\n\t" /* Save input. */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 = input ^ IV */ + + "cmp $1, %[decrypt]\n\t" + "jz .Ldecrypt_%=\n\t" + "movdqa %%xmm0, %[iv]\n\t" /* [encrypt] Store IV. */ + "jmp .Lleave_%=\n" + ".Ldecrypt_%=:\n\t" + "movdqa %%xmm1, %[iv]\n" /* [decrypt] Store IV. */ + ".Lleave_%=:\n\t" + "movdqu %%xmm0, %[dst]\n" /* Store output. */ + : [iv] "+m" (*iv), [dst] "=m" (*b) + : [src] "m" (*a), + [key] "g" (ctx->keyschenc), + [rounds] "g" (ctx->rounds), + [decrypt] "m" (decrypt_flag) + : "%esi", "cc", "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenclast_xmm1_xmm0 +} + + static void do_aesni (RIJNDAEL_context *ctx, int decrypt_flag, unsigned char *bx, const unsigned char *ax) @@ -865,11 +940,9 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv, aesni_prepare (); for ( ;nblocks; nblocks-- ) { - /* Encrypt the IV. */ - do_aesni_enc_aligned (ctx, iv, iv); - /* XOR the input with the IV and store input into IV. */ - for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) - *outbuf++ = (*ivp++ ^= *inbuf++); + do_aesni_cfb (ctx, 0, iv, outbuf, inbuf); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; } aesni_cleanup (); } @@ -1137,13 +1210,9 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv, aesni_prepare (); for ( ;nblocks; nblocks-- ) { - do_aesni_enc_aligned (ctx, iv, iv); - for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) - { - temp = *inbuf++; - *outbuf++ = *ivp ^ temp; - *ivp++ = temp; - } + do_aesni_cfb (ctx, 1, iv, outbuf, inbuf); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; } aesni_cleanup (); } |