summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Werner Koch <wk@gnupg.org> 2011-02-16 17:17:49 +0100
committer Werner Koch <wk@gnupg.org> 2011-02-16 17:17:49 +0100
commit d9795cfdd758e2aa22e7ab8a6790e2915d1f5334 (patch)
tree 3529ce3d48631e33b56520e0bd876edebc334ef1
parent b825c5db17292988d261fefdc83cbc43d97d4b02 (diff)
download libgcrypt-d9795cfdd758e2aa22e7ab8a6790e2915d1f5334.tar.gz
Improved AES-CFB performance using AES-NI insn.
There is also a new regression test which tests the bulk encryption
methods we have for a few ciphers (namely AES). A bug in them could
have slipped through because we only did encrypt-decrypt tests but
didn't compare them to fixed vectors.

Benchmarks using gcc 4.4 show a 7 fold speed improvement for CFB
encryption and 14 for decryption. This is a bit strange; someone
should check the code to see why we have this difference.

Without AESNI (undef USE_AESNI in rijndael.c):

$ ./benchmark --cipher-repetitions 100 --alignment 16 cipher aes aes256
Running each test 100 times.
          ECB/Stream         CBC             CFB             OFB             CTR
        --------------- --------------- --------------- --------------- ---------------
AES      1370ms  1430ms  1140ms  1190ms  1120ms  1130ms  1520ms  1540ms  1780ms  1770ms
AES256   1780ms  1850ms  1530ms  1610ms  1540ms  1530ms  1930ms  1960ms  2180ms  2180ms

With AESNI:

$ ./benchmark --cipher-repetitions 100 --alignment 16 cipher aes aes256
Running each test 100 times.
          ECB/Stream         CBC             CFB             OFB             CTR
        --------------- --------------- --------------- --------------- ---------------
AES        80ms   100ms   240ms   220ms   140ms    70ms   300ms   290ms   490ms   510ms
AES256    130ms   130ms   290ms   270ms   200ms   100ms   340ms   340ms   470ms   470ms

$ ./benchmark --cipher-repetitions 100 --alignment 0 cipher aes aes256
Running each test 100 times.
          ECB/Stream         CBC             CFB             OFB             CTR
        --------------- --------------- --------------- --------------- ---------------
AES        80ms    90ms   240ms   230ms   150ms    80ms   290ms   300ms   500ms   530ms
AES256    130ms   130ms   290ms   260ms   190ms   110ms   340ms   340ms   470ms   490ms
-rw-r--r-- cipher/ChangeLog 7
-rw-r--r-- cipher/rijndael.c 97
-rw-r--r-- tests/ChangeLog 6
-rw-r--r-- tests/basic.c 245
4 files changed, 340 insertions, 15 deletions
diff --git a/cipher/ChangeLog b/cipher/ChangeLog
index af3bfde9..85dd43f0 100644
--- a/cipher/ChangeLog
+++ b/cipher/ChangeLog
@@ -1,3 +1,8 @@
+2011-02-16 Werner Koch <wk@g10code.com>
+
+ * rijndael.c (do_aesni_cfb) [USE_AESNI]: New.
+ (_gcry_aes_cfb_enc, _gcry_aes_cfb_dec) [USE_AESNI]: Use new function.
+
2011-02-15 Werner Koch <wk@g10code.com>
* rijndael.c (do_aesni_enc_aligned, do_aesni_dec_aligned): Use
@@ -7,7 +12,7 @@
(rijndael_encrypt, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
(rijndael_decrypt, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use
these macros. Don't burn the stack in the USE_AESNI case.
- (do_setkey): Add disabled code to use aeskeygenassis.
+ (do_setkey): Add disabled code to use aeskeygenassist.
2011-02-14 Werner Koch <wk@g10code.com>
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 50fb3930..4c498470 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -220,7 +220,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
KC = 6;
if (0)
- ;
+ {
+ ;
+ }
#ifdef USE_AESNI
else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
{
@@ -234,7 +236,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
KC = 8;
if (0)
- ;
+ {
+ ;
+ }
#ifdef USE_AESNI
else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
{
@@ -778,6 +782,77 @@ do_aesni_dec_aligned (const RIJNDAEL_context *ctx,
#undef aesdeclast_xmm1_xmm0
}
+
+/* Perform a CFB encryption or decryption round using the
+ initialization vector IV and the input block A. Write the result
+ to the output block B and update IV. IV needs to be 16 byte
+ aligned.
+
+ CTX supplies the encryption key schedule (keyschenc) and the round
+ count (rounds); the encryption schedule is used for both
+ directions. DECRYPT_FLAG selects which value becomes the next IV:
+ with 1 the input block A is stored, with any other value the
+ output block B is stored. A and B are accessed with unaligned
+ moves (movdqu), whereas IV and the key schedule are accessed with
+ aligned moves (movdqa).
+
+ NOTE(review): the key schedule address is loaded with movl into
+ %esi, i.e. as a 32 bit value - presumably this code is only built
+ for 32 bit x86; confirm against the USE_AESNI definition.
+ NOTE(review): xmm0 and xmm1 are modified but not listed as
+ clobbers - presumably the aesni_prepare/aesni_cleanup bracket in
+ the callers accounts for that; confirm.
+ NOTE(review): the cmp instructions carry no operand size suffix
+ although their operands may be in memory - verify that the
+ assembler in use picks the intended 32 bit compare. */
+static void
+do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
+ unsigned char *iv, unsigned char *b, const unsigned char *a)
+{
+#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" /* Opcode bytes for aesenc %xmm1, %xmm0. */
+#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" /* Opcode bytes for aesenclast %xmm1, %xmm0. */
+ asm volatile ("movdqa %[iv], %%xmm0\n\t" /* xmm0 := IV */
+ "movl %[key], %%esi\n\t" /* esi := keyschenc */
+ "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */
+ "movdqa 0x10(%%esi), %%xmm1\n\t" /* Rounds 1 to 9 are always run. */
+ aesenc_xmm1_xmm0
+ "movdqa 0x20(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x30(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x40(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x50(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x60(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x70(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x80(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0x90(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xa0(%%esi), %%xmm1\n\t"
+ "cmp $10, %[rounds]\n\t" /* 10 rounds: key[10] is the last round key. */
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xb0(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xc0(%%esi), %%xmm1\n\t"
+ "cmp $12, %[rounds]\n\t" /* 12 rounds: key[12] is the last round key. */
+ "jz .Lenclast%=\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xd0(%%esi), %%xmm1\n\t"
+ aesenc_xmm1_xmm0
+ "movdqa 0xe0(%%esi), %%xmm1\n" /* Neither 10 nor 12: key[14] is the last round key. */
+
+ ".Lenclast%=:\n\t"
+ aesenclast_xmm1_xmm0 /* xmm0 := encrypted IV */
+ "movdqu %[src], %%xmm1\n\t" /* Save input. */
+ "pxor %%xmm1, %%xmm0\n\t" /* xmm0 = input ^ IV */
+
+ "cmp $1, %[decrypt]\n\t" /* Is DECRYPT_FLAG equal to 1? */
+ "jz .Ldecrypt_%=\n\t"
+ "movdqa %%xmm0, %[iv]\n\t" /* [encrypt] Store IV. */
+ "jmp .Lleave_%=\n"
+ ".Ldecrypt_%=:\n\t"
+ "movdqa %%xmm1, %[iv]\n" /* [decrypt] Store IV. */
+ ".Lleave_%=:\n\t"
+ "movdqu %%xmm0, %[dst]\n" /* Store output. */
+ : [iv] "+m" (*iv), [dst] "=m" (*b)
+ : [src] "m" (*a),
+ [key] "g" (ctx->keyschenc),
+ [rounds] "g" (ctx->rounds),
+ [decrypt] "m" (decrypt_flag)
+ : "%esi", "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm1_xmm0
+}
+
+
static void
do_aesni (RIJNDAEL_context *ctx, int decrypt_flag,
unsigned char *bx, const unsigned char *ax)
@@ -865,11 +940,9 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
aesni_prepare ();
for ( ;nblocks; nblocks-- )
{
- /* Encrypt the IV. */
- do_aesni_enc_aligned (ctx, iv, iv);
- /* XOR the input with the IV and store input into IV. */
- for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
- *outbuf++ = (*ivp++ ^= *inbuf++);
+ do_aesni_cfb (ctx, 0, iv, outbuf, inbuf);
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
}
aesni_cleanup ();
}
@@ -1137,13 +1210,9 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv,
aesni_prepare ();
for ( ;nblocks; nblocks-- )
{
- do_aesni_enc_aligned (ctx, iv, iv);
- for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
- {
- temp = *inbuf++;
- *outbuf++ = *ivp ^ temp;
- *ivp++ = temp;
- }
+ do_aesni_cfb (ctx, 1, iv, outbuf, inbuf);
+ outbuf += BLOCKSIZE;
+ inbuf += BLOCKSIZE;
}
aesni_cleanup ();
}
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 6d7fb13e..c674f121 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2011-02-16 Werner Koch <wk@g10code.com>
+
+ * basic.c (DIM): New.
+ (check_bulk_cipher_modes): New.
+ (main): Run new test.
+
2011-02-15 Werner Koch <wk@g10code.com>
* benchmark.c: Add option --cipher-with-keysetup.
diff --git a/tests/basic.c b/tests/basic.c
index 066ae41c..bcc39cc8 100644
--- a/tests/basic.c
+++ b/tests/basic.c
@@ -28,6 +28,11 @@
#include "../src/gcrypt.h"
+#ifndef DIM
+# define DIM(v) (sizeof(v)/sizeof((v)[0])) /* Number of elements of the array V; V must be a real array, not a pointer. */
+#endif /* !DIM */
+
+
typedef struct test_spec_pubkey_key
{
const char *secret;
@@ -983,6 +988,245 @@ check_ofb_cipher (void)
fprintf (stderr, " Completed OFB checks.\n");
}
+
+/* Check that our bulk encryption functions work properly.
+
+   For each entry of the table below a 1600 byte test pattern is
+   encrypted with a single call, the SHA-1 digest of the ciphertext
+   is compared with the digest stored in the table, and the
+   ciphertext is then decrypted in place using a second handle and
+   compared with the original pattern.  Mismatches and library errors
+   are reported through fail (); a library error additionally ends
+   the whole check.
+
+   Note that the 24 byte keys consist of 23 characters plus the
+   terminating NUL of the string literal; the stored digests depend
+   on that.  */
+static void
+check_bulk_cipher_modes (void)
+{
+  struct
+  {
+    int algo;
+    int mode;
+    const char *key;
+    int keylen;
+    const char *iv;
+    int ivlen;
+    unsigned char t1_hash[20];  /* SHA-1 of the expected ciphertext.  */
+  } tv[] = {
+    { GCRY_CIPHER_AES, GCRY_CIPHER_MODE_CFB,
+      "abcdefghijklmnop", 16,
+      "1234567890123456", 16,
+/*[0]*/
+      { 0x53, 0xda, 0x27, 0x3c, 0x78, 0x3d, 0x54, 0x66, 0x19, 0x63,
+        0xd7, 0xe6, 0x20, 0x10, 0xcd, 0xc0, 0x5a, 0x0b, 0x06, 0xcc }
+    },
+    { GCRY_CIPHER_AES192, GCRY_CIPHER_MODE_CFB,
+      "abcdefghijklmnopABCDEFG", 24,
+      "1234567890123456", 16,
+/*[1]*/
+      { 0xc7, 0xb1, 0xd0, 0x09, 0x95, 0x04, 0x34, 0x61, 0x2b, 0xd9,
+        0xcb, 0xb3, 0xc7, 0xcb, 0xef, 0xea, 0x16, 0x19, 0x9b, 0x3e }
+    },
+    { GCRY_CIPHER_AES256, GCRY_CIPHER_MODE_CFB,
+      "abcdefghijklmnopABCDEFGHIJKLMNOP", 32,
+      "1234567890123456", 16,
+/*[2]*/
+      { 0x31, 0xe1, 0x1f, 0x63, 0x65, 0x47, 0x8c, 0x3f, 0x53, 0xdb,
+        0xd9, 0x4d, 0x91, 0x1d, 0x02, 0x9c, 0x05, 0x25, 0x58, 0x29 }
+    },
+    { GCRY_CIPHER_AES, GCRY_CIPHER_MODE_CBC,
+      "abcdefghijklmnop", 16,
+      "1234567890123456", 16,
+/*[3]*/
+      { 0xdc, 0x0c, 0xc2, 0xd9, 0x6b, 0x47, 0xf9, 0xeb, 0x06, 0xb4,
+        0x2f, 0x6e, 0xec, 0x72, 0xbf, 0x55, 0x26, 0x7f, 0xa9, 0x97 }
+    },
+    { GCRY_CIPHER_AES192, GCRY_CIPHER_MODE_CBC,
+      "abcdefghijklmnopABCDEFG", 24,
+      "1234567890123456", 16,
+/*[4]*/
+      { 0x2b, 0x90, 0x9b, 0xe6, 0x40, 0xab, 0x6e, 0xc2, 0xc5, 0xb1,
+        0x87, 0xf5, 0x43, 0x84, 0x7b, 0x04, 0x06, 0x47, 0xd1, 0x8f }
+    },
+    { GCRY_CIPHER_AES256, GCRY_CIPHER_MODE_CBC,
+      "abcdefghijklmnopABCDEFGHIJKLMNOP", 32,
+      "1234567890123456", 16,
+/*[5]*/
+      { 0xaa, 0xa8, 0xdf, 0x03, 0xb0, 0xba, 0xc4, 0xe3, 0xc1, 0x02,
+        0x38, 0x31, 0x8d, 0x86, 0xcb, 0x49, 0x6d, 0xad, 0xae, 0x01 }
+    },
+    { GCRY_CIPHER_AES, GCRY_CIPHER_MODE_OFB,
+      "abcdefghijklmnop", 16,
+      "1234567890123456", 16,
+/*[6]*/
+      { 0x65, 0xfe, 0xde, 0x48, 0xd0, 0xa1, 0xa6, 0xf9, 0x24, 0x6b,
+        0x52, 0x5f, 0x21, 0x8a, 0x6f, 0xc7, 0x70, 0x3b, 0xd8, 0x4a }
+    },
+    { GCRY_CIPHER_AES192, GCRY_CIPHER_MODE_OFB,
+      "abcdefghijklmnopABCDEFG", 24,
+      "1234567890123456", 16,
+/*[7]*/
+      { 0x59, 0x5b, 0x02, 0xa2, 0x88, 0xc0, 0xbe, 0x94, 0x43, 0xaa,
+        0x39, 0xf6, 0xbd, 0xcc, 0x83, 0x99, 0xee, 0x00, 0xa1, 0x91 }
+    },
+    { GCRY_CIPHER_AES256, GCRY_CIPHER_MODE_OFB,
+      "abcdefghijklmnopABCDEFGHIJKLMNOP", 32,
+      "1234567890123456", 16,
+/*[8]*/
+      { 0x38, 0x8c, 0xe1, 0xe2, 0xbe, 0x67, 0x60, 0xe8, 0xeb, 0xce,
+        0xd0, 0xc6, 0xaa, 0xd6, 0xf6, 0x26, 0x15, 0x56, 0xd0, 0x2b }
+    },
+    { GCRY_CIPHER_AES, GCRY_CIPHER_MODE_CTR,
+      "abcdefghijklmnop", 16,
+      "1234567890123456", 16,
+/*[9]*/
+      { 0x9a, 0x48, 0x94, 0xd6, 0x50, 0x46, 0x81, 0xdb, 0x68, 0x34,
+        0x3b, 0xc5, 0x9e, 0x66, 0x94, 0x81, 0x98, 0xa0, 0xf9, 0xff }
+    },
+    { GCRY_CIPHER_AES192, GCRY_CIPHER_MODE_CTR,
+      "abcdefghijklmnopABCDEFG", 24,
+      "1234567890123456", 16,
+/*[10]*/
+      { 0x2c, 0x2c, 0xd3, 0x75, 0x81, 0x2a, 0x59, 0x07, 0xeb, 0x08,
+        0xce, 0x28, 0x4c, 0x0c, 0x6a, 0xa8, 0x8f, 0xa3, 0x98, 0x7e }
+    },
+    { GCRY_CIPHER_AES256, GCRY_CIPHER_MODE_CTR,
+      "abcdefghijklmnopABCDEFGHIJKLMNOP", 32,
+      "1234567890123456", 16,
+/*[11]*/
+      { 0x64, 0xce, 0x73, 0x03, 0xc7, 0x89, 0x99, 0x1f, 0xf1, 0xce,
+        0xfe, 0xfb, 0xb9, 0x42, 0x30, 0xdf, 0xbb, 0x68, 0x6f, 0xd3 }
+    },
+    { GCRY_CIPHER_AES, GCRY_CIPHER_MODE_ECB,
+      "abcdefghijklmnop", 16,
+      "1234567890123456", 16,
+/*[12]*/
+      { 0x51, 0xae, 0xf5, 0xac, 0x22, 0xa0, 0xba, 0x11, 0xc5, 0xaa,
+        0xb4, 0x70, 0x99, 0xce, 0x18, 0x08, 0x12, 0x9b, 0xb1, 0xc5 }
+    },
+    { GCRY_CIPHER_AES192, GCRY_CIPHER_MODE_ECB,
+      "abcdefghijklmnopABCDEFG", 24,
+      "1234567890123456", 16,
+/*[13]*/
+      { 0x57, 0x91, 0xea, 0x48, 0xd8, 0xbf, 0x9e, 0xc1, 0xae, 0x33,
+        0xb3, 0xfd, 0xf7, 0x7a, 0xeb, 0x30, 0xb1, 0x62, 0x0d, 0x82 }
+    },
+    { GCRY_CIPHER_AES256, GCRY_CIPHER_MODE_ECB,
+      "abcdefghijklmnopABCDEFGHIJKLMNOP", 32,
+      "1234567890123456", 16,
+/*[14]*/
+      { 0x2d, 0x71, 0x54, 0xb9, 0xc5, 0x28, 0x76, 0xff, 0x76, 0xb5,
+        0x99, 0x37, 0x99, 0x9d, 0xf7, 0x10, 0x6d, 0x86, 0x4f, 0x3f }
+    }
+  };
+  gcry_cipher_hd_t hde = NULL;  /* Handle used for encryption.  */
+  gcry_cipher_hd_t hdd = NULL;  /* Handle used for decryption.  */
+  unsigned char *buffer_base, *outbuf_base; /* Allocated buffers.  */
+  unsigned char *buffer, *outbuf;           /* Aligned buffers.  */
+  size_t buflen;
+  unsigned char hash[20];
+  int i, j, keylen, blklen;
+  gcry_error_t err = 0;
+
+  if (verbose)
+    fprintf (stderr, "Starting bulk cipher checks.\n");
+
+  /* The aligned pointers start 1 to 16 bytes behind the allocated
+     addresses (exactly 16 if the allocation is already 16 byte
+     aligned), thus 16 extra bytes are needed to keep the full BUFLEN
+     bytes inside of the allocation.  */
+  buflen = 16*100;  /* We check a 1600 byte buffer.  */
+  buffer_base = gcry_xmalloc (buflen+16);
+  buffer = buffer_base + (16 - ((size_t)buffer_base & 0x0f));
+  outbuf_base = gcry_xmalloc (buflen+16);
+  outbuf = outbuf_base + (16 - ((size_t)outbuf_base & 0x0f));
+
+
+  for (i = 0; i < (int)DIM (tv); i++)
+    {
+      if (verbose)
+        fprintf (stderr, " checking bulk encryption for %s [%i], mode %d\n",
+                 gcry_cipher_algo_name (tv[i].algo),
+                 tv[i].algo, tv[i].mode);
+      err = gcry_cipher_open (&hde, tv[i].algo, tv[i].mode, 0);
+      if (!err)
+        err = gcry_cipher_open (&hdd, tv[i].algo, tv[i].mode, 0);
+      if (err)
+        {
+          fail ("gcry_cipher_open failed: %s\n", gpg_strerror (err));
+          goto leave;
+        }
+
+      keylen = gcry_cipher_get_algo_keylen(tv[i].algo);
+      if (!keylen)
+        {
+          fail ("gcry_cipher_get_algo_keylen failed\n");
+          goto leave;
+        }
+
+      err = gcry_cipher_setkey (hde, tv[i].key, tv[i].keylen);
+      if (!err)
+        err = gcry_cipher_setkey (hdd, tv[i].key, tv[i].keylen);
+      if (err)
+        {
+          fail ("gcry_cipher_setkey failed: %s\n", gpg_strerror (err));
+          goto leave;
+        }
+
+      blklen = gcry_cipher_get_algo_blklen(tv[i].algo);
+      if (!blklen)
+        {
+          fail ("gcry_cipher_get_algo_blklen failed\n");
+          goto leave;
+        }
+
+      err = gcry_cipher_setiv (hde, tv[i].iv, tv[i].ivlen);
+      if (!err)
+        err = gcry_cipher_setiv (hdd, tv[i].iv, tv[i].ivlen);
+      if (err)
+        {
+          fail ("gcry_cipher_setiv failed: %s\n", gpg_strerror (err));
+          goto leave;
+        }
+
+      /* Fill the buffer with our test pattern.  */
+      for (j=0; j < (int)buflen; j++)
+        buffer[j] = ((j & 0xff) ^ ((j >> 8) & 0xff));
+
+      err = gcry_cipher_encrypt (hde, outbuf, buflen, buffer, buflen);
+      if (err)
+        {
+          fail ("gcry_cipher_encrypt (algo %d, mode %d) failed: %s\n",
+                tv[i].algo, tv[i].mode, gpg_strerror (err));
+          goto leave;
+        }
+
+      gcry_md_hash_buffer (GCRY_MD_SHA1, hash, outbuf, buflen);
+      /* Enable the next block to print the digests in a format
+         suitable for pasting into the table above.  */
+#if 0
+      printf ("/*[%d]*/\n", i);
+      fputs (" {", stdout);
+      for (j=0; j < 20; j++)
+        printf (" 0x%02x%c%s", hash[j], j==19? ' ':',', j == 9? "\n ":"");
+      puts ("}");
+#endif
+
+      if (memcmp (hash, tv[i].t1_hash, 20))
+        fail ("encrypt mismatch (algo %d, mode %d)\n",
+              tv[i].algo, tv[i].mode);
+
+      /* Decrypt in place: no separate input buffer is passed.  */
+      err = gcry_cipher_decrypt (hdd, outbuf, buflen, NULL, 0);
+      if (err)
+        {
+          fail ("gcry_cipher_decrypt (algo %d, mode %d) failed: %s\n",
+                tv[i].algo, tv[i].mode, gpg_strerror (err));
+          goto leave;
+        }
+
+      if (memcmp (buffer, outbuf, buflen))
+        fail ("decrypt mismatch (algo %d, mode %d)\n",
+              tv[i].algo, tv[i].mode);
+
+      /* Reset the handles so that the cleanup code below does not
+         close them a second time.  */
+      gcry_cipher_close (hde); hde = NULL;
+      gcry_cipher_close (hdd); hdd = NULL;
+    }
+
+  if (verbose)
+    fprintf (stderr, "Completed bulk cipher checks.\n");
+ leave:
+  gcry_cipher_close (hde);
+  gcry_cipher_close (hdd);
+  gcry_free (buffer_base);
+  gcry_free (outbuf_base);
+}
+
+
+
static void
check_one_cipher (int algo, int mode, int flags)
{
@@ -2287,6 +2531,7 @@ main (int argc, char **argv)
{
check_ciphers ();
check_cipher_modes ();
+ check_bulk_cipher_modes ();
check_digests ();
check_hmac ();
check_pubkey ();