diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-12-27 12:37:16 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-12-27 12:37:16 +0200 |
commit | 8eabecc883332156adffc1df42d27f614c157e06 (patch) | |
tree | 39de3ff81af5dfae0ed4c322e781c60d7a7ca590 /cipher/rijndael.c | |
parent | c2e1f8fea271f3ef8027809547c4a52e0b1e24a2 (diff) | |
download | libgcrypt-8eabecc883332156adffc1df42d27f614c157e06.tar.gz |
Add Intel SSSE3 based vector permutation AES implementation
* cipher/Makefile.am: Add 'rijndael-ssse3-amd64.c'.
* cipher/rijndael-internal.h (USE_SSSE3): New.
(RIJNDAEL_context_s) [USE_SSSE3]: Add 'use_ssse3'.
* cipher/rijndael-ssse3-amd64.c: New.
* cipher/rijndael.c [USE_SSSE3] (_gcry_aes_ssse3_do_setkey)
(_gcry_aes_ssse3_prepare_decryption, _gcry_aes_ssse3_encrypt)
(_gcry_aes_ssse3_decrypt, _gcry_aes_ssse3_cfb_enc)
(_gcry_aes_ssse3_cbc_enc, _gcry_aes_ssse3_ctr_enc)
(_gcry_aes_ssse3_cfb_dec, _gcry_aes_ssse3_cbc_dec): New.
(do_setkey): Add HWF check for SSSE3 and setup for SSSE3
implementation.
(prepare_decryption, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
(_gcry_aes_ctr_enc, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Add
selection for SSSE3 implementation.
* configure.ac [host=x86_64]: Add 'rijndael-ssse3-amd64.lo'.
--
This patch adds "AES with vector permutations" implementation by
Mike Hamburg. Public-domain source-code is available at:
http://crypto.stanford.edu/vpaes/
Benchmark on Intel Core2 T8100 (2.1Ghz, no turbo):
Old (AMD64 asm):
AES | nanosecs/byte mebibytes/sec cycles/byte
ECB enc | 8.79 ns/B 108.5 MiB/s 18.46 c/B
ECB dec | 9.07 ns/B 105.1 MiB/s 19.05 c/B
CBC enc | 7.77 ns/B 122.7 MiB/s 16.33 c/B
CBC dec | 7.74 ns/B 123.2 MiB/s 16.26 c/B
CFB enc | 7.88 ns/B 121.0 MiB/s 16.54 c/B
CFB dec | 7.56 ns/B 126.1 MiB/s 15.88 c/B
OFB enc | 9.02 ns/B 105.8 MiB/s 18.94 c/B
OFB dec | 9.07 ns/B 105.1 MiB/s 19.05 c/B
CTR enc | 7.80 ns/B 122.2 MiB/s 16.38 c/B
CTR dec | 7.81 ns/B 122.2 MiB/s 16.39 c/B
New (ssse3):
AES | nanosecs/byte mebibytes/sec cycles/byte
ECB enc | 5.77 ns/B 165.2 MiB/s 12.13 c/B
ECB dec | 7.13 ns/B 133.7 MiB/s 14.98 c/B
CBC enc | 5.27 ns/B 181.0 MiB/s 11.06 c/B
CBC dec | 6.39 ns/B 149.3 MiB/s 13.42 c/B
CFB enc | 5.27 ns/B 180.9 MiB/s 11.07 c/B
CFB dec | 5.28 ns/B 180.7 MiB/s 11.08 c/B
OFB enc | 6.11 ns/B 156.1 MiB/s 12.83 c/B
OFB dec | 6.13 ns/B 155.5 MiB/s 12.88 c/B
CTR enc | 5.26 ns/B 181.5 MiB/s 11.04 c/B
CTR dec | 5.24 ns/B 182.0 MiB/s 11.00 c/B
Benchmark on Intel i5-2450M (2.5Ghz, no turbo, aes-ni disabled):
Old (AMD64 asm):
AES | nanosecs/byte mebibytes/sec cycles/byte
ECB enc | 8.06 ns/B 118.3 MiB/s 20.15 c/B
ECB dec | 8.21 ns/B 116.1 MiB/s 20.53 c/B
CBC enc | 7.88 ns/B 121.1 MiB/s 19.69 c/B
CBC dec | 7.57 ns/B 126.0 MiB/s 18.92 c/B
CFB enc | 7.87 ns/B 121.2 MiB/s 19.67 c/B
CFB dec | 7.56 ns/B 126.2 MiB/s 18.89 c/B
OFB enc | 8.27 ns/B 115.3 MiB/s 20.67 c/B
OFB dec | 8.28 ns/B 115.1 MiB/s 20.71 c/B
CTR enc | 8.02 ns/B 119.0 MiB/s 20.04 c/B
CTR dec | 8.02 ns/B 118.9 MiB/s 20.05 c/B
New (ssse3):
AES | nanosecs/byte mebibytes/sec cycles/byte
ECB enc | 4.03 ns/B 236.6 MiB/s 10.07 c/B
ECB dec | 5.28 ns/B 180.8 MiB/s 13.19 c/B
CBC enc | 3.77 ns/B 252.7 MiB/s 9.43 c/B
CBC dec | 4.69 ns/B 203.3 MiB/s 11.73 c/B
CFB enc | 3.75 ns/B 254.3 MiB/s 9.37 c/B
CFB dec | 3.69 ns/B 258.6 MiB/s 9.22 c/B
OFB enc | 4.17 ns/B 228.7 MiB/s 10.43 c/B
OFB dec | 4.17 ns/B 228.7 MiB/s 10.42 c/B
CTR enc | 3.72 ns/B 256.5 MiB/s 9.30 c/B
CTR dec | 3.72 ns/B 256.1 MiB/s 9.31 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/rijndael.c')
-rw-r--r-- | cipher/rijndael.c | 96 |
1 files changed, 94 insertions, 2 deletions
diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 7a83718b..51c36c76 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -99,6 +99,40 @@ extern void _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv, size_t nblocks); #endif +#ifdef USE_SSSE3 +/* SSSE3 (AMD64) vector permutation implementation of AES */ +extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key); +extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx); + +extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, + unsigned char *dst, + const unsigned char *src); +extern void _gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks, + int cbc_mac); +extern void _gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *ctr, size_t nblocks); +extern void _gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +extern void _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, + unsigned char *outbuf, + const unsigned char *inbuf, + unsigned char *iv, size_t nblocks); +#endif + #ifdef USE_PADLOCK extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, @@ -182,7 +216,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) int rounds; int i,j, r, t, rconpointer = 0; int KC; -#if defined(USE_AESNI) || defined(USE_PADLOCK) +#if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) unsigned int hwfeatures; #endif @@ -223,7 +257,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) ctx->rounds = rounds; -#if defined(USE_AESNI) || defined(USE_PADLOCK) +#if defined(USE_AESNI) || defined(USE_PADLOCK) || defined(USE_SSSE3) hwfeatures = _gcry_get_hw_features (); #endif @@ -234,6 +268,9 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) #ifdef USE_AESNI ctx->use_aesni = 0; #endif +#ifdef USE_SSSE3 + ctx->use_ssse3 = 0; +#endif if (0) { @@ -260,6 +297,16 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) memcpy (ctx->padlockkey, key, keylen); } #endif +#ifdef USE_SSSE3 + else if (hwfeatures & HWF_INTEL_SSSE3) + { + ctx->encrypt_fn = _gcry_aes_ssse3_encrypt; + ctx->decrypt_fn = _gcry_aes_ssse3_decrypt; + ctx->prefetch_enc_fn = NULL; + ctx->prefetch_dec_fn = NULL; + ctx->use_ssse3 = 1; + } +#endif else { ctx->encrypt_fn = do_encrypt; @@ -278,6 +325,10 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) else if (ctx->use_aesni) _gcry_aes_aesni_do_setkey (ctx, key); #endif +#ifdef USE_AESNI + else if (ctx->use_ssse3) + _gcry_aes_ssse3_do_setkey (ctx, key); +#endif else { const byte *sbox = ((const byte *)encT) + 1; @@ -403,6 +454,12 @@ prepare_decryption( RIJNDAEL_context *ctx ) _gcry_aes_aesni_prepare_decryption (ctx); } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_prepare_decryption (ctx); + } +#endif /*USE_SSSE3*/ #ifdef USE_PADLOCK else if (ctx->use_padlock) { @@ -650,6 +707,13 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv, burn_depth = 0; } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cfb_enc (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; @@ -697,6 +761,13 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv, burn_depth = 0; } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cbc_enc (ctx, outbuf, inbuf, iv, nblocks, cbc_mac); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; @@ -752,6 +823,13 @@ _gcry_aes_ctr_enc (void *context, unsigned char *ctr, burn_depth = 0; } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_ctr_enc (ctx, outbuf, inbuf, ctr, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ else { union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp; @@ -986,6 +1064,13 @@ _gcry_aes_cfb_dec (void *context, unsigned char *iv, burn_depth = 0; } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cfb_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ else { rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn; @@ -1032,6 +1117,13 @@ _gcry_aes_cbc_dec (void *context, unsigned char *iv, burn_depth = 0; } #endif /*USE_AESNI*/ +#ifdef USE_SSSE3 + else if (ctx->use_ssse3) + { + _gcry_aes_ssse3_cbc_dec (ctx, outbuf, inbuf, iv, nblocks); + burn_depth = 0; + } +#endif /*USE_SSSE3*/ else { unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16; |