diff options
author | Werner Koch <wk@gnupg.org> | 2008-03-17 18:08:15 +0000 |
---|---|---|
committer | Werner Koch <wk@gnupg.org> | 2008-03-17 18:08:15 +0000 |
commit | 37b298a02055e027b690e643fe149754727b79db (patch) | |
tree | 912a67ccd1acd0eb73cdff69d522d74b4c0aa3e0 | |
parent | 4126a0a6b6b4aa45de670052af9d6112789f8341 (diff) | |
download | libgcrypt-37b298a02055e027b690e643fe149754727b79db.tar.gz |
Better AES performance.
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | cipher/ChangeLog | 28 | ||||
-rw-r--r-- | cipher/cipher.c | 561 | ||||
-rw-r--r-- | cipher/ecc.c | 4 | ||||
-rw-r--r-- | cipher/rijndael.c | 463 | ||||
-rw-r--r-- | cipher/rsa.c | 7 | ||||
-rw-r--r-- | configure.ac | 24 | ||||
-rw-r--r-- | src/ChangeLog | 5 | ||||
-rw-r--r-- | src/cipher.h | 15 | ||||
-rw-r--r-- | src/gcrypt.h.in | 4 | ||||
-rw-r--r-- | tests/ChangeLog | 5 | ||||
-rw-r--r-- | tests/benchmark.c | 56 |
13 files changed, 792 insertions, 390 deletions
@@ -1,3 +1,7 @@ +2008-02-18 Werner Koch <wk@g10code.com> + + * configure.ac (IS_DEVELOPMENT_VERSION): Set depending on the my_svn. + 2007-12-11 Werner Koch <wk@g10code.com> * configure.ac: We actually require libgpg-error 1.4. Reported by @@ -1,8 +1,10 @@ Noteworthy changes in version 1.4.1 ------------------------------------------------ - * Fixed a bug introduced by 1.3.1 led to the comsumption of far too - much entropy for the intial seeding. + * Fixed a bug introduced by 1.3.1 which led to the comsumption of far + too much entropy for the intial seeding. + + * Improved AES performance for CFB and CBS modes. Noteworthy changes in version 1.4.0 (2007-12-10) diff --git a/cipher/ChangeLog b/cipher/ChangeLog index 3dec8a95..8f91be67 100644 --- a/cipher/ChangeLog +++ b/cipher/ChangeLog @@ -1,3 +1,29 @@ +2008-03-17 Werner Koch <wk@g10code.com> + + * rijndael.c (_gcry_aes_cfb_dec): New. + (do_encrypt): Factor code out to .. + (do_encrypt_aligned): .. New. + (_gcry_aes_cfb_enc, _gcry_aes_cfb_dec): Use new function. + (do_decrypt): Factor code out to .. + (do_decrypt_aligned): .. new. + (_gcry_aes_cbc_enc, _gcry_aes_cbc_dec): New. + * cipher.c (struct gcry_cipher_handle): Put field IV into new + union U_IV to enforce proper alignment. Change all users. + (do_cfb_decrypt): Optimize. + (do_cbc_encrypt, do_cbc_decrypt): Optimize. + +2008-03-15 Werner Koch <wk@g10code.com> + + * rijndael.c (_gcry_aes_cfb_enc): New. + * cipher.c (struct gcry_cipher_handle): Add field ALGO and BULK. + (gcry_cipher_open): Set ALGO and BULK. + (do_cfb_encrypt): Optimize. + +2008-02-18 Werner Koch <wk@g10code.com> + + * rsa.c (_gcry_rsa_verify) [IS_DEVELOPMENT_VERSION]: Print + intermediate results. + 2008-01-08 Werner Koch <wk@g10code.com> * random.c (add_randomness): Do not just increment @@ -3471,7 +3497,7 @@ Mon Feb 16 10:08:47 1998 Werner Koch (wk@isil.d.shuttle.de) Copyright 1998,1999,2000,2001,2002,2003,2004,2005,2006 - 2007 Free Software Foundation, Inc. 
+ 2007, 2008 Free Software Foundation, Inc. This file is free software; as a special exception the author gives unlimited permission to copy and/or distribute it, with or without diff --git a/cipher/cipher.c b/cipher/cipher.c index b34ace77..9b3b4ff9 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -60,9 +60,9 @@ static struct cipher_table_entry { &_gcry_cipher_spec_cast5, GCRY_CIPHER_CAST5 }, #endif #if USE_AES - { &_gcry_cipher_spec_aes, GCRY_CIPHER_AES }, - { &_gcry_cipher_spec_aes192, GCRY_CIPHER_AES192 }, - { &_gcry_cipher_spec_aes256, GCRY_CIPHER_AES256 }, + { &_gcry_cipher_spec_aes, GCRY_CIPHER_AES}, + { &_gcry_cipher_spec_aes192, GCRY_CIPHER_AES192}, + { &_gcry_cipher_spec_aes256, GCRY_CIPHER_AES256}, #endif #if USE_TWOFISH { &_gcry_cipher_spec_twofish, GCRY_CIPHER_TWOFISH }, @@ -137,12 +137,49 @@ struct gcry_cipher_handle size_t handle_offset; /* Offset to the malloced block. */ gcry_cipher_spec_t *cipher; gcry_module_t module; + + /* The algorithm id. This is a hack required because the module + interface does not easily allow to retrieve this value. */ + int algo; + + /* A structure with function pointers for bulk operations. Due to + limitations of the module system (we don't want to change the + API) we need to keep these function pointers here. The cipher + open function intializes them and the actual encryption routines + use them if they are not NULL. 
*/ + struct { + void (*cfb_enc)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks); + void (*cfb_dec)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks); + void (*cbc_enc)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks, int cbc_mac); + void (*cbc_dec)(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks); + } bulk; + + int mode; unsigned int flags; - unsigned char iv[MAX_BLOCKSIZE]; /* (this should be ulong aligned) */ + + /* The initialization vector. To help code optimization we make + sure that it is aligned on an unsigned long and u32 boundary. */ + union { + unsigned long dummy_iv; + u32 dummy_u32_iv; + unsigned char iv[MAX_BLOCKSIZE]; + } u_iv; + unsigned char lastiv[MAX_BLOCKSIZE]; - int unused; /* in IV */ + int unused; /* Number of unused bytes in the IV. */ + unsigned char ctr[MAX_BLOCKSIZE]; /* For Counter (CTR) mode. */ + + /* What follows are two contexts of the cipher in use. The first one needs to be aligned well enough for the cipher operation whereas the second one is a copy created by cipher_setkey and @@ -151,6 +188,7 @@ struct gcry_cipher_handle cipher_context_alignment_t context; }; + /* These dummy functions are used in case a cipher implementation refuses to provide it's own functions. */ @@ -705,8 +743,25 @@ gcry_cipher_open (gcry_cipher_hd_t *handle, h->handle_offset = off; h->cipher = cipher; h->module = module; + h->algo = algo; h->mode = mode; h->flags = flags; + + /* Setup bulk encryption routines. 
*/ + switch (algo) + { + case GCRY_CIPHER_AES128: + case GCRY_CIPHER_AES192: + case GCRY_CIPHER_AES256: + h->bulk.cfb_enc = _gcry_aes_cfb_enc; + h->bulk.cfb_dec = _gcry_aes_cfb_dec; + h->bulk.cbc_enc = _gcry_aes_cbc_enc; + h->bulk.cbc_dec = _gcry_aes_cbc_dec; + break; + + default: + break; + } } } @@ -787,16 +842,17 @@ cipher_setkey (gcry_cipher_hd_t c, byte *key, unsigned keylen) static void cipher_setiv( gcry_cipher_hd_t c, const byte *iv, unsigned ivlen ) { - memset( c->iv, 0, c->cipher->blocksize ); - if( iv ) { - if( ivlen != c->cipher->blocksize ) - log_info("WARNING: cipher_setiv: ivlen=%u blklen=%u\n", - ivlen, (unsigned) c->cipher->blocksize ); - if (ivlen > c->cipher->blocksize) - ivlen = c->cipher->blocksize; - memcpy( c->iv, iv, ivlen ); + memset (c->u_iv.iv, 0, c->cipher->blocksize); + if (iv) + { + if (ivlen != c->cipher->blocksize) + log_info ("WARNING: cipher_setiv: ivlen=%u blklen=%u\n", + ivlen, (unsigned int)c->cipher->blocksize); + if (ivlen > c->cipher->blocksize) + ivlen = c->cipher->blocksize; + memcpy (c->u_iv.iv, iv, ivlen); } - c->unused = 0; + c->unused = 0; } @@ -808,7 +864,7 @@ cipher_reset (gcry_cipher_hd_t c) memcpy (&c->context.c, (char *) &c->context.c + c->cipher->contextsize, c->cipher->contextsize); - memset (c->iv, 0, c->cipher->blocksize); + memset (c->u_iv.iv, 0, c->cipher->blocksize); memset (c->lastiv, 0, c->cipher->blocksize); memset (c->ctr, 0, c->cipher->blocksize); } @@ -840,220 +896,312 @@ do_ecb_decrypt( gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, } } + static void -do_cbc_encrypt( gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, - unsigned int nbytes ) +do_cbc_encrypt (gcry_cipher_hd_t c, unsigned char *outbuf, + const unsigned char *inbuf, unsigned int nbytes ) { - unsigned int n; - byte *ivp; - int i; - size_t blocksize = c->cipher->blocksize; - unsigned nblocks = nbytes / blocksize; + unsigned int n; + unsigned char *ivp; + int i; + size_t blocksize = c->cipher->blocksize; + unsigned nblocks = nbytes / 
blocksize; - if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) { + if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) + { if ((nbytes % blocksize) == 0) nblocks--; } - for(n=0; n < nblocks; n++ ) { - /* fixme: the xor should work on words and not on - * bytes. Maybe it is a good idea to enhance the cipher backend - * API to allow for CBC handling direct in the backend */ - for(ivp=c->iv,i=0; i < blocksize; i++ ) - outbuf[i] = inbuf[i] ^ *ivp++; - c->cipher->encrypt ( &c->context.c, outbuf, outbuf ); - memcpy(c->iv, outbuf, blocksize ); - inbuf += blocksize; - if (!(c->flags & GCRY_CIPHER_CBC_MAC)) - outbuf += blocksize; + if (c->bulk.cbc_enc) + { + c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks, + (c->flags & GCRY_CIPHER_CBC_MAC)); + inbuf += nblocks * blocksize; + if (!(c->flags & GCRY_CIPHER_CBC_MAC)) + outbuf += nblocks * blocksize; + } + else + { + for (n=0; n < nblocks; n++ ) + { + for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) + outbuf[i] = inbuf[i] ^ *ivp++; + c->cipher->encrypt ( &c->context.c, outbuf, outbuf ); + memcpy (c->u_iv.iv, outbuf, blocksize ); + inbuf += blocksize; + if (!(c->flags & GCRY_CIPHER_CBC_MAC)) + outbuf += blocksize; + } } - if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) - { - /* We have to be careful here, since outbuf might be equal to - inbuf. */ - - int restbytes; - byte b; - - if ((nbytes % blocksize) == 0) - restbytes = blocksize; - else - restbytes = nbytes % blocksize; - - outbuf -= blocksize; - for (ivp = c->iv, i = 0; i < restbytes; i++) - { - b = inbuf[i]; - outbuf[blocksize + i] = outbuf[i]; - outbuf[i] = b ^ *ivp++; - } - for (; i < blocksize; i++) - outbuf[i] = 0 ^ *ivp++; + if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) + { + /* We have to be careful here, since outbuf might be equal to + inbuf. 
*/ + int restbytes; + unsigned char b; - c->cipher->encrypt (&c->context.c, outbuf, outbuf); - memcpy (c->iv, outbuf, blocksize); - } + if ((nbytes % blocksize) == 0) + restbytes = blocksize; + else + restbytes = nbytes % blocksize; + + outbuf -= blocksize; + for (ivp = c->u_iv.iv, i = 0; i < restbytes; i++) + { + b = inbuf[i]; + outbuf[blocksize + i] = outbuf[i]; + outbuf[i] = b ^ *ivp++; + } + for (; i < blocksize; i++) + outbuf[i] = 0 ^ *ivp++; + + c->cipher->encrypt (&c->context.c, outbuf, outbuf); + memcpy (c->u_iv.iv, outbuf, blocksize); + } } + static void -do_cbc_decrypt( gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, - unsigned int nbytes ) +do_cbc_decrypt (gcry_cipher_hd_t c, unsigned char *outbuf, + const unsigned char *inbuf, unsigned int nbytes) { - unsigned int n; - byte *ivp; - int i; - size_t blocksize = c->cipher->blocksize; - unsigned int nblocks = nbytes / blocksize; + unsigned int n; + unsigned char *ivp; + int i; + size_t blocksize = c->cipher->blocksize; + unsigned int nblocks = nbytes / blocksize; - if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) { + if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) + { nblocks--; if ((nbytes % blocksize) == 0) nblocks--; - memcpy(c->lastiv, c->iv, blocksize ); + memcpy (c->lastiv, c->u_iv.iv, blocksize); } - for(n=0; n < nblocks; n++ ) { - /* Because outbuf and inbuf might be the same, we have - * to save the original ciphertext block. We use lastiv - * for this here because it is not used otherwise. */ - memcpy(c->lastiv, inbuf, blocksize ); - c->cipher->decrypt ( &c->context.c, outbuf, inbuf ); - for(ivp=c->iv,i=0; i < blocksize; i++ ) + if (c->bulk.cbc_dec) + { + c->bulk.cbc_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + inbuf += nblocks * blocksize; + outbuf += nblocks * blocksize; + } + else + { + for (n=0; n < nblocks; n++ ) + { + /* Because outbuf and inbuf might be the same, we have to + * save the original ciphertext block. 
We use lastiv for + * this here because it is not used otherwise. */ + memcpy (c->lastiv, inbuf, blocksize); + c->cipher->decrypt ( &c->context.c, outbuf, inbuf ); + for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) outbuf[i] ^= *ivp++; - memcpy(c->iv, c->lastiv, blocksize ); - inbuf += c->cipher->blocksize; - outbuf += c->cipher->blocksize; + memcpy(c->u_iv.iv, c->lastiv, blocksize ); + inbuf += c->cipher->blocksize; + outbuf += c->cipher->blocksize; + } } - if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) { - int restbytes; - - if ((nbytes % blocksize) == 0) - restbytes = blocksize; - else - restbytes = nbytes % blocksize; - - memcpy(c->lastiv, c->iv, blocksize ); /* save Cn-2 */ - memcpy(c->iv, inbuf + blocksize, restbytes ); /* save Cn */ - - c->cipher->decrypt ( &c->context.c, outbuf, inbuf ); - for(ivp=c->iv,i=0; i < restbytes; i++ ) - outbuf[i] ^= *ivp++; + if ((c->flags & GCRY_CIPHER_CBC_CTS) && nbytes > blocksize) + { + int restbytes; + + if ((nbytes % blocksize) == 0) + restbytes = blocksize; + else + restbytes = nbytes % blocksize; + + memcpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */ + memcpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */ - memcpy(outbuf + blocksize, outbuf, restbytes); - for(i=restbytes; i < blocksize; i++) - c->iv[i] = outbuf[i]; - c->cipher->decrypt ( &c->context.c, outbuf, c->iv ); - for(ivp=c->lastiv,i=0; i < blocksize; i++ ) - outbuf[i] ^= *ivp++; - /* c->lastiv is now really lastlastiv, does this matter? */ + c->cipher->decrypt ( &c->context.c, outbuf, inbuf ); + for (ivp=c->u_iv.iv,i=0; i < restbytes; i++ ) + outbuf[i] ^= *ivp++; + + memcpy(outbuf + blocksize, outbuf, restbytes); + for(i=restbytes; i < blocksize; i++) + c->u_iv.iv[i] = outbuf[i]; + c->cipher->decrypt (&c->context.c, outbuf, c->u_iv.iv); + for(ivp=c->lastiv,i=0; i < blocksize; i++ ) + outbuf[i] ^= *ivp++; + /* c->lastiv is now really lastlastiv, does this matter? 
*/ } } static void -do_cfb_encrypt( gcry_cipher_hd_t c, - byte *outbuf, const byte *inbuf, unsigned nbytes ) +do_cfb_encrypt( gcry_cipher_hd_t c, unsigned char *outbuf, + const unsigned char *inbuf, unsigned int nbytes ) { - byte *ivp; - size_t blocksize = c->cipher->blocksize; - - if( nbytes <= c->unused ) { - /* Short enough to be encoded by the remaining XOR mask. */ - /* XOR the input with the IV and store input into IV. */ - for (ivp=c->iv+c->cipher->blocksize - c->unused; - nbytes; - nbytes--, c->unused-- ) - *outbuf++ = (*ivp++ ^= *inbuf++); - return; + unsigned char *ivp; + size_t blocksize = c->cipher->blocksize; + size_t blocksize_x_2 = blocksize + blocksize; + + if ( nbytes <= c->unused ) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV and store input into IV. */ + for (ivp=c->u_iv.iv+c->cipher->blocksize - c->unused; + nbytes; + nbytes--, c->unused-- ) + *outbuf++ = (*ivp++ ^= *inbuf++); + return; + } + + if ( c->unused ) + { + /* XOR the input with the IV and store input into IV */ + nbytes -= c->unused; + for(ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- ) + *outbuf++ = (*ivp++ ^= *inbuf++); } - if( c->unused ) { - /* XOR the input with the IV and store input into IV */ - nbytes -= c->unused; - for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) - *outbuf++ = (*ivp++ ^= *inbuf++); + /* Now we can process complete blocks. We use a loop as long as we + have at least 2 blocks and use conditions for the rest. This + also allows to use a bulk encryption function if available. */ + if (nbytes >= blocksize_x_2 && c->bulk.cfb_enc) + { + unsigned int nblocks = nbytes / blocksize; + c->bulk.cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + outbuf += nblocks * blocksize; + inbuf += nblocks * blocksize; + nbytes -= nblocks * blocksize; + } + else + { + while ( nbytes >= blocksize_x_2 ) + { + int i; + /* Encrypt the IV. 
*/ + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + /* XOR the input with the IV and store input into IV. */ + for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) + *outbuf++ = (*ivp++ ^= *inbuf++); + nbytes -= blocksize; + } } - /* Now we can process complete blocks. */ - while( nbytes >= blocksize ) { - int i; - /* Encrypt the IV (and save the current one). */ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); - /* XOR the input with the IV and store input into IV */ - for(ivp=c->iv,i=0; i < blocksize; i++ ) - *outbuf++ = (*ivp++ ^= *inbuf++); - nbytes -= blocksize; + if ( nbytes >= blocksize ) + { + int i; + /* Save the current IV and then encrypt the IV. */ + memcpy( c->lastiv, c->u_iv.iv, blocksize ); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + /* XOR the input with the IV and store input into IV */ + for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) + *outbuf++ = (*ivp++ ^= *inbuf++); + nbytes -= blocksize; } - if( nbytes ) { /* process the remaining bytes */ - /* encrypt the IV (and save the current one) */ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); - c->unused = blocksize; - /* and apply the xor */ - c->unused -= nbytes; - for(ivp=c->iv; nbytes; nbytes-- ) - *outbuf++ = (*ivp++ ^= *inbuf++); + if ( nbytes ) + { + /* Save the current IV and then encrypt the IV. */ + memcpy( c->lastiv, c->u_iv.iv, blocksize ); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + c->unused = blocksize; + /* Apply the XOR. 
*/ + c->unused -= nbytes; + for(ivp=c->u_iv.iv; nbytes; nbytes-- ) + *outbuf++ = (*ivp++ ^= *inbuf++); } } + static void -do_cfb_decrypt( gcry_cipher_hd_t c, - byte *outbuf, const byte *inbuf, unsigned int nbytes ) +do_cfb_decrypt( gcry_cipher_hd_t c, unsigned char *outbuf, + const unsigned char *inbuf, unsigned int nbytes ) { - byte *ivp; - ulong temp; - size_t blocksize = c->cipher->blocksize; - - if( nbytes <= c->unused ) { - /* Short enough to be encoded by the remaining XOR mask. */ - /* XOR the input with the IV and store input into IV. */ - for(ivp=c->iv+blocksize - c->unused; nbytes; nbytes--,c->unused--) { - temp = *inbuf++; - *outbuf++ = *ivp ^ temp; - *ivp++ = temp; - } - return; + unsigned char *ivp; + unsigned long temp; + int i; + size_t blocksize = c->cipher->blocksize; + size_t blocksize_x_2 = blocksize + blocksize; + + if (nbytes <= c->unused) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV and store input into IV. */ + for (ivp=c->u_iv.iv+blocksize - c->unused; + nbytes; + nbytes--, c->unused--) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } + return; } - - if( c->unused ) { - /* XOR the input with the IV and store input into IV. */ - nbytes -= c->unused; - for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) { - temp = *inbuf++; - *outbuf++ = *ivp ^ temp; - *ivp++ = temp; - } + + if (c->unused) + { + /* XOR the input with the IV and store input into IV. */ + nbytes -= c->unused; + for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- ) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } + } + + /* Now we can process complete blocks. We use a loop as long as we + have at least 2 blocks and use conditions for the rest. This + also allows to use a bulk encryption function if available. 
*/ + if (nbytes >= blocksize_x_2 && c->bulk.cfb_dec) + { + unsigned int nblocks = nbytes / blocksize; + c->bulk.cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + outbuf += nblocks * blocksize; + inbuf += nblocks * blocksize; + nbytes -= nblocks * blocksize; + } + else + { + while (nbytes >= blocksize_x_2 ) + { + /* Encrypt the IV. */ + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + /* XOR the input with the IV and store input into IV. */ + for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } + nbytes -= blocksize; + } } - /* now we can process complete blocks */ - while( nbytes >= blocksize ) { - int i; - /* encrypt the IV (and save the current one) */ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); - /* XOR the input with the IV and store input into IV */ - for(ivp=c->iv,i=0; i < blocksize; i++ ) { - temp = *inbuf++; - *outbuf++ = *ivp ^ temp; - *ivp++ = temp; - } - nbytes -= blocksize; + if (nbytes >= blocksize ) + { + /* Save the current IV and then encrypt the IV. */ + memcpy ( c->lastiv, c->u_iv.iv, blocksize); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + /* XOR the input with the IV and store input into IV */ + for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } + nbytes -= blocksize; } - if( nbytes ) { /* process the remaining bytes */ - /* encrypt the IV (and save the current one) */ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); - c->unused = blocksize; - /* and apply the xor */ - c->unused -= nbytes; - for(ivp=c->iv; nbytes; nbytes-- ) { - temp = *inbuf++; - *outbuf++ = *ivp ^ temp; - *ivp++ = temp; - } + + if (nbytes) + { + /* Save the current IV and then encrypt the IV. 
*/ + memcpy ( c->lastiv, c->u_iv.iv, blocksize ); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + c->unused = blocksize; + /* Apply the XOR. */ + c->unused -= nbytes; + for (ivp=c->u_iv.iv; nbytes; nbytes-- ) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } } } @@ -1069,7 +1217,7 @@ do_ofb_encrypt( gcry_cipher_hd_t c, { /* Short enough to be encoded by the remaining XOR mask. */ /* XOR the input with the IV */ - for (ivp=c->iv+c->cipher->blocksize - c->unused; + for (ivp=c->u_iv.iv+c->cipher->blocksize - c->unused; nbytes; nbytes--, c->unused-- ) *outbuf++ = (*ivp++ ^ *inbuf++); @@ -1079,7 +1227,7 @@ do_ofb_encrypt( gcry_cipher_hd_t c, if( c->unused ) { nbytes -= c->unused; - for(ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) + for(ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- ) *outbuf++ = (*ivp++ ^ *inbuf++); } @@ -1088,20 +1236,20 @@ do_ofb_encrypt( gcry_cipher_hd_t c, { int i; /* Encrypt the IV (and save the current one). */ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); + memcpy( c->lastiv, c->u_iv.iv, blocksize ); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); - for (ivp=c->iv,i=0; i < blocksize; i++ ) + for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) *outbuf++ = (*ivp++ ^ *inbuf++); nbytes -= blocksize; } if ( nbytes ) { /* process the remaining bytes */ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); + memcpy( c->lastiv, c->u_iv.iv, blocksize ); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); c->unused = blocksize; c->unused -= nbytes; - for(ivp=c->iv; nbytes; nbytes-- ) + for(ivp=c->u_iv.iv; nbytes; nbytes-- ) *outbuf++ = (*ivp++ ^ *inbuf++); } } @@ -1116,7 +1264,7 @@ do_ofb_decrypt( gcry_cipher_hd_t c, if( nbytes <= c->unused ) { /* Short enough to be encoded by the remaining XOR mask. 
*/ - for (ivp=c->iv+blocksize - c->unused; nbytes; nbytes--,c->unused--) + for (ivp=c->u_iv.iv+blocksize - c->unused; nbytes; nbytes--,c->unused--) *outbuf++ = *ivp++ ^ *inbuf++; return; } @@ -1124,7 +1272,7 @@ do_ofb_decrypt( gcry_cipher_hd_t c, if ( c->unused ) { nbytes -= c->unused; - for (ivp=c->iv+blocksize - c->unused; c->unused; c->unused-- ) + for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- ) *outbuf++ = *ivp++ ^ *inbuf++; } @@ -1133,20 +1281,20 @@ do_ofb_decrypt( gcry_cipher_hd_t c, { int i; /* Encrypt the IV (and save the current one). */ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); - for (ivp=c->iv,i=0; i < blocksize; i++ ) + memcpy( c->lastiv, c->u_iv.iv, blocksize ); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ ) *outbuf++ = *ivp++ ^ *inbuf++; nbytes -= blocksize; } if ( nbytes ) { /* Process the remaining bytes. */ /* Encrypt the IV (and save the current one). 
*/ - memcpy( c->lastiv, c->iv, blocksize ); - c->cipher->encrypt ( &c->context.c, c->iv, c->iv ); + memcpy( c->lastiv, c->u_iv.iv, blocksize ); + c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); c->unused = blocksize; c->unused -= nbytes; - for (ivp=c->iv; nbytes; nbytes-- ) + for (ivp=c->u_iv.iv; nbytes; nbytes-- ) *outbuf++ = *ivp++ ^ *inbuf++; } } @@ -1362,10 +1510,13 @@ gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize, static void cipher_sync( gcry_cipher_hd_t c ) { - if( (c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused ) { - memmove(c->iv + c->unused, c->iv, c->cipher->blocksize - c->unused ); - memcpy(c->iv, c->lastiv + c->cipher->blocksize - c->unused, c->unused); - c->unused = 0; + if ((c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused) + { + memmove (c->u_iv.iv + c->unused, + c->u_iv.iv, c->cipher->blocksize - c->unused); + memcpy (c->u_iv.iv, + c->lastiv + c->cipher->blocksize - c->unused, c->unused); + c->unused = 0; } } diff --git a/cipher/ecc.c b/cipher/ecc.c index 06b01000..b93e8098 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -100,9 +100,9 @@ static const struct { "NIST P-224", "secp224r1" }, { "NIST P-224", "1.3.132.0.33" }, /* SECP OID. */ - { "NIST P-256", "1.2.840.10045.3.1.7" }, + { "NIST P-256", "1.2.840.10045.3.1.7" }, /* From NIST SP 800-78-1. */ { "NIST P-256", "prime256v1" }, - { "NIST P-256", "secp256r1" }, + { "NIST P-256", "secp256r1" }, { "NIST P-384", "secp384r1" }, { "NIST P-384", "1.3.132.0.34" }, diff --git a/cipher/rijndael.c b/cipher/rijndael.c index a839fe9b..2f4673a1 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -1,5 +1,6 @@ /* Rijndael (AES) for GnuPG - * Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + * Copyright (C) 2000, 2001, 2002, 2003, 2007, + * 2008 Free Software Foundation, Inc. * * This file is part of Libgcrypt. 
* @@ -43,6 +44,7 @@ #define MAXKC (256/32) #define MAXROUNDS 14 +#define BLOCKSIZE (128/8) /* USE_PADLOCK indicates whether to compile the padlock specific @@ -1926,22 +1928,100 @@ prepare_decryption( RIJNDAEL_context *ctx ) -/* Encrypt one block. A and B may be the same. */ +/* Encrypt one block. A and B need to be aligned on a 4 byte + boundary. A and B may be the same. */ static void -do_encrypt (const RIJNDAEL_context *ctx, byte *bx, const byte *ax) +do_encrypt_aligned (const RIJNDAEL_context *ctx, + unsigned char *b, const unsigned char *a) { - /* FIXME: Ugly code, replace by straighter implementaion and use - optimized assembler for common CPUs. */ - +#define rk (ctx->keySched) + int ROUNDS = ctx->ROUNDS; int r; union { u32 tempu32[4]; /* Force correct alignment. */ byte temp[4][4]; } u; - int ROUNDS = ctx->ROUNDS; -#define rk (ctx->keySched) + *((u32*)u.temp[0]) = *((u32*)(a )) ^ *((u32*)rk[0][0]); + *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]); + *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]); + *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]); + *((u32*)(b )) = (*((u32*)T1[u.temp[0][0]]) + ^ *((u32*)T2[u.temp[1][1]]) + ^ *((u32*)T3[u.temp[2][2]]) + ^ *((u32*)T4[u.temp[3][3]])); + *((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]]) + ^ *((u32*)T2[u.temp[2][1]]) + ^ *((u32*)T3[u.temp[3][2]]) + ^ *((u32*)T4[u.temp[0][3]])); + *((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]]) + ^ *((u32*)T2[u.temp[3][1]]) + ^ *((u32*)T3[u.temp[0][2]]) + ^ *((u32*)T4[u.temp[1][3]])); + *((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]]) + ^ *((u32*)T2[u.temp[0][1]]) + ^ *((u32*)T3[u.temp[1][2]]) + ^ *((u32*)T4[u.temp[2][3]])); + + for (r = 1; r < ROUNDS-1; r++) + { + *((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]); + *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]); + *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]); + *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]); + + *((u32*)(b )) = (*((u32*)T1[u.temp[0][0]]) + ^ 
*((u32*)T2[u.temp[1][1]]) + ^ *((u32*)T3[u.temp[2][2]]) + ^ *((u32*)T4[u.temp[3][3]])); + *((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]]) + ^ *((u32*)T2[u.temp[2][1]]) + ^ *((u32*)T3[u.temp[3][2]]) + ^ *((u32*)T4[u.temp[0][3]])); + *((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]]) + ^ *((u32*)T2[u.temp[3][1]]) + ^ *((u32*)T3[u.temp[0][2]]) + ^ *((u32*)T4[u.temp[1][3]])); + *((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]]) + ^ *((u32*)T2[u.temp[0][1]]) + ^ *((u32*)T3[u.temp[1][2]]) + ^ *((u32*)T4[u.temp[2][3]])); + } + + /* Last round is special. */ + *((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[ROUNDS-1][0]); + *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]); + *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]); + *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]); + b[ 0] = T1[u.temp[0][0]][1]; + b[ 1] = T1[u.temp[1][1]][1]; + b[ 2] = T1[u.temp[2][2]][1]; + b[ 3] = T1[u.temp[3][3]][1]; + b[ 4] = T1[u.temp[1][0]][1]; + b[ 5] = T1[u.temp[2][1]][1]; + b[ 6] = T1[u.temp[3][2]][1]; + b[ 7] = T1[u.temp[0][3]][1]; + b[ 8] = T1[u.temp[2][0]][1]; + b[ 9] = T1[u.temp[3][1]][1]; + b[10] = T1[u.temp[0][2]][1]; + b[11] = T1[u.temp[1][3]][1]; + b[12] = T1[u.temp[3][0]][1]; + b[13] = T1[u.temp[0][1]][1]; + b[14] = T1[u.temp[1][2]][1]; + b[15] = T1[u.temp[2][3]][1]; + *((u32*)(b )) ^= *((u32*)rk[ROUNDS][0]); + *((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]); + *((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]); + *((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]); +#undef rk +} + + +static void +do_encrypt (const RIJNDAEL_context *ctx, + unsigned char *bx, const unsigned char *ax) +{ /* BX and AX are not necessary correctly aligned. Thus we need to copy them here. 
*/ union @@ -1956,81 +2036,8 @@ do_encrypt (const RIJNDAEL_context *ctx, byte *bx, const byte *ax) } b; memcpy (a.a, ax, 16); - - *((u32*)u.temp[0]) = *((u32*)(a.a )) ^ *((u32*)rk[0][0]); - *((u32*)u.temp[1]) = *((u32*)(a.a+ 4)) ^ *((u32*)rk[0][1]); - *((u32*)u.temp[2]) = *((u32*)(a.a+ 8)) ^ *((u32*)rk[0][2]); - *((u32*)u.temp[3]) = *((u32*)(a.a+12)) ^ *((u32*)rk[0][3]); - *((u32*)(b.b )) = (*((u32*)T1[u.temp[0][0]]) - ^ *((u32*)T2[u.temp[1][1]]) - ^ *((u32*)T3[u.temp[2][2]]) - ^ *((u32*)T4[u.temp[3][3]])); - *((u32*)(b.b + 4)) = (*((u32*)T1[u.temp[1][0]]) - ^ *((u32*)T2[u.temp[2][1]]) - ^ *((u32*)T3[u.temp[3][2]]) - ^ *((u32*)T4[u.temp[0][3]])); - *((u32*)(b.b + 8)) = (*((u32*)T1[u.temp[2][0]]) - ^ *((u32*)T2[u.temp[3][1]]) - ^ *((u32*)T3[u.temp[0][2]]) - ^ *((u32*)T4[u.temp[1][3]])); - *((u32*)(b.b +12)) = (*((u32*)T1[u.temp[3][0]]) - ^ *((u32*)T2[u.temp[0][1]]) - ^ *((u32*)T3[u.temp[1][2]]) - ^ *((u32*)T4[u.temp[2][3]])); - - for (r = 1; r < ROUNDS-1; r++) - { - *((u32*)u.temp[0]) = *((u32*)(b.b )) ^ *((u32*)rk[r][0]); - *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[r][1]); - *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[r][2]); - *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[r][3]); - - *((u32*)(b.b )) = (*((u32*)T1[u.temp[0][0]]) - ^ *((u32*)T2[u.temp[1][1]]) - ^ *((u32*)T3[u.temp[2][2]]) - ^ *((u32*)T4[u.temp[3][3]])); - *((u32*)(b.b + 4)) = (*((u32*)T1[u.temp[1][0]]) - ^ *((u32*)T2[u.temp[2][1]]) - ^ *((u32*)T3[u.temp[3][2]]) - ^ *((u32*)T4[u.temp[0][3]])); - *((u32*)(b.b + 8)) = (*((u32*)T1[u.temp[2][0]]) - ^ *((u32*)T2[u.temp[3][1]]) - ^ *((u32*)T3[u.temp[0][2]]) - ^ *((u32*)T4[u.temp[1][3]])); - *((u32*)(b.b +12)) = (*((u32*)T1[u.temp[3][0]]) - ^ *((u32*)T2[u.temp[0][1]]) - ^ *((u32*)T3[u.temp[1][2]]) - ^ *((u32*)T4[u.temp[2][3]])); - } - - /* Last round is special. 
*/ - *((u32*)u.temp[0]) = *((u32*)(b.b )) ^ *((u32*)rk[ROUNDS-1][0]); - *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]); - *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]); - *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[ROUNDS-1][3]); - b.b[ 0] = T1[u.temp[0][0]][1]; - b.b[ 1] = T1[u.temp[1][1]][1]; - b.b[ 2] = T1[u.temp[2][2]][1]; - b.b[ 3] = T1[u.temp[3][3]][1]; - b.b[ 4] = T1[u.temp[1][0]][1]; - b.b[ 5] = T1[u.temp[2][1]][1]; - b.b[ 6] = T1[u.temp[3][2]][1]; - b.b[ 7] = T1[u.temp[0][3]][1]; - b.b[ 8] = T1[u.temp[2][0]][1]; - b.b[ 9] = T1[u.temp[3][1]][1]; - b.b[10] = T1[u.temp[0][2]][1]; - b.b[11] = T1[u.temp[1][3]][1]; - b.b[12] = T1[u.temp[3][0]][1]; - b.b[13] = T1[u.temp[0][1]][1]; - b.b[14] = T1[u.temp[1][2]][1]; - b.b[15] = T1[u.temp[2][3]][1]; - *((u32*)(b.b )) ^= *((u32*)rk[ROUNDS][0]); - *((u32*)(b.b+ 4)) ^= *((u32*)rk[ROUNDS][1]); - *((u32*)(b.b+ 8)) ^= *((u32*)rk[ROUNDS][2]); - *((u32*)(b.b+12)) ^= *((u32*)rk[ROUNDS][3]); - + do_encrypt_aligned (ctx, b.b, a.a); memcpy (bx, b.b, 16); -#undef rk } @@ -2100,19 +2107,161 @@ rijndael_encrypt (void *context, byte *b, const byte *a) } +/* Bulk encryption of complete blocks in CFB mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + int i; + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the IV. */ + do_encrypt_aligned (ctx, iv, iv); + /* XOR the input with the IV and store input into IV. */ + for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) + *outbuf++ = (*ivp++ ^= *inbuf++); + } + + _gcry_burn_stack (48 + 2*sizeof(int)); +} + + +/* Bulk encryption of complete blocks in CBC mode. 
Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks, int cbc_mac) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + int i; + + for ( ;nblocks; nblocks-- ) + { + for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) + outbuf[i] = inbuf[i] ^ *ivp++; + do_encrypt (ctx, outbuf, outbuf ); + memcpy (iv, outbuf, BLOCKSIZE); + inbuf += BLOCKSIZE; + if (!cbc_mac) + outbuf += BLOCKSIZE; + } + + _gcry_burn_stack (48 + 2*sizeof(int)); +} + + -/* Decrypt one block. a and b may be the same. */ +/* Decrypt one block. A and B need to be aligned on a 4 byte boundary + and the decryption must have been prepared. A and B may be the + same. */ static void -do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) +do_decrypt_aligned (RIJNDAEL_context *ctx, + unsigned char *b, const unsigned char *a) { #define rk (ctx->keySched2) int ROUNDS = ctx->ROUNDS; int r; - union { + union + { u32 tempu32[4]; /* Force correct alignment. 
*/ byte temp[4][4]; } u; + + *((u32*)u.temp[0]) = *((u32*)(a )) ^ *((u32*)rk[ROUNDS][0]); + *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[ROUNDS][1]); + *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[ROUNDS][2]); + *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[ROUNDS][3]); + + *((u32*)(b )) = (*((u32*)T5[u.temp[0][0]]) + ^ *((u32*)T6[u.temp[3][1]]) + ^ *((u32*)T7[u.temp[2][2]]) + ^ *((u32*)T8[u.temp[1][3]])); + *((u32*)(b+ 4)) = (*((u32*)T5[u.temp[1][0]]) + ^ *((u32*)T6[u.temp[0][1]]) + ^ *((u32*)T7[u.temp[3][2]]) + ^ *((u32*)T8[u.temp[2][3]])); + *((u32*)(b+ 8)) = (*((u32*)T5[u.temp[2][0]]) + ^ *((u32*)T6[u.temp[1][1]]) + ^ *((u32*)T7[u.temp[0][2]]) + ^ *((u32*)T8[u.temp[3][3]])); + *((u32*)(b+12)) = (*((u32*)T5[u.temp[3][0]]) + ^ *((u32*)T6[u.temp[2][1]]) + ^ *((u32*)T7[u.temp[1][2]]) + ^ *((u32*)T8[u.temp[0][3]])); + + for (r = ROUNDS-1; r > 1; r--) + { + *((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[r][0]); + *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]); + *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]); + *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]); + *((u32*)(b )) = (*((u32*)T5[u.temp[0][0]]) + ^ *((u32*)T6[u.temp[3][1]]) + ^ *((u32*)T7[u.temp[2][2]]) + ^ *((u32*)T8[u.temp[1][3]])); + *((u32*)(b+ 4)) = (*((u32*)T5[u.temp[1][0]]) + ^ *((u32*)T6[u.temp[0][1]]) + ^ *((u32*)T7[u.temp[3][2]]) + ^ *((u32*)T8[u.temp[2][3]])); + *((u32*)(b+ 8)) = (*((u32*)T5[u.temp[2][0]]) + ^ *((u32*)T6[u.temp[1][1]]) + ^ *((u32*)T7[u.temp[0][2]]) + ^ *((u32*)T8[u.temp[3][3]])); + *((u32*)(b+12)) = (*((u32*)T5[u.temp[3][0]]) + ^ *((u32*)T6[u.temp[2][1]]) + ^ *((u32*)T7[u.temp[1][2]]) + ^ *((u32*)T8[u.temp[0][3]])); + } + + /* Last round is special. 
*/ + *((u32*)u.temp[0]) = *((u32*)(b )) ^ *((u32*)rk[1][0]); + *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[1][1]); + *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[1][2]); + *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[1][3]); + b[ 0] = S5[u.temp[0][0]]; + b[ 1] = S5[u.temp[3][1]]; + b[ 2] = S5[u.temp[2][2]]; + b[ 3] = S5[u.temp[1][3]]; + b[ 4] = S5[u.temp[1][0]]; + b[ 5] = S5[u.temp[0][1]]; + b[ 6] = S5[u.temp[3][2]]; + b[ 7] = S5[u.temp[2][3]]; + b[ 8] = S5[u.temp[2][0]]; + b[ 9] = S5[u.temp[1][1]]; + b[10] = S5[u.temp[0][2]]; + b[11] = S5[u.temp[3][3]]; + b[12] = S5[u.temp[3][0]]; + b[13] = S5[u.temp[2][1]]; + b[14] = S5[u.temp[1][2]]; + b[15] = S5[u.temp[0][3]]; + *((u32*)(b )) ^= *((u32*)rk[0][0]); + *((u32*)(b+ 4)) ^= *((u32*)rk[0][1]); + *((u32*)(b+ 8)) ^= *((u32*)rk[0][2]); + *((u32*)(b+12)) ^= *((u32*)rk[0][3]); +#undef rk +} + + +/* Decrypt one block. AX and BX may be the same. */ +static void +do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) +{ /* BX and AX are not necessarily correctly aligned. Thus we need to copy them here. 
*/ union @@ -2126,90 +2275,21 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) byte b[16]; } b; - memcpy (a.a, ax, 16); - if ( !ctx->decryption_prepared ) { prepare_decryption ( ctx ); _gcry_burn_stack (64); ctx->decryption_prepared = 1; } - - *((u32*)u.temp[0]) = *((u32*)(a.a )) ^ *((u32*)rk[ROUNDS][0]); - *((u32*)u.temp[1]) = *((u32*)(a.a+ 4)) ^ *((u32*)rk[ROUNDS][1]); - *((u32*)u.temp[2]) = *((u32*)(a.a+ 8)) ^ *((u32*)rk[ROUNDS][2]); - *((u32*)u.temp[3]) = *((u32*)(a.a+12)) ^ *((u32*)rk[ROUNDS][3]); - - *((u32*)(b.b )) = (*((u32*)T5[u.temp[0][0]]) - ^ *((u32*)T6[u.temp[3][1]]) - ^ *((u32*)T7[u.temp[2][2]]) - ^ *((u32*)T8[u.temp[1][3]])); - *((u32*)(b.b+ 4)) = (*((u32*)T5[u.temp[1][0]]) - ^ *((u32*)T6[u.temp[0][1]]) - ^ *((u32*)T7[u.temp[3][2]]) - ^ *((u32*)T8[u.temp[2][3]])); - *((u32*)(b.b+ 8)) = (*((u32*)T5[u.temp[2][0]]) - ^ *((u32*)T6[u.temp[1][1]]) - ^ *((u32*)T7[u.temp[0][2]]) - ^ *((u32*)T8[u.temp[3][3]])); - *((u32*)(b.b+12)) = (*((u32*)T5[u.temp[3][0]]) - ^ *((u32*)T6[u.temp[2][1]]) - ^ *((u32*)T7[u.temp[1][2]]) - ^ *((u32*)T8[u.temp[0][3]])); - - for (r = ROUNDS-1; r > 1; r--) - { - *((u32*)u.temp[0]) = *((u32*)(b.b )) ^ *((u32*)rk[r][0]); - *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[r][1]); - *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[r][2]); - *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[r][3]); - *((u32*)(b.b )) = (*((u32*)T5[u.temp[0][0]]) - ^ *((u32*)T6[u.temp[3][1]]) - ^ *((u32*)T7[u.temp[2][2]]) - ^ *((u32*)T8[u.temp[1][3]])); - *((u32*)(b.b+ 4)) = (*((u32*)T5[u.temp[1][0]]) - ^ *((u32*)T6[u.temp[0][1]]) - ^ *((u32*)T7[u.temp[3][2]]) - ^ *((u32*)T8[u.temp[2][3]])); - *((u32*)(b.b+ 8)) = (*((u32*)T5[u.temp[2][0]]) - ^ *((u32*)T6[u.temp[1][1]]) - ^ *((u32*)T7[u.temp[0][2]]) - ^ *((u32*)T8[u.temp[3][3]])); - *((u32*)(b.b+12)) = (*((u32*)T5[u.temp[3][0]]) - ^ *((u32*)T6[u.temp[2][1]]) - ^ *((u32*)T7[u.temp[1][2]]) - ^ *((u32*)T8[u.temp[0][3]])); - } - - /* Last round is special. 
*/ - *((u32*)u.temp[0]) = *((u32*)(b.b )) ^ *((u32*)rk[1][0]); - *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[1][1]); - *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[1][2]); - *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[1][3]); - b.b[ 0] = S5[u.temp[0][0]]; - b.b[ 1] = S5[u.temp[3][1]]; - b.b[ 2] = S5[u.temp[2][2]]; - b.b[ 3] = S5[u.temp[1][3]]; - b.b[ 4] = S5[u.temp[1][0]]; - b.b[ 5] = S5[u.temp[0][1]]; - b.b[ 6] = S5[u.temp[3][2]]; - b.b[ 7] = S5[u.temp[2][3]]; - b.b[ 8] = S5[u.temp[2][0]]; - b.b[ 9] = S5[u.temp[1][1]]; - b.b[10] = S5[u.temp[0][2]]; - b.b[11] = S5[u.temp[3][3]]; - b.b[12] = S5[u.temp[3][0]]; - b.b[13] = S5[u.temp[2][1]]; - b.b[14] = S5[u.temp[1][2]]; - b.b[15] = S5[u.temp[0][3]]; - *((u32*)(b.b )) ^= *((u32*)rk[0][0]); - *((u32*)(b.b+ 4)) ^= *((u32*)rk[0][1]); - *((u32*)(b.b+ 8)) ^= *((u32*)rk[0][2]); - *((u32*)(b.b+12)) ^= *((u32*)rk[0][3]); + memcpy (a.a, ax, 16); + do_decrypt_aligned (ctx, b.b, a.a); memcpy (bx, b.b, 16); #undef rk } + + + static void rijndael_decrypt (void *context, byte *b, const byte *a) @@ -2231,6 +2311,71 @@ rijndael_decrypt (void *context, byte *b, const byte *a) } +/* Bulk decryption of complete blocks in CFB mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + unsigned char temp; + int i; + + for ( ;nblocks; nblocks-- ) + { + do_encrypt_aligned (ctx, iv, iv); + for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } + } + + _gcry_burn_stack (48 + 2*sizeof(int)); +} + + +/* Bulk decryption of complete blocks in CBC mode. 
Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + int i; + unsigned char savebuf[BLOCKSIZE]; + + for ( ;nblocks; nblocks-- ) + { + /* We need to save INBUF away because it may be identical to + OUTBUF. */ + memcpy (savebuf, inbuf, BLOCKSIZE); + do_decrypt (ctx, outbuf, inbuf); + for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) + outbuf[i] ^= *ivp++; + memcpy (iv, savebuf, BLOCKSIZE); + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + + _gcry_burn_stack (48 + 2*sizeof(int) + BLOCKSIZE + 4*sizeof (char*)); +} + + + /* Test a single encryption and decryption with each key size. */ static const char* diff --git a/cipher/rsa.c b/cipher/rsa.c index fe545c7a..0971eaa4 100644 --- a/cipher/rsa.c +++ b/cipher/rsa.c @@ -627,6 +627,13 @@ _gcry_rsa_verify (int algo, gcry_mpi_t hash, gcry_mpi_t *data, gcry_mpi_t *pkey, pk.e = pkey[1]; result = gcry_mpi_new ( 160 ); public( result, data[0], &pk ); +#ifdef IS_DEVELOPMENT_VERSION + if (DBG_CIPHER) + { + log_mpidump ("rsa verify result:", result ); + log_mpidump (" hash:", hash ); + } +#endif /*IS_DEVELOPMENT_VERSION*/ /*rc = (*cmp)( opaquev, result );*/ rc = mpi_cmp (result, hash) ? 
GPG_ERR_BAD_SIGNATURE : GPG_ERR_NO_ERROR; gcry_mpi_release (result); diff --git a/configure.ac b/configure.ac index d0a9b5a4..8e3c78a7 100644 --- a/configure.ac +++ b/configure.ac @@ -48,6 +48,7 @@ LIBGCRYPT_CONFIG_API_VERSION=1 NEED_GPG_ERROR_VERSION=1.4 +is_development_version=my_issvn BUILD_REVISION=svn_revision PACKAGE=$PACKAGE_NAME VERSION=$PACKAGE_VERSION @@ -803,17 +804,12 @@ AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes) AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes) -# Allow users to append something to the version string without -# flagging it as development version. The user version part is -# considered everything after a dash. -changequote(,)# -tmp_pat='[a-zA-Z]' -changequote([,])# -if echo "$VERSION" | sed 's/-.*//' | grep "$tmp_pat" >/dev/null ; then +if test "$is_development_version" = "yes"; then AC_DEFINE(IS_DEVELOPMENT_VERSION,1, [Defined if this is not a regular release]) fi + AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes) @@ -1079,15 +1075,20 @@ tests/Makefile AC_OUTPUT # Give some feedback -echo +echo " + Libgcrypt v${VERSION} has been configured as follows: + + Platform: $PRINTABLE_OS_NAME ($host) +" + if test "$print_egd_notice" = "yes"; then cat <<G10EOF The performance of the Unix random gatherer module (rndunix) is not very good and it does not keep the entropy pool over multiple - invocations of GnuPG. The suggested way to overcome this problem is - to use the + invocations of Libgcrypt base applications. The suggested way to + overcome this problem is to use the Entropy Gathering Daemon (EGD) @@ -1108,6 +1109,3 @@ if test -n "$gpl"; then echo "use of this library has to comply with the conditions of the GPL." 
fi -# Give some feedback -echo " Configured for: $PRINTABLE_OS_NAME ($host)" -echo diff --git a/src/ChangeLog b/src/ChangeLog index c6e08de8..5be65475 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2008-03-11 Werner Koch <wk@g10code.com> + + * gcrypt.h.in (gcry_ac_em_t, gcry_ac_scheme_t): Remove trailing + comma for full C-89 compatibility. + 2008-01-21 Marcus Brinkmann <marcus@g10code.de> * hwfeatures.c (detect_ia32_gnuc): Fix inline asm. diff --git a/src/cipher.h b/src/cipher.h index 236e4d5d..4ac8d7a3 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -35,6 +35,21 @@ void _gcry_rmd160_hash_buffer (void *outbuf, void _gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length); +/*-- rijndael.c --*/ +void _gcry_aes_cfb_enc (void *context, unsigned char *iv, + void *outbuf, const void *inbuf, + unsigned int nblocks); +void _gcry_aes_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks); +void _gcry_aes_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks, int cbc_mac); +void _gcry_aes_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks); + + /*-- dsa.c --*/ void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void *cb_data); gcry_err_code_t _gcry_dsa_generate2 (int algo, unsigned int nbits, diff --git a/src/gcrypt.h.in b/src/gcrypt.h.in index eeb5e4b2..0b9472bb 100644 --- a/src/gcrypt.h.in +++ b/src/gcrypt.h.in @@ -1234,7 +1234,7 @@ gcry_ac_key_type_t; typedef enum gcry_ac_em { GCRY_AC_EME_PKCS_V1_5, - GCRY_AC_EMSA_PKCS_V1_5, + GCRY_AC_EMSA_PKCS_V1_5 } gcry_ac_em_t; @@ -1242,7 +1242,7 @@ gcry_ac_em_t; typedef enum gcry_ac_scheme { GCRY_AC_ES_PKCS_V1_5, - GCRY_AC_SSA_PKCS_V1_5, + GCRY_AC_SSA_PKCS_V1_5 } gcry_ac_scheme_t; diff --git a/tests/ChangeLog b/tests/ChangeLog index 801a5dd7..c3ec3c2c 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,8 @@ 
+2008-03-17 Werner Koch <wk@g10code.com> + + * benchmark.c (main): Add option --cipher-repetition. + (cipher_bench): Use it. + 2008-03-12 Werner Koch <wk@g10code.com> * benchmark.c (rsa_bench): Add arg NO_BLINDING. diff --git a/tests/benchmark.c b/tests/benchmark.c index c0fdb4ad..424461ad 100644 --- a/tests/benchmark.c +++ b/tests/benchmark.c @@ -35,6 +35,13 @@ static int verbose; +/* Do encryption tests with large buffers. */ +static int large_buffers; + +/* Number of cipher repetitions. */ +static int cipher_repetitions; + + static const char sample_private_dsa_key_1024[] = "(private-key\n" " (dsa\n" @@ -426,8 +433,9 @@ cipher_bench ( const char *algoname ) int i; int keylen, blklen; char key[128]; - char outbuf[1000], buf[1000]; - size_t buflen; + char *outbuf, *buf; + size_t allocated_buflen, buflen; + int repetitions; static struct { int mode; const char *name; int blocked; } modes[] = { { GCRY_CIPHER_MODE_ECB, "ECB", 1 }, { GCRY_CIPHER_MODE_CBC, "CBC", 1 }, @@ -449,9 +457,25 @@ cipher_bench ( const char *algoname ) return; } + if (large_buffers) + { + allocated_buflen = 1024 * 100; + repetitions = 10; + } + else + { + allocated_buflen = 1024; + repetitions = 1000; + } + repetitions *= cipher_repetitions; + + buf = gcry_xmalloc (allocated_buflen); + outbuf = gcry_xmalloc (allocated_buflen); if (!header_printed) { + if (cipher_repetitions != 1) + printf ("Running each test %d times.\n", cipher_repetitions); printf ("%-12s", ""); for (modeidx=0; modes[modeidx].mode; modeidx++) printf (" %-15s", modes[modeidx].name ); @@ -525,14 +549,15 @@ cipher_bench ( const char *algoname ) exit (1); } - buflen = sizeof buf; + buflen = allocated_buflen; if (modes[modeidx].blocked) buflen = (buflen / blklen) * blklen; - + start_timer (); - for (i=err=0; !err && i < 1000; i++) + for (i=err=0; !err && i < repetitions; i++) err = gcry_cipher_encrypt ( hd, outbuf, buflen, buf, buflen); stop_timer (); + printf (" %s", elapsed_time ()); fflush (stdout); gcry_cipher_close (hd); @@ 
-560,7 +585,7 @@ cipher_bench ( const char *algoname ) } start_timer (); - for (i=err=0; !err && i < 1000; i++) + for (i=err=0; !err && i < repetitions; i++) err = gcry_cipher_decrypt ( hd, outbuf, buflen, buf, buflen); stop_timer (); printf (" %s", elapsed_time ()); @@ -575,6 +600,8 @@ cipher_bench ( const char *algoname ) } putchar ('\n'); + gcry_free (buf); + gcry_free (outbuf); } @@ -961,6 +988,7 @@ main( int argc, char **argv ) int last_argc = -1; int no_blinding = 0; + if (argc) { argc--; argv++; } @@ -1006,9 +1034,25 @@ main( int argc, char **argv ) no_blinding = 1; argc--; argv++; } + else if (!strcmp (*argv, "--large-buffers")) + { + large_buffers = 1; + argc--; argv++; + } + else if (!strcmp (*argv, "--cipher-repetition")) + { + argc--; argv++; + if (argc) + { + cipher_repetitions = atoi(*argv); + argc--; argv++; + } + } } gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0); + if (cipher_repetitions < 1) + cipher_repetitions = 1; if ( !argc ) { |