diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-08-16 12:51:52 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-08-16 14:42:46 +0300 |
commit | 31e4b1a96a07e9a3698fcb7be0643a136ebb8e5c (patch) | |
tree | 2c85e865c291cca1a94b39193cd99bef09c2cef4 /cipher/blowfish.c | |
parent | 8d1faf56714598301580ce370e0bfa6d65e73644 (diff) | |
download | libgcrypt-31e4b1a96a07e9a3698fcb7be0643a136ebb8e5c.tar.gz |
blowfish: add ARMv6 assembly implementation
* cipher/Makefile.am: Add 'blowfish-armv6.S'.
* cipher/blowfish-armv6.S: New file.
* cipher/blowfish.c (USE_ARMV6_ASM): New macro.
[USE_ARMV6_ASM] (_gcry_blowfish_armv6_do_encrypt)
(_gcry_blowfish_armv6_encrypt_block)
(_gcry_blowfish_armv6_decrypt_block, _gcry_blowfish_armv6_ctr_enc)
(_gcry_blowfish_armv6_cbc_dec, _gcry_blowfish_armv6_cfb_dec): New
prototypes.
[USE_ARMV6_ASM] (do_encrypt, do_encrypt_block, do_decrypt_block)
(encrypt_block, decrypt_block): New functions.
(_gcry_blowfish_ctr_enc) [USE_ARMV6_ASM]: Use ARMv6 assembly function.
(_gcry_blowfish_cbc_dec) [USE_ARMV6_ASM]: Use ARMv6 assembly function.
(_gcry_blowfish_cfb_dec) [USE_ARMV6_ASM]: Use ARMv6 assembly function.
* configure.ac (blowfish) [arm]: Add 'blowfish-armv6.lo'.
--
Patch provides non-parallel implementations for small speed-up and 2-way
parallel implementations that gets accelerated on multi-issue CPUs (hand-tuned
for in-order dual-issue Cortex-A8). Unaligned access handling is done in
assembly.
For now, only enable this on little-endian systems as big-endian correctness
have not been tested yet.
Old vs new (Cortex-A8, Debian Wheezy/armhf):
ECB/Stream CBC CFB OFB CTR
--------------- --------------- --------------- --------------- ---------------
BLOWFISH 1.28x 1.16x 1.21x 2.16x 1.26x 1.86x 1.21x 1.25x 1.89x 1.96x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/blowfish.c')
-rw-r--r-- | cipher/blowfish.c | 115 |
1 files changed, 113 insertions, 2 deletions
diff --git a/cipher/blowfish.c b/cipher/blowfish.c index 69baebe0..fe4e2807 100644 --- a/cipher/blowfish.c +++ b/cipher/blowfish.c @@ -50,6 +50,20 @@ # define USE_AMD64_ASM 1 #endif +/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ +#undef USE_ARMV6_ASM +#if defined(__arm__) && defined(__ARMEL__) && \ + ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ + || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__)) +# if (BLOWFISH_ROUNDS == 16) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) +# define USE_ARMV6_ASM 1 +# endif +#endif typedef struct { u32 s0[256]; @@ -305,7 +319,61 @@ static void decrypt_block (void *context, byte *outbuf, const byte *inbuf) _gcry_burn_stack (2*8); } -#else /*USE_AMD64_ASM*/ +#elif defined(USE_ARMV6_ASM) + +/* Assembly implementations of Blowfish. */ +extern void _gcry_blowfish_armv6_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, + u32 *ret_xr); + +extern void _gcry_blowfish_armv6_encrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +extern void _gcry_blowfish_armv6_decrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +/* These assembly implementations process two blocks in parallel. */ +extern void _gcry_blowfish_armv6_ctr_enc(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_blowfish_armv6_cbc_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_blowfish_armv6_cfb_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +static void +do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ + _gcry_blowfish_armv6_do_encrypt (bc, ret_xl, ret_xr); +} + +static void +do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_armv6_encrypt_block (context, outbuf, inbuf); +} + +static void +do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_armv6_decrypt_block (context, outbuf, inbuf); +} + +static void encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_encrypt_block (c, outbuf, inbuf); + _gcry_burn_stack (10*4); +} + +static void decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_decrypt_block (c, outbuf, inbuf); + _gcry_burn_stack (10*4); +} + +#else /*USE_ARMV6_ASM*/ #if BLOWFISH_ROUNDS != 16 static inline u32 @@ -527,7 +595,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf) _gcry_burn_stack (64); } -#endif /*!USE_AMD64_ASM*/ +#endif /*!USE_AMD64_ASM&&!USE_ARMV6_ASM*/ /* Bulk encryption of complete blocks in CTR mode. This function is only @@ -562,6 +630,21 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } +#elif defined(USE_ARMV6_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_armv6_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } #endif for ( ;nblocks; nblocks-- ) @@ -615,6 +698,20 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ } +#elif defined(USE_ARMV6_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_armv6_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } #endif for ( ;nblocks; nblocks-- ) @@ -664,6 +761,20 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ } +#elif defined(USE_ARMV6_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_armv6_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } #endif for ( ;nblocks; nblocks-- ) |