summaryrefslogtreecommitdiff
path: root/cipher/blowfish.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2013-08-16 12:51:52 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2013-08-16 14:42:46 +0300
commit31e4b1a96a07e9a3698fcb7be0643a136ebb8e5c (patch)
tree2c85e865c291cca1a94b39193cd99bef09c2cef4 /cipher/blowfish.c
parent8d1faf56714598301580ce370e0bfa6d65e73644 (diff)
downloadlibgcrypt-31e4b1a96a07e9a3698fcb7be0643a136ebb8e5c.tar.gz
blowfish: add ARMv6 assembly implementation
* cipher/Makefile.am: Add 'blowfish-armv6.S'.
* cipher/blowfish-armv6.S: New file.
* cipher/blowfish.c (USE_ARMV6_ASM): New macro.
[USE_ARMV6_ASM] (_gcry_blowfish_armv6_do_encrypt)
(_gcry_blowfish_armv6_encrypt_block)
(_gcry_blowfish_armv6_decrypt_block, _gcry_blowfish_armv6_ctr_enc)
(_gcry_blowfish_armv6_cbc_dec, _gcry_blowfish_armv6_cfb_dec): New
prototypes.
[USE_ARMV6_ASM] (do_encrypt, do_encrypt_block, do_decrypt_block)
(encrypt_block, decrypt_block): New functions.
(_gcry_blowfish_ctr_enc) [USE_ARMV6_ASM]: Use ARMv6 assembly function.
(_gcry_blowfish_cbc_dec) [USE_ARMV6_ASM]: Use ARMv6 assembly function.
(_gcry_blowfish_cfb_dec) [USE_ARMV6_ASM]: Use ARMv6 assembly function.
* configure.ac (blowfish) [arm]: Add 'blowfish-armv6.lo'.
--

The patch provides non-parallel implementations for a small speed-up and
2-way parallel implementations that get accelerated on multi-issue CPUs
(hand-tuned for the in-order dual-issue Cortex-A8). Unaligned access
handling is done in assembly.

For now, this is only enabled on little-endian systems, as big-endian
correctness has not been tested yet.

Old vs new (Cortex-A8, Debian Wheezy/armhf):

           ECB/Stream          CBC             CFB             OFB             CTR
         --------------- --------------- --------------- --------------- ---------------
BLOWFISH  1.28x   1.16x   1.21x   2.16x   1.26x   1.86x   1.21x   1.25x   1.89x   1.96x

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/blowfish.c')
-rw-r--r--cipher/blowfish.c115
1 files changed, 113 insertions, 2 deletions
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index 69baebe0..fe4e2807 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -50,6 +50,20 @@
# define USE_AMD64_ASM 1
#endif
+/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code.  It is
+ * defined (to 1) only when all of the following hold:
+ *  - the target is ARM and little-endian (__arm__ and __ARMEL__);
+ *  - the compiler reports architecture version 6 or later, either through
+ *    __ARM_ARCH >= 6 or through one of the listed __ARM_ARCH_6x__ /
+ *    __ARM_ARCH_7x__ macros;
+ *  - BLOWFISH_ROUNDS is 16;
+ *  - HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS is defined.
+ * The leading #undef makes sure the macro is not inherited from elsewhere. */
+#undef USE_ARMV6_ASM
+#if defined(__arm__) && defined(__ARMEL__) && \
+ ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
+ || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
+ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7EM__))
+# if (BLOWFISH_ROUNDS == 16) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARMV6_ASM 1
+# endif
+#endif /* little-endian ARM, architecture version 6 or later */
typedef struct {
u32 s0[256];
@@ -305,7 +319,61 @@ static void decrypt_block (void *context, byte *outbuf, const byte *inbuf)
_gcry_burn_stack (2*8);
}
-#else /*USE_AMD64_ASM*/
+#elif defined(USE_ARMV6_ASM)
+
+/* Assembly implementations of Blowfish.  Only the prototypes appear here;
+ * the bodies are external to this file (presumably blowfish-armv6.S --
+ * confirm against the build files). */
+extern void _gcry_blowfish_armv6_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
+ u32 *ret_xr);
+
+extern void _gcry_blowfish_armv6_encrypt_block(BLOWFISH_context *c, byte *out,
+ const byte *in);
+
+extern void _gcry_blowfish_armv6_decrypt_block(BLOWFISH_context *c, byte *out,
+ const byte *in);
+
+/* These assembly implementations process two blocks in parallel.  The bulk
+ * CTR/CBC/CFB callers in this file invoke them once per pair of blocks and
+ * then advance in/out by 2 * BLOWFISH_BLOCKSIZE.  Those callers do not
+ * modify ctr/iv between calls, so each routine is expected to update that
+ * buffer itself -- confirm against the assembly source. */
+extern void _gcry_blowfish_armv6_ctr_enc(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_blowfish_armv6_cbc_dec(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_blowfish_armv6_cfb_dec(BLOWFISH_context *ctx, byte *out,
+ const byte *in, byte *iv);
+/* Wrapper that exposes the ARMv6 assembly routine under the generic
+ * do_encrypt name: bc, ret_xl and ret_xr are passed through unchanged and
+ * nothing else is done here.  The meaning of ret_xl/ret_xr is defined by
+ * the assembly (presumably the two 32-bit halves of a block, updated in
+ * place -- confirm). */
+static void
+do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
+{
+ _gcry_blowfish_armv6_do_encrypt (bc, ret_xl, ret_xr);
+}
+/* Wrapper around the ARMv6 assembly single-block encryption routine.
+ * context, outbuf and inbuf are forwarded unchanged; this function adds no
+ * processing of its own. */
+static void
+do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_blowfish_armv6_encrypt_block (context, outbuf, inbuf);
+}
+/* Wrapper around the ARMv6 assembly single-block decryption routine.
+ * context, outbuf and inbuf are forwarded unchanged; this function adds no
+ * processing of its own. */
+static void
+do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+ _gcry_blowfish_armv6_decrypt_block (context, outbuf, inbuf);
+}
+/* Encrypt one block from inbuf into outbuf using an untyped context
+ * pointer.  The pointer is cast to BLOWFISH_context, the work is delegated
+ * to do_encrypt_block, and _gcry_burn_stack is called afterwards. */
+static void encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *c = (BLOWFISH_context *) context;
+ do_encrypt_block (c, outbuf, inbuf);
+ _gcry_burn_stack (10*4); /* NOTE(review): 40 bytes; presumably sized to
+                           * the stack the assembly routine uses --
+                           * confirm against the assembly source. */
+}
+/* Decrypt one block from inbuf into outbuf using an untyped context
+ * pointer.  The pointer is cast to BLOWFISH_context, the work is delegated
+ * to do_decrypt_block, and _gcry_burn_stack is called afterwards. */
+static void decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+ BLOWFISH_context *c = (BLOWFISH_context *) context;
+ do_decrypt_block (c, outbuf, inbuf);
+ _gcry_burn_stack (10*4); /* NOTE(review): 40 bytes; presumably sized to
+                           * the stack the assembly routine uses --
+                           * confirm against the assembly source. */
+}
+
+#else /*USE_ARMV6_ASM*/
#if BLOWFISH_ROUNDS != 16
static inline u32
@@ -527,7 +595,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf)
_gcry_burn_stack (64);
}
-#endif /*!USE_AMD64_ASM*/
+#endif /*!USE_AMD64_ASM&&!USE_ARMV6_ASM*/
/* Bulk encryption of complete blocks in CTR mode. This function is only
@@ -562,6 +630,21 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
/* TODO: use caching instead? */
}
+#elif defined(USE_ARMV6_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_blowfish_armv6_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+ nblocks -= 2;
+ outbuf += 2 * BLOWFISH_BLOCKSIZE;
+ inbuf += 2 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ /* TODO: use caching instead? */
+ }
#endif
for ( ;nblocks; nblocks-- )
@@ -615,6 +698,20 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
}
+#elif defined(USE_ARMV6_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_blowfish_armv6_cbc_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 2;
+ outbuf += 2 * BLOWFISH_BLOCKSIZE;
+ inbuf += 2 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
#endif
for ( ;nblocks; nblocks-- )
@@ -664,6 +761,20 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
/* Use generic code to handle smaller chunks... */
}
+#elif defined(USE_ARMV6_ASM)
+ {
+ /* Process data in 2 block chunks. */
+ while (nblocks >= 2)
+ {
+ _gcry_blowfish_armv6_cfb_dec(ctx, outbuf, inbuf, iv);
+
+ nblocks -= 2;
+ outbuf += 2 * BLOWFISH_BLOCKSIZE;
+ inbuf += 2 * BLOWFISH_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
#endif
for ( ;nblocks; nblocks-- )