author | Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> | 2013-10-23 18:36:10 +0300
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-10-23 18:36:10 +0300
commit | 2fd83faa876d0be91ab7884b1a9eaa7793559eb9 (patch)
tree | bb0111c6e9c86ad003aac73764a726b406629265 /cipher/rijndael.c
parent | 0b39fce7e3ce6761d6bd5195d093ec6857edb7c2 (diff)
download | libgcrypt-2fd83faa876d0be91ab7884b1a9eaa7793559eb9.tar.gz
Enable assembler optimizations on earlier ARM cores
* cipher/blowfish-armv6.S => cipher/blowfish-arm.S: adapt to pre-armv6 CPUs.
* cipher/blowfish.c: enable assembly on armv4/armv5 little-endian CPUs.
* cipher/camellia-armv6.S => cipher/camellia-arm.S: adapt to pre-armv6 CPUs.
* cipher/camellia.c, cipher/camellia-glue.c: enable assembly on armv4/armv5
  little-endian CPUs.
* cipher/cast5-armv6.S => cipher/cast5-arm.S: adapt to pre-armv6 CPUs.
* cipher/cast5.c: enable assembly on armv4/armv5 little-endian CPUs.
* cipher/rijndael-armv6.S => cipher/rijndael-arm.S: adapt to pre-armv6 CPUs.
* cipher/rijndael.c: enable assembly on armv4/armv5 little-endian CPUs.
* cipher/twofish-armv6.S => cipher/twofish-arm.S: adapt to pre-armv6 CPUs.
* cipher/twofish.c: enable assembly on armv4/armv5 little-endian CPUs.
--
Our ARMv6 assembly-optimized code can easily be adapted to earlier CPUs.
The only incompatible spot is the 'rev' instruction used for byte swapping,
which is only available on ARMv6 and newer. On pre-ARMv6 CPUs, replace it
with a series of four instructions.
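For reference, the classic pre-ARMv6 substitute looks like the sketch below
(the macro name bswap32 and exact instruction ordering are illustrative; the
patched .S files may use an equivalent but slightly different sequence):

    #ifdef HAVE_ARM_ARCH_V6
    # define bswap32(rd, rtmp) \
            rev rd, rd;
    #else
    /* ARMv4/ARMv5: turns rd = 0xAABBCCDD into rd = 0xDDCCBBAA using only
       pre-ARMv6 data-processing instructions; rtmp is a scratch register. */
    # define bswap32(rd, rtmp) \
            eor rtmp, rd, rd, ror #16; \
            bic rtmp, rtmp, #0x00ff0000; \
            mov rd, rd, ror #8; \
            eor rd, rd, rtmp, lsr #8;
    #endif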
Compare:

Before (generic C implementation):

                ECB/Stream         CBC             CFB             OFB             CTR
             --------------- --------------- --------------- --------------- ---------------
AES            620ms   610ms   650ms   680ms   620ms   630ms   660ms   660ms   630ms   630ms
CAMELLIA128    720ms   720ms   780ms   790ms   770ms   760ms   780ms   780ms   770ms   760ms
CAMELLIA256    910ms   910ms   970ms   970ms   960ms   950ms   970ms   970ms   960ms   950ms
CAST5          820ms   820ms   930ms   920ms   890ms   860ms   930ms   920ms   880ms   890ms
BLOWFISH       550ms   560ms   650ms   660ms   630ms   600ms   660ms   650ms   610ms   620ms

After (new ARM assembly):

                ECB/Stream         CBC             CFB             OFB             CTR
             --------------- --------------- --------------- --------------- ---------------
AES            130ms   140ms   180ms   200ms   160ms   170ms   190ms   200ms   170ms   170ms
CAMELLIA128    150ms   160ms   210ms   220ms   200ms   190ms   210ms   220ms   190ms   190ms
CAMELLIA256    180ms   180ms   260ms   240ms   240ms   230ms   250ms   250ms   230ms   230ms
CAST5          170ms   160ms   270ms   120ms   240ms   130ms   260ms   270ms   130ms   120ms
BLOWFISH       160ms   150ms   260ms   110ms   230ms   120ms   250ms   260ms   110ms   120ms
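(The tables are in the output format of libgcrypt's bundled tests/benchmark
tool, presumably from a run along the lines of "tests/benchmark cipher aes
camellia128 camellia256 cast5 blowfish"; the paired numbers under each mode
are encrypt and decrypt times.)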
Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
[ jk: in camellia.h and twofish.c, USE_ARMV6_ASM => USE_ARM_ASM ]
[ jk: fix blowfish-arm.S when __ARM_FEATURE_UNALIGNED defined ]
[ jk: in twofish.S remove defined(HAVE_ARM_ARCH_V6) ]
[ jk: ARMv6 => ARM in comments ]
Diffstat (limited to 'cipher/rijndael.c')

-rw-r--r-- | cipher/rijndael.c | 38
1 file changed, 19 insertions, 19 deletions
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index 85c1a41d..e9bb4f68 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -67,11 +67,11 @@
 # define USE_AMD64_ASM 1
 #endif
 
-/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
-#undef USE_ARMV6_ASM
-#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
 # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
-#  define USE_ARMV6_ASM 1
+#  define USE_ARM_ASM 1
 # endif
 #endif
 
@@ -123,18 +123,18 @@ extern void _gcry_aes_amd64_decrypt_block(const void *keysched_dec,
                                           int rounds);
 #endif /*USE_AMD64_ASM*/
 
-#ifdef USE_ARMV6_ASM
-/* ARMv6 assembly implementations of AES */
-extern void _gcry_aes_armv6_encrypt_block(const void *keysched_enc,
+#ifdef USE_ARM_ASM
+/* ARM assembly implementations of AES */
+extern void _gcry_aes_arm_encrypt_block(const void *keysched_enc,
                                           unsigned char *out,
                                           const unsigned char *in,
                                           int rounds);
-extern void _gcry_aes_armv6_decrypt_block(const void *keysched_dec,
+extern void _gcry_aes_arm_decrypt_block(const void *keysched_dec,
                                           unsigned char *out,
                                           const unsigned char *in,
                                           int rounds);
-#endif /*USE_ARMV6_ASM*/
+#endif /*USE_ARM_ASM*/
 
@@ -567,8 +567,8 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx,
 {
 #ifdef USE_AMD64_ASM
   _gcry_aes_amd64_encrypt_block(ctx->keyschenc, b, a, ctx->rounds);
-#elif defined(USE_ARMV6_ASM)
-  _gcry_aes_armv6_encrypt_block(ctx->keyschenc, b, a, ctx->rounds);
+#elif defined(USE_ARM_ASM)
+  _gcry_aes_arm_encrypt_block(ctx->keyschenc, b, a, ctx->rounds);
 #else
 #define rk (ctx->keyschenc)
   int rounds = ctx->rounds;
@@ -651,7 +651,7 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx,
   *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]);
   *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]);
 #undef rk
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 }
 
 
@@ -659,7 +659,7 @@ static void
 do_encrypt (const RIJNDAEL_context *ctx,
             unsigned char *bx, const unsigned char *ax)
 {
-#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM)
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
   /* BX and AX are not necessary correctly aligned.  Thus we might
      need to copy them here.  We try to align to a 16 bytes. */
   if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
@@ -680,7 +680,7 @@ do_encrypt (const RIJNDAEL_context *ctx,
       memcpy (bx, b.b, 16);
     }
   else
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
     {
       do_encrypt_aligned (ctx, bx, ax);
     }
@@ -1694,8 +1694,8 @@ do_decrypt_aligned (RIJNDAEL_context *ctx,
 {
 #ifdef USE_AMD64_ASM
   _gcry_aes_amd64_decrypt_block(ctx->keyschdec, b, a, ctx->rounds);
-#elif defined(USE_ARMV6_ASM)
-  _gcry_aes_armv6_decrypt_block(ctx->keyschdec, b, a, ctx->rounds);
+#elif defined(USE_ARM_ASM)
+  _gcry_aes_arm_decrypt_block(ctx->keyschdec, b, a, ctx->rounds);
 #else
 #define rk (ctx->keyschdec)
   int rounds = ctx->rounds;
@@ -1779,7 +1779,7 @@ do_decrypt_aligned (RIJNDAEL_context *ctx,
   *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]);
   *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]);
 #undef rk
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
 }
 
 
@@ -1794,7 +1794,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
       ctx->decryption_prepared = 1;
     }
 
-#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM)
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
   /* BX and AX are not necessary correctly aligned.  Thus we might
      need to copy them here.  We try to align to a 16 bytes. */
   if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
@@ -1815,7 +1815,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
       memcpy (bx, b.b, 16);
     }
   else
-#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
     {
       do_decrypt_aligned (ctx, bx, ax);
     }