-rw-r--r--   cipher/Makefile.am                                            |   8
-rw-r--r--   cipher/blowfish-arm.S (renamed from cipher/blowfish-armv6.S)  | 183
-rw-r--r--   cipher/blowfish.c                                             |  44
-rw-r--r--   cipher/camellia-arm.S (renamed from cipher/camellia-armv6.S)  |  70
-rw-r--r--   cipher/camellia-glue.c                                        |  14
-rw-r--r--   cipher/camellia.c                                             |   8
-rw-r--r--   cipher/camellia.h                                             |  10
-rw-r--r--   cipher/cast5-arm.S (renamed from cipher/cast5-armv6.S)        | 173
-rw-r--r--   cipher/cast5.c                                                |  46
-rw-r--r--   cipher/rijndael-arm.S (renamed from cipher/rijndael-armv6.S)  |  22
-rw-r--r--   cipher/rijndael.c                                             |  38
-rw-r--r--   cipher/twofish-arm.S (renamed from cipher/twofish-armv6.S)    |  28
-rw-r--r--   cipher/twofish.c                                              |  32
-rw-r--r--   configure.ac                                                  |  10
14 files changed, 361 insertions, 325 deletions
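The substance of this patch, beyond renaming the *-armv6.S files and the _gcry_*_armv6_* symbols to plain *-arm, is that the ARM assembly is no longer restricted to ARMv6: the #if guard drops HAVE_ARM_ARCH_V6, and the host_to_be()/be_to_host() macros gain a scratch-register argument so that pre-ARMv6 cores, which lack the rev instruction, can fall back to the classic eor/lsr/bic/eor byte swap. That extra rtmp argument is also why every read_block*/write_block* macro in the hunks below grows a trailing parameter. A minimal C model of the fallback sequence follows; it is an illustrative sketch only (the helper names swap32_pre_armv6 and ror32 are invented, not part of the patch):

    /* C model of the pre-ARMv6 byte swap added by this patch:
     *   eor rtmp, reg, reg, ror #16
     *   mov rtmp, rtmp, lsr #8
     *   bic rtmp, rtmp, #65280
     *   eor reg, rtmp, reg, ror #8
     * Illustrative sketch only; helper names are invented. */
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t ror32 (uint32_t x, unsigned int n)
    {
      return (x >> n) | (x << (32 - n));
    }

    static uint32_t swap32_pre_armv6 (uint32_t reg)
    {
      uint32_t rtmp;

      rtmp = reg ^ ror32 (reg, 16);  /* eor rtmp, reg, reg, ror #16 */
      rtmp = rtmp >> 8;              /* mov rtmp, rtmp, lsr #8      */
      rtmp = rtmp & ~0xff00u;        /* bic rtmp, rtmp, #65280      */
      return rtmp ^ ror32 (reg, 8);  /* eor reg, rtmp, reg, ror #8  */
    }

    int main (void)
    {
      /* 0x11223344 -> 0x44332211 */
      printf ("%08x\n", swap32_pre_armv6 (0x11223344));
      return 0;
    }

The trick works because reg ^ ror(reg, 16) XORs each byte with the byte two positions away; after the shift and the bic of bits 8..15 only bytes 2 and 0 of rtmp remain non-zero, so XORing with ror(reg, 8) cancels the duplicated bytes and leaves the word fully byte-reversed.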
diff --git a/cipher/Makefile.am b/cipher/Makefile.am index e6b1745d..d7db9337 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -56,7 +56,7 @@ rmd.h EXTRA_libcipher_la_SOURCES = \ arcfour.c \ blowfish.c blowfish-amd64.S \ -cast5.c cast5-amd64.S cast5-armv6.S \ +cast5.c cast5-amd64.S cast5-arm.S \ crc.c \ des.c \ dsa.c \ @@ -68,7 +68,7 @@ gost28147.c gost.h \ gostr3411-94.c \ md4.c \ md5.c \ -rijndael.c rijndael-tables.h rijndael-amd64.S rijndael-armv6.S \ +rijndael.c rijndael-tables.h rijndael-amd64.S rijndael-arm.S \ rmd160.c \ rsa.c \ salsa20.c \ @@ -81,10 +81,10 @@ sha512.c sha512-armv7-neon.S \ stribog.c \ tiger.c \ whirlpool.c \ -twofish.c twofish-amd64.S twofish-armv6.S \ +twofish.c twofish-amd64.S twofish-arm.S \ rfc2268.c \ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ - camellia-aesni-avx2-amd64.S camellia-armv6.S + camellia-aesni-avx2-amd64.S camellia-arm.S if ENABLE_O_FLAG_MUNGING o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g' diff --git a/cipher/blowfish-armv6.S b/cipher/blowfish-arm.S index eea879f2..43090d7d 100644 --- a/cipher/blowfish-armv6.S +++ b/cipher/blowfish-arm.S @@ -1,4 +1,4 @@ -/* blowfish-armv6.S - ARM assembly implementation of Blowfish cipher +/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher * * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * @@ -20,7 +20,7 @@ #include <config.h> -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +#if defined(__ARMEL__) #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS .text @@ -97,20 +97,33 @@ #define str_unaligned_host str_unaligned_le /* bswap on little-endian */ - #define host_to_be(reg) \ +#ifdef HAVE_ARM_ARCH_V6 + #define host_to_be(reg, rtmp) \ rev reg, reg; - #define be_to_host(reg) \ + #define be_to_host(reg, rtmp) \ rev reg, reg; #else + #define host_to_be(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else #define ldr_unaligned_host ldr_unaligned_be #define str_unaligned_host str_unaligned_be /* nop on big-endian */ - #define host_to_be(reg) /*_*/ - #define be_to_host(reg) /*_*/ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ #endif -#define host_to_host(x) /*_*/ +#define host_to_host(x, y) /*_*/ /*********************************************************************** * 1-way blowfish @@ -159,31 +172,31 @@ F(RL0, RR0); \ F(RR0, RL0); -#define read_block_aligned(rin, offs, l0, r0, convert) \ +#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \ ldr l0, [rin, #((offs) + 0)]; \ ldr r0, [rin, #((offs) + 4)]; \ - convert(l0); \ - convert(r0); + convert(l0, rtmp); \ + convert(r0, rtmp); -#define write_block_aligned(rout, offs, l0, r0, convert) \ - convert(l0); \ - convert(r0); \ +#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ str l0, [rout, #((offs) + 0)]; \ str r0, [rout, #((offs) + 4)]; #ifdef __ARM_FEATURE_UNALIGNED /* unaligned word reads allowed */ #define read_block(rin, offs, l0, r0, rtmp0) \ - read_block_aligned(rin, offs, l0, r0, host_to_be) + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0) #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \ - write_block_aligned(rout, offs, r0, l0, be_to_host) + write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0) #define 
read_block_host(rin, offs, l0, r0, rtmp0) \ - read_block_aligned(rin, offs, l0, r0, host_to_host) + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0) #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \ - write_block_aligned(rout, offs, r0, l0, host_to_host) + write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0) #else /* need to handle unaligned reads by byte reads */ #define read_block(rin, offs, l0, r0, rtmp0) \ @@ -193,7 +206,7 @@ ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \ b 2f; \ 1:;\ - read_block_aligned(rin, offs, l0, r0, host_to_be); \ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \ 2:; #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \ @@ -203,7 +216,7 @@ str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \ b 2f; \ 1:;\ - write_block_aligned(rout, offs, l0, r0, be_to_host); \ + write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \ 2:; #define read_block_host(rin, offs, l0, r0, rtmp0) \ @@ -213,7 +226,7 @@ ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \ b 2f; \ 1:;\ - read_block_aligned(rin, offs, l0, r0, host_to_host); \ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \ 2:; #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \ @@ -259,10 +272,10 @@ __blowfish_enc_blk1: .size __blowfish_enc_blk1,.-__blowfish_enc_blk1; .align 8 -.globl _gcry_blowfish_armv6_do_encrypt -.type _gcry_blowfish_armv6_do_encrypt,%function; +.globl _gcry_blowfish_arm_do_encrypt +.type _gcry_blowfish_arm_do_encrypt,%function; -_gcry_blowfish_armv6_do_encrypt: +_gcry_blowfish_arm_do_encrypt: /* input: * %r0: ctx, CTX * %r1: u32 *ret_xl @@ -280,13 +293,13 @@ _gcry_blowfish_armv6_do_encrypt: str RL0, [%r2]; pop {%r4-%r11, %ip, %pc}; -.size _gcry_blowfish_armv6_do_encrypt,.-_gcry_blowfish_armv6_do_encrypt; +.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt; .align 3 -.global _gcry_blowfish_armv6_encrypt_block -.type _gcry_blowfish_armv6_encrypt_block,%function; +.global _gcry_blowfish_arm_encrypt_block +.type _gcry_blowfish_arm_encrypt_block,%function; -_gcry_blowfish_armv6_encrypt_block: +_gcry_blowfish_arm_encrypt_block: /* input: * %r0: ctx, CTX * %r1: dst @@ -301,13 +314,13 @@ _gcry_blowfish_armv6_encrypt_block: write_block(%r1, 0, RR0, RL0, RT0, RT1); pop {%r4-%r11, %ip, %pc}; -.size _gcry_blowfish_armv6_encrypt_block,.-_gcry_blowfish_armv6_encrypt_block; +.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block; .align 3 -.global _gcry_blowfish_armv6_decrypt_block -.type _gcry_blowfish_armv6_decrypt_block,%function; +.global _gcry_blowfish_arm_decrypt_block +.type _gcry_blowfish_arm_decrypt_block,%function; -_gcry_blowfish_armv6_decrypt_block: +_gcry_blowfish_arm_decrypt_block: /* input: * %r0: ctx, CTX * %r1: dst @@ -336,7 +349,7 @@ _gcry_blowfish_armv6_decrypt_block: write_block(%r1, 0, RR0, RL0, RT0, RT1); pop {%r4-%r11, %ip, %pc}; -.size _gcry_blowfish_armv6_decrypt_block,.-_gcry_blowfish_armv6_decrypt_block; +.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block; /*********************************************************************** * 2-way blowfish @@ -441,22 +454,22 @@ _gcry_blowfish_armv6_decrypt_block: #define round_dec2(n, load_next_key) \ F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1); -#define read_block2_aligned(rin, l0, r0, l1, r1, convert) \ +#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \ ldr l0, [rin, #(0)]; \ ldr r0, [rin, #(4)]; \ - convert(l0); \ + convert(l0, rtmp); \ ldr l1, [rin, #(8)]; \ - convert(r0); \ + convert(r0, rtmp); 
\ ldr r1, [rin, #(12)]; \ - convert(l1); \ - convert(r1); + convert(l1, rtmp); \ + convert(r1, rtmp); -#define write_block2_aligned(rout, l0, r0, l1, r1, convert) \ - convert(l0); \ - convert(r0); \ - convert(l1); \ +#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + convert(l1, rtmp); \ str l0, [rout, #(0)]; \ - convert(r1); \ + convert(r1, rtmp); \ str r0, [rout, #(4)]; \ str l1, [rout, #(8)]; \ str r1, [rout, #(12)]; @@ -464,16 +477,16 @@ _gcry_blowfish_armv6_decrypt_block: #ifdef __ARM_FEATURE_UNALIGNED /* unaligned word reads allowed */ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ - read_block2_aligned(rin, l0, r0, l1, r1, host_to_be) + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0) #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ - write_block2_aligned(rout, l0, r0, l1, r1, be_to_host) + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0) #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ - read_block2_aligned(rin, l0, r0, l1, r1, host_to_host) + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0) #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ - write_block2_aligned(rout, l0, r0, l1, r1, host_to_host) + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0) #else /* need to handle unaligned reads by byte reads */ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ @@ -485,7 +498,7 @@ _gcry_blowfish_armv6_decrypt_block: ldr_unaligned_be(r1, rin, 12, rtmp0); \ b 2f; \ 1:;\ - read_block2_aligned(rin, l0, r0, l1, r1, host_to_be); \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \ 2:; #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ @@ -497,7 +510,7 @@ _gcry_blowfish_armv6_decrypt_block: str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \ b 2f; \ 1:;\ - write_block2_aligned(rout, l0, r0, l1, r1, be_to_host); \ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \ 2:; #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ @@ -509,7 +522,7 @@ _gcry_blowfish_armv6_decrypt_block: ldr_unaligned_host(r1, rin, 12, rtmp0); \ b 2f; \ 1:;\ - read_block2_aligned(rin, l0, r0, l1, r1, host_to_host); \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \ 2:; #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ @@ -521,21 +534,21 @@ _gcry_blowfish_armv6_decrypt_block: str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \ b 2f; \ 1:;\ - write_block2_aligned(rout, l0, r0, l1, r1, host_to_host); \ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \ 2:; #endif .align 3 -.type _gcry_blowfish_armv6_enc_blk2,%function; +.type _gcry_blowfish_arm_enc_blk2,%function; -_gcry_blowfish_armv6_enc_blk2: +_gcry_blowfish_arm_enc_blk2: /* input: * preloaded: CTX * [RL0, RR0], [RL1, RR1]: src * output: * [RR0, RL0], [RR1, RL1]: dst */ - push {%lr}; + push {RT0,%lr}; add CTXs2, CTXs0, #(s2 - s0); mov RMASK, #(0xff << 2); /* byte mask */ @@ -550,19 +563,19 @@ _gcry_blowfish_armv6_enc_blk2: round_enc2(14, next_key); round_enc2(16, dummy); - host_to_be(RR0); - host_to_be(RL0); - host_to_be(RR1); - host_to_be(RL1); + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); - pop {%pc}; -.size _gcry_blowfish_armv6_enc_blk2,.-_gcry_blowfish_armv6_enc_blk2; + pop {RT0,%pc}; +.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2; .align 3 -.globl _gcry_blowfish_armv6_cfb_dec; -.type _gcry_blowfish_armv6_cfb_dec,%function; +.globl _gcry_blowfish_arm_cfb_dec; +.type 
_gcry_blowfish_arm_cfb_dec,%function; -_gcry_blowfish_armv6_cfb_dec: +_gcry_blowfish_arm_cfb_dec: /* input: * %r0: CTX * %r1: dst (2 blocks) @@ -575,15 +588,15 @@ _gcry_blowfish_armv6_cfb_dec: /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ ldm %r3, {RL0, RR0}; - host_to_be(RL0); - host_to_be(RR0); + host_to_be(RL0, RT0); + host_to_be(RR0, RT0); read_block(%r2, 0, RL1, RR1, RT0); /* Update IV, load src[1] and save to iv[0] */ read_block_host(%r2, 8, %r5, %r6, RT0); stm %lr, {%r5, %r6}; - bl _gcry_blowfish_armv6_enc_blk2; + bl _gcry_blowfish_arm_enc_blk2; /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ /* %r1: dst, %r0: %src */ @@ -599,13 +612,13 @@ _gcry_blowfish_armv6_cfb_dec: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_blowfish_armv6_cfb_dec,.-_gcry_blowfish_armv6_cfb_dec; +.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec; .align 3 -.globl _gcry_blowfish_armv6_ctr_enc; -.type _gcry_blowfish_armv6_ctr_enc,%function; +.globl _gcry_blowfish_arm_ctr_enc; +.type _gcry_blowfish_arm_ctr_enc,%function; -_gcry_blowfish_armv6_ctr_enc: +_gcry_blowfish_arm_ctr_enc: /* input: * %r0: CTX * %r1: dst (2 blocks) @@ -617,7 +630,7 @@ _gcry_blowfish_armv6_ctr_enc: mov %lr, %r3; /* Load IV (big => host endian) */ - read_block_aligned(%lr, 0, RL0, RR0, be_to_host); + read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0); /* Construct IVs */ adds RR1, RR0, #1; /* +1 */ @@ -626,9 +639,9 @@ _gcry_blowfish_armv6_ctr_enc: adc %r5, RL1, #0; /* Store new IV (host => big-endian) */ - write_block_aligned(%lr, 0, %r5, %r6, host_to_be); + write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0); - bl _gcry_blowfish_armv6_enc_blk2; + bl _gcry_blowfish_arm_enc_blk2; /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ /* %r1: dst, %r0: %src */ @@ -644,12 +657,12 @@ _gcry_blowfish_armv6_ctr_enc: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_blowfish_armv6_ctr_enc,.-_gcry_blowfish_armv6_ctr_enc; +.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc; .align 3 -.type _gcry_blowfish_armv6_dec_blk2,%function; +.type _gcry_blowfish_arm_dec_blk2,%function; -_gcry_blowfish_armv6_dec_blk2: +_gcry_blowfish_arm_dec_blk2: /* input: * preloaded: CTX * [RL0, RR0], [RL1, RR1]: src @@ -669,20 +682,20 @@ _gcry_blowfish_armv6_dec_blk2: round_dec2(3, next_key); round_dec2(1, dummy); - host_to_be(RR0); - host_to_be(RL0); - host_to_be(RR1); - host_to_be(RL1); + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); b .Ldec_cbc_tail; .ltorg -.size _gcry_blowfish_armv6_dec_blk2,.-_gcry_blowfish_armv6_dec_blk2; +.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2; .align 3 -.globl _gcry_blowfish_armv6_cbc_dec; -.type _gcry_blowfish_armv6_cbc_dec,%function; +.globl _gcry_blowfish_arm_cbc_dec; +.type _gcry_blowfish_arm_cbc_dec,%function; -_gcry_blowfish_armv6_cbc_dec: +_gcry_blowfish_arm_cbc_dec: /* input: * %r0: CTX * %r1: dst (2 blocks) @@ -695,7 +708,7 @@ _gcry_blowfish_armv6_cbc_dec: /* dec_blk2 is only used by cbc_dec, jump directly in/out instead * of function call. 
*/ - b _gcry_blowfish_armv6_dec_blk2; + b _gcry_blowfish_arm_dec_blk2; .Ldec_cbc_tail: /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ @@ -724,7 +737,7 @@ _gcry_blowfish_armv6_cbc_dec: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_blowfish_armv6_cbc_dec,.-_gcry_blowfish_armv6_cbc_dec; +.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec; #endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ #endif /*__ARM_ARCH >= 6*/ diff --git a/cipher/blowfish.c b/cipher/blowfish.c index 2f739c8f..ed4e901d 100644 --- a/cipher/blowfish.c +++ b/cipher/blowfish.c @@ -50,11 +50,11 @@ # define USE_AMD64_ASM 1 #endif -/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ -#undef USE_ARMV6_ASM -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +/* USE_ARM_ASM indicates whether to use ARM assembly code. */ +#undef USE_ARM_ASM +#if defined(__ARMEL__) # if (BLOWFISH_ROUNDS == 16) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) -# define USE_ARMV6_ASM 1 +# define USE_ARM_ASM 1 # endif #endif @@ -314,44 +314,44 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (2*8); } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) /* Assembly implementations of Blowfish. */ -extern void _gcry_blowfish_armv6_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, +extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, u32 *ret_xr); -extern void _gcry_blowfish_armv6_encrypt_block(BLOWFISH_context *c, byte *out, +extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out, const byte *in); -extern void _gcry_blowfish_armv6_decrypt_block(BLOWFISH_context *c, byte *out, +extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out, const byte *in); /* These assembly implementations process two blocks in parallel. */ -extern void _gcry_blowfish_armv6_ctr_enc(BLOWFISH_context *ctx, byte *out, +extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in, byte *ctr); -extern void _gcry_blowfish_armv6_cbc_dec(BLOWFISH_context *ctx, byte *out, +extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv); -extern void _gcry_blowfish_armv6_cfb_dec(BLOWFISH_context *ctx, byte *out, +extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv); static void do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) { - _gcry_blowfish_armv6_do_encrypt (bc, ret_xl, ret_xr); + _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr); } static void do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { - _gcry_blowfish_armv6_encrypt_block (context, outbuf, inbuf); + _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf); } static void do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { - _gcry_blowfish_armv6_decrypt_block (context, outbuf, inbuf); + _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf); } static unsigned int @@ -370,7 +370,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (10*4); } -#else /*USE_ARMV6_ASM*/ +#else /*USE_ARM_ASM*/ #if BLOWFISH_ROUNDS != 16 static inline u32 @@ -580,7 +580,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (64); } -#endif /*!USE_AMD64_ASM&&!USE_ARMV6_ASM*/ +#endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/ /* Bulk encryption of complete blocks in CTR mode. 
This function is only @@ -615,12 +615,12 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { - _gcry_blowfish_armv6_ctr_enc(ctx, outbuf, inbuf, ctr); + _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 2; outbuf += 2 * BLOWFISH_BLOCKSIZE; @@ -683,12 +683,12 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { - _gcry_blowfish_armv6_cbc_dec(ctx, outbuf, inbuf, iv); + _gcry_blowfish_arm_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 2; outbuf += 2 * BLOWFISH_BLOCKSIZE; @@ -746,12 +746,12 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { - _gcry_blowfish_armv6_cfb_dec(ctx, outbuf, inbuf, iv); + _gcry_blowfish_arm_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 2; outbuf += 2 * BLOWFISH_BLOCKSIZE; diff --git a/cipher/camellia-armv6.S b/cipher/camellia-arm.S index 3544754b..820c46ea 100644 --- a/cipher/camellia-armv6.S +++ b/cipher/camellia-arm.S @@ -1,4 +1,4 @@ -/* camellia-armv6.S - ARM assembly implementation of Camellia cipher +/* camellia-arm.S - ARM assembly implementation of Camellia cipher * * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * @@ -20,7 +20,7 @@ #include <config.h> -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +#if defined(__ARMEL__) #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS .text @@ -73,44 +73,56 @@ strb rtmp0, [rdst, #((offs) + 0)]; #ifdef __ARMEL__ - /* bswap on little-endian */ - #define host_to_be(reg) \ +#ifdef HAVE_ARM_ARCH_V6 + #define host_to_be(reg, rtmp) \ rev reg, reg; - #define be_to_host(reg) \ + #define be_to_host(reg, rtmp) \ rev reg, reg; #else + #define host_to_be(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else /* nop on big-endian */ - #define host_to_be(reg) /*_*/ - #define be_to_host(reg) /*_*/ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ #endif -#define ldr_input_aligned_be(rin, a, b, c, d) \ +#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \ ldr a, [rin, #0]; \ ldr b, [rin, #4]; \ - be_to_host(a); \ + be_to_host(a, rtmp); \ ldr c, [rin, #8]; \ - be_to_host(b); \ + be_to_host(b, rtmp); \ ldr d, [rin, #12]; \ - be_to_host(c); \ - be_to_host(d); + be_to_host(c, rtmp); \ + be_to_host(d, rtmp); -#define str_output_aligned_be(rout, a, b, c, d) \ - be_to_host(a); \ - be_to_host(b); \ +#define str_output_aligned_be(rout, a, b, c, d, rtmp) \ + be_to_host(a, rtmp); \ + be_to_host(b, rtmp); \ str a, [rout, #0]; \ - be_to_host(c); \ + be_to_host(c, rtmp); \ str b, [rout, #4]; \ - be_to_host(d); \ + be_to_host(d, rtmp); \ str c, [rout, #8]; \ str d, [rout, #12]; #ifdef __ARM_FEATURE_UNALIGNED /* unaligned word reads/writes allowed */ #define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \ - ldr_input_aligned_be(rin, ra, rb, rc, rd) + 
ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp) #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ - str_output_aligned_be(rout, ra, rb, rc, rd) + str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0) #else /* need to handle unaligned reads/writes by byte reads */ #define ldr_input_be(rin, ra, rb, rc, rd, rtmp0) \ @@ -122,7 +134,7 @@ ldr_unaligned_be(rd, rin, 12, rtmp0); \ b 2f; \ 1:;\ - ldr_input_aligned_be(rin, ra, rb, rc, rd); \ + ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp0); \ 2:; #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ @@ -134,7 +146,7 @@ str_unaligned_be(rd, rout, 12, rtmp0, rtmp1); \ b 2f; \ 1:;\ - str_output_aligned_be(rout, ra, rb, rc, rd); \ + str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0); \ 2:; #endif @@ -240,10 +252,10 @@ str_output_be(%r1, YL, YR, XL, XR, RT0, RT1); .align 3 -.global _gcry_camellia_armv6_encrypt_block -.type _gcry_camellia_armv6_encrypt_block,%function; +.global _gcry_camellia_arm_encrypt_block +.type _gcry_camellia_arm_encrypt_block,%function; -_gcry_camellia_armv6_encrypt_block: +_gcry_camellia_arm_encrypt_block: /* input: * %r0: keytable * %r1: dst @@ -285,13 +297,13 @@ _gcry_camellia_armv6_encrypt_block: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_camellia_armv6_encrypt_block,.-_gcry_camellia_armv6_encrypt_block; +.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; .align 3 -.global _gcry_camellia_armv6_decrypt_block -.type _gcry_camellia_armv6_decrypt_block,%function; +.global _gcry_camellia_arm_decrypt_block +.type _gcry_camellia_arm_decrypt_block,%function; -_gcry_camellia_armv6_decrypt_block: +_gcry_camellia_arm_decrypt_block: /* input: * %r0: keytable * %r1: dst @@ -330,7 +342,7 @@ _gcry_camellia_armv6_decrypt_block: b .Ldec_128; .ltorg -.size _gcry_camellia_armv6_decrypt_block,.-_gcry_camellia_armv6_decrypt_block; +.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; .data diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c index 29cb7a55..e6d40298 100644 --- a/cipher/camellia-glue.c +++ b/cipher/camellia-glue.c @@ -193,14 +193,14 @@ camellia_setkey(void *c, const byte *key, unsigned keylen) return 0; } -#ifdef USE_ARMV6_ASM +#ifdef USE_ARM_ASM /* Assembly implementations of CAST5. 
*/ -extern void _gcry_camellia_armv6_encrypt_block(const KEY_TABLE_TYPE keyTable, +extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable, byte *outbuf, const byte *inbuf, const int keybits); -extern void _gcry_camellia_armv6_decrypt_block(const KEY_TABLE_TYPE keyTable, +extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable, byte *outbuf, const byte *inbuf, const int keybits); @@ -209,7 +209,7 @@ static void Camellia_EncryptBlock(const int keyBitLength, const KEY_TABLE_TYPE keyTable, unsigned char *cipherText) { - _gcry_camellia_armv6_encrypt_block(keyTable, cipherText, plaintext, + _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext, keyBitLength); } @@ -218,7 +218,7 @@ static void Camellia_DecryptBlock(const int keyBitLength, const KEY_TABLE_TYPE keyTable, unsigned char *plaintext) { - _gcry_camellia_armv6_decrypt_block(keyTable, plaintext, cipherText, + _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText, keyBitLength); } @@ -240,7 +240,7 @@ camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); } -#else /*USE_ARMV6_ASM*/ +#else /*USE_ARM_ASM*/ static unsigned int camellia_encrypt(void *c, byte *outbuf, const byte *inbuf) @@ -276,7 +276,7 @@ camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); } -#endif /*!USE_ARMV6_ASM*/ +#endif /*!USE_ARM_ASM*/ /* Bulk encryption of complete blocks in CTR mode. This function is only intended for the bulk encryption feature of cipher.c. CTR is expected to be diff --git a/cipher/camellia.c b/cipher/camellia.c index 03510a35..9067246d 100644 --- a/cipher/camellia.c +++ b/cipher/camellia.c @@ -861,7 +861,7 @@ void camellia_setup192(const unsigned char *key, u32 *subkey) } -#ifndef USE_ARMV6_ASM +#ifndef USE_ARM_ASM /** * Stuff related to camellia encryption/decryption * @@ -1321,7 +1321,7 @@ void camellia_decrypt256(const u32 *subkey, u32 *blocks) return; } -#endif /*!USE_ARMV6_ASM*/ +#endif /*!USE_ARM_ASM*/ /*** @@ -1349,7 +1349,7 @@ void Camellia_Ekeygen(const int keyBitLength, } -#ifndef USE_ARMV6_ASM +#ifndef USE_ARM_ASM void Camellia_EncryptBlock(const int keyBitLength, const unsigned char *plaintext, const KEY_TABLE_TYPE keyTable, @@ -1410,4 +1410,4 @@ void Camellia_DecryptBlock(const int keyBitLength, PUTU32(plaintext + 8, tmp[2]); PUTU32(plaintext + 12, tmp[3]); } -#endif /*!USE_ARMV6_ASM*/ +#endif /*!USE_ARM_ASM*/ diff --git a/cipher/camellia.h b/cipher/camellia.h index 72f2d1fa..d0e3c18e 100644 --- a/cipher/camellia.h +++ b/cipher/camellia.h @@ -30,11 +30,11 @@ */ #ifdef HAVE_CONFIG_H #include <config.h> -/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ -# undef USE_ARMV6_ASM -# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +/* USE_ARM_ASM indicates whether to use ARM assembly code. 
*/ +# undef USE_ARM_ASM +# if defined(__ARMEL__) # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS -# define USE_ARMV6_ASM 1 +# define USE_ARM_ASM 1 # endif # endif #endif @@ -70,7 +70,7 @@ void Camellia_Ekeygen(const int keyBitLength, const unsigned char *rawKey, KEY_TABLE_TYPE keyTable); -#ifndef USE_ARMV6_ASM +#ifndef USE_ARM_ASM void Camellia_EncryptBlock(const int keyBitLength, const unsigned char *plaintext, const KEY_TABLE_TYPE keyTable, diff --git a/cipher/cast5-armv6.S b/cipher/cast5-arm.S index 038fc4f6..ce7fa93a 100644 --- a/cipher/cast5-armv6.S +++ b/cipher/cast5-arm.S @@ -1,4 +1,4 @@ -/* cast5-armv6.S - ARM assembly implementation of CAST5 cipher +/* cast5-arm.S - ARM assembly implementation of CAST5 cipher * * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * @@ -20,7 +20,7 @@ #include <config.h> -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +#if defined(__ARMEL__) #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS .text @@ -99,20 +99,33 @@ #define str_unaligned_host str_unaligned_le /* bswap on little-endian */ - #define host_to_be(reg) \ +#ifdef HAVE_ARM_ARCH_V6 + #define host_to_be(reg, rtmp) \ rev reg, reg; - #define be_to_host(reg) \ + #define be_to_host(reg, rtmp) \ rev reg, reg; #else + #define host_to_be(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else #define ldr_unaligned_host ldr_unaligned_be #define str_unaligned_host str_unaligned_be /* nop on big-endian */ - #define host_to_be(reg) /*_*/ - #define be_to_host(reg) /*_*/ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ #endif -#define host_to_host(x) /*_*/ +#define host_to_host(x, y) /*_*/ /********************************************************************** 1-way cast5 @@ -167,31 +180,31 @@ #define dec_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ Fx(n, rl, rr, 1, loadkm, shiftkr, loadkr) -#define read_block_aligned(rin, offs, l0, r0, convert) \ +#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \ ldr l0, [rin, #((offs) + 0)]; \ ldr r0, [rin, #((offs) + 4)]; \ - convert(l0); \ - convert(r0); + convert(l0, rtmp); \ + convert(r0, rtmp); -#define write_block_aligned(rout, offs, l0, r0, convert) \ - convert(l0); \ - convert(r0); \ +#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ str l0, [rout, #((offs) + 0)]; \ str r0, [rout, #((offs) + 4)]; #ifdef __ARM_FEATURE_UNALIGNED /* unaligned word reads allowed */ #define read_block(rin, offs, l0, r0, rtmp0) \ - read_block_aligned(rin, offs, l0, r0, host_to_be) + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0) #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \ - write_block_aligned(rout, offs, r0, l0, be_to_host) + write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0) #define read_block_host(rin, offs, l0, r0, rtmp0) \ - read_block_aligned(rin, offs, l0, r0, host_to_host) + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0) #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \ - write_block_aligned(rout, offs, r0, l0, host_to_host) + write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0) #else /* need to handle unaligned reads by byte reads */ #define read_block(rin, offs, l0, r0, rtmp0) \ @@ -201,7 +214,7 @@ ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \ b 2f; \ 1:;\ - 
read_block_aligned(rin, offs, l0, r0, host_to_be); \ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \ 2:; #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \ @@ -211,7 +224,7 @@ str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \ b 2f; \ 1:;\ - write_block_aligned(rout, offs, l0, r0, be_to_host); \ + write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \ 2:; #define read_block_host(rin, offs, l0, r0, rtmp0) \ @@ -221,7 +234,7 @@ ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \ b 2f; \ 1:;\ - read_block_aligned(rin, offs, l0, r0, host_to_host); \ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \ 2:; #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \ @@ -231,15 +244,15 @@ str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \ b 2f; \ 1:;\ - write_block_aligned(rout, offs, l0, r0, host_to_host); \ + write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \ 2:; #endif .align 3 -.globl _gcry_cast5_armv6_encrypt_block -.type _gcry_cast5_armv6_encrypt_block,%function; +.globl _gcry_cast5_arm_encrypt_block +.type _gcry_cast5_arm_encrypt_block,%function; -_gcry_cast5_armv6_encrypt_block: +_gcry_cast5_arm_encrypt_block: /* input: * %r0: CTX * %r1: dst @@ -279,13 +292,13 @@ _gcry_cast5_armv6_encrypt_block: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_cast5_armv6_encrypt_block,.-_gcry_cast5_armv6_encrypt_block; +.size _gcry_cast5_arm_encrypt_block,.-_gcry_cast5_arm_encrypt_block; .align 3 -.globl _gcry_cast5_armv6_decrypt_block -.type _gcry_cast5_armv6_decrypt_block,%function; +.globl _gcry_cast5_arm_decrypt_block +.type _gcry_cast5_arm_decrypt_block,%function; -_gcry_cast5_armv6_decrypt_block: +_gcry_cast5_arm_decrypt_block: /* input: * %r0: CTX * %r1: dst @@ -325,7 +338,7 @@ _gcry_cast5_armv6_decrypt_block: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_cast5_armv6_decrypt_block,.-_gcry_cast5_armv6_decrypt_block; +.size _gcry_cast5_arm_decrypt_block,.-_gcry_cast5_arm_decrypt_block; /********************************************************************** 2-way cast5 @@ -391,22 +404,22 @@ _gcry_cast5_armv6_decrypt_block: #define dec_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 1, loadkm, shiftkr, loadkr) -#define read_block2_aligned(rin, l0, r0, l1, r1, convert) \ +#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \ ldr l0, [rin, #(0)]; \ ldr r0, [rin, #(4)]; \ - convert(l0); \ + convert(l0, rtmp); \ ldr l1, [rin, #(8)]; \ - convert(r0); \ + convert(r0, rtmp); \ ldr r1, [rin, #(12)]; \ - convert(l1); \ - convert(r1); + convert(l1, rtmp); \ + convert(r1, rtmp); -#define write_block2_aligned(rout, l0, r0, l1, r1, convert) \ - convert(l0); \ - convert(r0); \ - convert(l1); \ +#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + convert(l1, rtmp); \ str l0, [rout, #(0)]; \ - convert(r1); \ + convert(r1, rtmp); \ str r0, [rout, #(4)]; \ str l1, [rout, #(8)]; \ str r1, [rout, #(12)]; @@ -414,16 +427,16 @@ _gcry_cast5_armv6_decrypt_block: #ifdef __ARM_FEATURE_UNALIGNED /* unaligned word reads allowed */ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ - read_block2_aligned(rin, l0, r0, l1, r1, host_to_be) + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0) #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ - write_block2_aligned(rout, l0, r0, l1, r1, be_to_host) + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0) #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ - read_block2_aligned(rin, l0, 
r0, l1, r1, host_to_host) + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0) #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ - write_block2_aligned(rout, l0, r0, l1, r1, host_to_host) + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0) #else /* need to handle unaligned reads by byte reads */ #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ @@ -435,7 +448,7 @@ _gcry_cast5_armv6_decrypt_block: ldr_unaligned_be(r1, rin, 12, rtmp0); \ b 2f; \ 1:;\ - read_block2_aligned(rin, l0, r0, l1, r1, host_to_be); \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \ 2:; #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ @@ -447,7 +460,7 @@ _gcry_cast5_armv6_decrypt_block: str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \ b 2f; \ 1:;\ - write_block2_aligned(rout, l0, r0, l1, r1, be_to_host); \ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \ 2:; #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ @@ -459,7 +472,7 @@ _gcry_cast5_armv6_decrypt_block: ldr_unaligned_host(r1, rin, 12, rtmp0); \ b 2f; \ 1:;\ - read_block2_aligned(rin, l0, r0, l1, r1, host_to_host); \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \ 2:; #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ @@ -471,14 +484,14 @@ _gcry_cast5_armv6_decrypt_block: str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \ b 2f; \ 1:;\ - write_block2_aligned(rout, l0, r0, l1, r1, host_to_host); \ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \ 2:; #endif .align 3 -.type _gcry_cast5_armv6_enc_blk2,%function; +.type _gcry_cast5_arm_enc_blk2,%function; -_gcry_cast5_armv6_enc_blk2: +_gcry_cast5_arm_enc_blk2: /* input: * preloaded: CTX * [RL0, RR0], [RL1, RR1]: src @@ -510,20 +523,20 @@ _gcry_cast5_armv6_enc_blk2: enc_round2(14, F3, RL, RR, load_km, shift_kr, dummy); enc_round2(15, F1, RR, RL, dummy, dummy, dummy); - host_to_be(RR0); - host_to_be(RL0); - host_to_be(RR1); - host_to_be(RL1); + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); pop {%pc}; .ltorg -.size _gcry_cast5_armv6_enc_blk2,.-_gcry_cast5_armv6_enc_blk2; +.size _gcry_cast5_arm_enc_blk2,.-_gcry_cast5_arm_enc_blk2; .align 3 -.globl _gcry_cast5_armv6_cfb_dec; -.type _gcry_cast5_armv6_cfb_dec,%function; +.globl _gcry_cast5_arm_cfb_dec; +.type _gcry_cast5_arm_cfb_dec,%function; -_gcry_cast5_armv6_cfb_dec: +_gcry_cast5_arm_cfb_dec: /* input: * %r0: CTX * %r1: dst (2 blocks) @@ -536,15 +549,15 @@ _gcry_cast5_armv6_cfb_dec: /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ ldm %r3, {RL0, RR0}; - host_to_be(RL0); - host_to_be(RR0); + host_to_be(RL0, RT1); + host_to_be(RR0, RT1); read_block(%r2, 0, RL1, RR1, %ip); /* Update IV, load src[1] and save to iv[0] */ read_block_host(%r2, 8, %r5, %r6, %r7); stm %lr, {%r5, %r6}; - bl _gcry_cast5_armv6_enc_blk2; + bl _gcry_cast5_arm_enc_blk2; /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ /* %r0: dst, %r1: %src */ @@ -560,13 +573,13 @@ _gcry_cast5_armv6_cfb_dec: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_cast5_armv6_cfb_dec,.-_gcry_cast5_armv6_cfb_dec; +.size _gcry_cast5_arm_cfb_dec,.-_gcry_cast5_arm_cfb_dec; .align 3 -.globl _gcry_cast5_armv6_ctr_enc; -.type _gcry_cast5_armv6_ctr_enc,%function; +.globl _gcry_cast5_arm_ctr_enc; +.type _gcry_cast5_arm_ctr_enc,%function; -_gcry_cast5_armv6_ctr_enc: +_gcry_cast5_arm_ctr_enc: /* input: * %r0: CTX * %r1: dst (2 blocks) @@ -578,7 +591,7 @@ _gcry_cast5_armv6_ctr_enc: mov %lr, %r3; /* Load IV (big => host endian) */ - 
read_block_aligned(%lr, 0, RL0, RR0, be_to_host); + read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT1); /* Construct IVs */ adds RR1, RR0, #1; /* +1 */ @@ -587,9 +600,9 @@ _gcry_cast5_armv6_ctr_enc: adc %r5, RL1, #0; /* Store new IV (host => big-endian) */ - write_block_aligned(%lr, 0, %r5, %r6, host_to_be); + write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT1); - bl _gcry_cast5_armv6_enc_blk2; + bl _gcry_cast5_arm_enc_blk2; /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ /* %r0: dst, %r1: %src */ @@ -605,12 +618,12 @@ _gcry_cast5_armv6_ctr_enc: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_cast5_armv6_ctr_enc,.-_gcry_cast5_armv6_ctr_enc; +.size _gcry_cast5_arm_ctr_enc,.-_gcry_cast5_arm_ctr_enc; .align 3 -.type _gcry_cast5_armv6_dec_blk2,%function; +.type _gcry_cast5_arm_dec_blk2,%function; -_gcry_cast5_armv6_dec_blk2: +_gcry_cast5_arm_dec_blk2: /* input: * preloaded: CTX * [RL0, RR0], [RL1, RR1]: src @@ -641,20 +654,20 @@ _gcry_cast5_armv6_dec_blk2: dec_round2(1, F2, RL, RR, load_km, shift_kr, dummy); dec_round2(0, F1, RR, RL, dummy, dummy, dummy); - host_to_be(RR0); - host_to_be(RL0); - host_to_be(RR1); - host_to_be(RL1); + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); b .Ldec_cbc_tail; .ltorg -.size _gcry_cast5_armv6_dec_blk2,.-_gcry_cast5_armv6_dec_blk2; +.size _gcry_cast5_arm_dec_blk2,.-_gcry_cast5_arm_dec_blk2; .align 3 -.globl _gcry_cast5_armv6_cbc_dec; -.type _gcry_cast5_armv6_cbc_dec,%function; +.globl _gcry_cast5_arm_cbc_dec; +.type _gcry_cast5_arm_cbc_dec,%function; -_gcry_cast5_armv6_cbc_dec: +_gcry_cast5_arm_cbc_dec: /* input: * %r0: CTX * %r1: dst (2 blocks) @@ -667,7 +680,7 @@ _gcry_cast5_armv6_cbc_dec: /* dec_blk2 is only used by cbc_dec, jump directly in/out instead * of function call. */ - b _gcry_cast5_armv6_dec_blk2; + b _gcry_cast5_arm_dec_blk2; .Ldec_cbc_tail: /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ @@ -696,7 +709,7 @@ _gcry_cast5_armv6_cbc_dec: pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_cast5_armv6_cbc_dec,.-_gcry_cast5_armv6_cbc_dec; +.size _gcry_cast5_arm_cbc_dec,.-_gcry_cast5_arm_cbc_dec; #endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ #endif /*__ARM_ARCH >= 6*/ diff --git a/cipher/cast5.c b/cipher/cast5.c index 92d9af8c..8c016d7c 100644 --- a/cipher/cast5.c +++ b/cipher/cast5.c @@ -52,11 +52,11 @@ # define USE_AMD64_ASM 1 #endif -/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ -#undef USE_ARMV6_ASM -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +/* USE_ARM_ASM indicates whether to use ARM assembly code. */ +#undef USE_ARM_ASM +#if defined(__ARMEL__) # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS -# define USE_ARMV6_ASM 1 +# define USE_ARM_ASM 1 # endif #endif @@ -65,7 +65,7 @@ typedef struct { u32 Km[16]; byte Kr[16]; -#ifdef USE_ARMV6_ASM +#ifdef USE_ARM_ASM u32 Kr_arm_enc[16 / sizeof(u32)]; u32 Kr_arm_dec[16 / sizeof(u32)]; #endif @@ -400,35 +400,35 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (2*8); } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) -/* ARMv6 assembly implementations of CAST5. */ -extern void _gcry_cast5_armv6_encrypt_block(CAST5_context *c, byte *outbuf, +/* ARM assembly implementations of CAST5. 
*/ +extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf, const byte *inbuf); -extern void _gcry_cast5_armv6_decrypt_block(CAST5_context *c, byte *outbuf, +extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf, const byte *inbuf); /* These assembly implementations process two blocks in parallel. */ -extern void _gcry_cast5_armv6_ctr_enc(CAST5_context *ctx, byte *out, +extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr); -extern void _gcry_cast5_armv6_cbc_dec(CAST5_context *ctx, byte *out, +extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv); -extern void _gcry_cast5_armv6_cfb_dec(CAST5_context *ctx, byte *out, +extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv); static void do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) { - _gcry_cast5_armv6_encrypt_block (context, outbuf, inbuf); + _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf); } static void do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) { - _gcry_cast5_armv6_decrypt_block (context, outbuf, inbuf); + _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf); } static unsigned int @@ -447,7 +447,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (10*4); } -#else /*USE_ARMV6_ASM*/ +#else /*USE_ARM_ASM*/ #define F1(D,m,r) ( (I = ((m) + (D))), (I=rol(I,(r))), \ (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) ) @@ -556,7 +556,7 @@ decrypt_block (void *context, byte *outbuf, const byte *inbuf) return /*burn_stack*/ (20+4*sizeof(void*)); } -#endif /*!USE_ARMV6_ASM*/ +#endif /*!USE_ARM_ASM*/ /* Bulk encryption of complete blocks in CTR mode. This function is only @@ -592,12 +592,12 @@ _gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ /* TODO: use caching instead? */ } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { - _gcry_cast5_armv6_ctr_enc(ctx, outbuf, inbuf, ctr); + _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 2; outbuf += 2 * CAST5_BLOCKSIZE; @@ -660,12 +660,12 @@ _gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. */ while (nblocks >= 2) { - _gcry_cast5_armv6_cbc_dec(ctx, outbuf, inbuf, iv); + _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 2; outbuf += 2 * CAST5_BLOCKSIZE; @@ -722,12 +722,12 @@ _gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Use generic code to handle smaller chunks... */ } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) { /* Process data in 2 block chunks. 
*/ while (nblocks >= 2) { - _gcry_cast5_armv6_cfb_dec(ctx, outbuf, inbuf, iv); + _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 2; outbuf += 2 * CAST5_BLOCKSIZE; @@ -936,7 +936,7 @@ do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen ) for(i=0; i < 16; i++ ) c->Kr[i] = k[i] & 0x1f; -#ifdef USE_ARMV6_ASM +#ifdef USE_ARM_ASM for (i = 0; i < 4; i++) { byte Kr_arm[4]; diff --git a/cipher/rijndael-armv6.S b/cipher/rijndael-arm.S index bbbfb0ea..2a747bfc 100644 --- a/cipher/rijndael-armv6.S +++ b/cipher/rijndael-arm.S @@ -1,4 +1,4 @@ -/* rijndael-armv6.S - ARM assembly implementation of AES cipher +/* rijndael-arm.S - ARM assembly implementation of AES cipher * * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * @@ -20,7 +20,7 @@ #include <config.h> -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +#if defined(__ARMEL__) #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS .text @@ -211,10 +211,10 @@ addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .align 3 -.global _gcry_aes_armv6_encrypt_block -.type _gcry_aes_armv6_encrypt_block,%function; +.global _gcry_aes_arm_encrypt_block +.type _gcry_aes_arm_encrypt_block,%function; -_gcry_aes_armv6_encrypt_block: +_gcry_aes_arm_encrypt_block: /* input: * %r0: keysched, CTX * %r1: dst @@ -324,7 +324,7 @@ _gcry_aes_armv6_encrypt_block: lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); b .Lenc_done; -.size _gcry_aes_armv6_encrypt_block,.-_gcry_aes_armv6_encrypt_block; +.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; #define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ @@ -465,10 +465,10 @@ _gcry_aes_armv6_encrypt_block: addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); .align 3 -.global _gcry_aes_armv6_decrypt_block -.type _gcry_aes_armv6_decrypt_block,%function; +.global _gcry_aes_arm_decrypt_block +.type _gcry_aes_arm_decrypt_block,%function; -_gcry_aes_armv6_decrypt_block: +_gcry_aes_arm_decrypt_block: /* input: * %r0: keysched, CTX * %r1: dst @@ -573,7 +573,7 @@ _gcry_aes_armv6_decrypt_block: decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); b .Ldec_tail; -.size _gcry_aes_armv6_encrypt_block,.-_gcry_aes_armv6_encrypt_block; +.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block; .data @@ -850,4 +850,4 @@ _gcry_aes_armv6_decrypt_block: .long 0x745c6c48, 0x0000000c, 0x4257b8d0, 0x0000007d #endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ -#endif /*__ARM_ARCH >= 6*/ +#endif /*__ARMEL__ */ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 85c1a41d..e9bb4f68 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -67,11 +67,11 @@ # define USE_AMD64_ASM 1 #endif -/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ -#undef USE_ARMV6_ASM -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +/* USE_ARM_ASM indicates whether to use ARM assembly code. 
*/ +#undef USE_ARM_ASM +#if defined(__ARMEL__) # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS -# define USE_ARMV6_ASM 1 +# define USE_ARM_ASM 1 # endif #endif @@ -123,18 +123,18 @@ extern void _gcry_aes_amd64_decrypt_block(const void *keysched_dec, int rounds); #endif /*USE_AMD64_ASM*/ -#ifdef USE_ARMV6_ASM -/* ARMv6 assembly implementations of AES */ -extern void _gcry_aes_armv6_encrypt_block(const void *keysched_enc, +#ifdef USE_ARM_ASM +/* ARM assembly implementations of AES */ +extern void _gcry_aes_arm_encrypt_block(const void *keysched_enc, unsigned char *out, const unsigned char *in, int rounds); -extern void _gcry_aes_armv6_decrypt_block(const void *keysched_dec, +extern void _gcry_aes_arm_decrypt_block(const void *keysched_dec, unsigned char *out, const unsigned char *in, int rounds); -#endif /*USE_ARMV6_ASM*/ +#endif /*USE_ARM_ASM*/ @@ -567,8 +567,8 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx, { #ifdef USE_AMD64_ASM _gcry_aes_amd64_encrypt_block(ctx->keyschenc, b, a, ctx->rounds); -#elif defined(USE_ARMV6_ASM) - _gcry_aes_armv6_encrypt_block(ctx->keyschenc, b, a, ctx->rounds); +#elif defined(USE_ARM_ASM) + _gcry_aes_arm_encrypt_block(ctx->keyschenc, b, a, ctx->rounds); #else #define rk (ctx->keyschenc) int rounds = ctx->rounds; @@ -651,7 +651,7 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx, *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]); *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]); #undef rk -#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ } @@ -659,7 +659,7 @@ static void do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { -#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM) +#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM) /* BX and AX are not necessary correctly aligned. Thus we might need to copy them here. We try to align to a 16 bytes. */ if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) @@ -680,7 +680,7 @@ do_encrypt (const RIJNDAEL_context *ctx, memcpy (bx, b.b, 16); } else -#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ { do_encrypt_aligned (ctx, bx, ax); } @@ -1694,8 +1694,8 @@ do_decrypt_aligned (RIJNDAEL_context *ctx, { #ifdef USE_AMD64_ASM _gcry_aes_amd64_decrypt_block(ctx->keyschdec, b, a, ctx->rounds); -#elif defined(USE_ARMV6_ASM) - _gcry_aes_armv6_decrypt_block(ctx->keyschdec, b, a, ctx->rounds); +#elif defined(USE_ARM_ASM) + _gcry_aes_arm_decrypt_block(ctx->keyschdec, b, a, ctx->rounds); #else #define rk (ctx->keyschdec) int rounds = ctx->rounds; @@ -1779,7 +1779,7 @@ do_decrypt_aligned (RIJNDAEL_context *ctx, *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]); *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]); #undef rk -#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ } @@ -1794,7 +1794,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) ctx->decryption_prepared = 1; } -#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM) +#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM) /* BX and AX are not necessary correctly aligned. Thus we might need to copy them here. We try to align to a 16 bytes. 
*/ if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) @@ -1815,7 +1815,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) memcpy (bx, b.b, 16); } else -#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ { do_decrypt_aligned (ctx, bx, ax); } diff --git a/cipher/twofish-armv6.S b/cipher/twofish-arm.S index b76ab37c..ee22f568 100644 --- a/cipher/twofish-armv6.S +++ b/cipher/twofish-arm.S @@ -1,4 +1,4 @@ -/* twofish-armv6.S - ARM assembly implementation of Twofish cipher +/* twofish-arm.S - ARM assembly implementation of Twofish cipher * * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * @@ -20,7 +20,7 @@ #include <config.h> -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +#if defined(__ARMEL__) #ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS .text @@ -257,10 +257,10 @@ ror1(RD); .align 3 -.global _gcry_twofish_armv6_encrypt_block -.type _gcry_twofish_armv6_encrypt_block,%function; +.global _gcry_twofish_arm_encrypt_block +.type _gcry_twofish_arm_encrypt_block,%function; -_gcry_twofish_armv6_encrypt_block: +_gcry_twofish_arm_encrypt_block: /* input: * %r0: ctx * %r1: dst @@ -303,16 +303,15 @@ _gcry_twofish_armv6_encrypt_block: str_output_le(%r1, RC, RD, RA, RB, RT0, RT1); - pop {%r4-%r11, %ip, %lr}; - bx %lr; + pop {%r4-%r11, %ip, %pc}; .ltorg -.size _gcry_twofish_armv6_encrypt_block,.-_gcry_twofish_armv6_encrypt_block; +.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block; .align 3 -.global _gcry_twofish_armv6_decrypt_block -.type _gcry_twofish_armv6_decrypt_block,%function; +.global _gcry_twofish_arm_decrypt_block +.type _gcry_twofish_arm_decrypt_block,%function; -_gcry_twofish_armv6_decrypt_block: +_gcry_twofish_arm_decrypt_block: /* input: * %r0: ctx * %r1: dst @@ -357,9 +356,8 @@ _gcry_twofish_armv6_decrypt_block: str_output_le(%r1, RA, RB, RC, RD, RT0, RT1); - pop {%r4-%r11, %ip, %lr}; - bx %lr; -.size _gcry_twofish_armv6_decrypt_block,.-_gcry_twofish_armv6_decrypt_block; + pop {%r4-%r11, %ip, %pc}; +.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block; #endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ -#endif /*__ARM_ARCH >= 6*/ +#endif /*__ARMEL__*/ diff --git a/cipher/twofish.c b/cipher/twofish.c index d2cabbe8..086df76f 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -57,11 +57,11 @@ # define USE_AMD64_ASM 1 #endif -/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ -#undef USE_ARMV6_ASM -#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) +/* USE_ARM_ASM indicates whether to use ARM assembly code. */ +#undef USE_ARM_ASM +#if defined(__ARMEL__) # if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) -# define USE_ARMV6_ASM 1 +# define USE_ARM_ASM 1 # endif #endif @@ -754,16 +754,16 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv); -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) /* Assembly implementations of Twofish. */ -extern void _gcry_twofish_armv6_encrypt_block(const TWOFISH_context *c, +extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in); -extern void _gcry_twofish_armv6_decrypt_block(const TWOFISH_context *c, +extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in); -#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ /* Macros to compute the g() function in the encryption and decryption * rounds. 
G1 is the straight g() function; G2 includes the 8-bit @@ -837,17 +837,17 @@ twofish_encrypt (void *context, byte *out, const byte *in) return /*burn_stack*/ (4*sizeof (void*)); } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) static unsigned int twofish_encrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; - _gcry_twofish_armv6_encrypt_block(ctx, out, in); + _gcry_twofish_arm_encrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } -#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ static void do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in) @@ -889,7 +889,7 @@ twofish_encrypt (void *context, byte *out, const byte *in) return /*burn_stack*/ (24+3*sizeof (void*)); } -#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ /* Decrypt one block. in and out may be the same. */ @@ -904,17 +904,17 @@ twofish_decrypt (void *context, byte *out, const byte *in) return /*burn_stack*/ (4*sizeof (void*)); } -#elif defined(USE_ARMV6_ASM) +#elif defined(USE_ARM_ASM) static unsigned int twofish_decrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; - _gcry_twofish_armv6_decrypt_block(ctx, out, in); + _gcry_twofish_arm_decrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } -#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/ static void do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in) @@ -957,7 +957,7 @@ twofish_decrypt (void *context, byte *out, const byte *in) return /*burn_stack*/ (24+3*sizeof (void*)); } -#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/ diff --git a/configure.ac b/configure.ac index a1ffdc7d..5b7ba0d8 100644 --- a/configure.ac +++ b/configure.ac @@ -1424,7 +1424,7 @@ if test "$found" = "1" ; then ;; arm*-*-*) # Build with the assembly implementation - GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-armv6.lo" + GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish-arm.lo" ;; esac fi @@ -1441,7 +1441,7 @@ if test "$found" = "1" ; then ;; arm*-*-*) # Build with the assembly implementation - GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-armv6.lo" + GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5-arm.lo" ;; esac fi @@ -1464,7 +1464,7 @@ if test "$found" = "1" ; then ;; arm*-*-*) # Build with the assembly implementation - GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv6.lo" + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-arm.lo" ;; esac fi @@ -1481,7 +1481,7 @@ if test "$found" = "1" ; then ;; arm*-*-*) # Build with the assembly implementation - GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-armv6.lo" + GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish-arm.lo" ;; esac fi @@ -1524,7 +1524,7 @@ if test "$found" = "1" ; then case "${host}" in arm*-*-*) # Build with the assembly implementation - GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-armv6.lo" + GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia-arm.lo" ;; esac |
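As a closing note on how the renamed code is selected: each C glue file replaces its USE_ARMV6_ASM test with a USE_ARM_ASM gate that only requires a little-endian ARM target and a compatible assembler, leaving the v6-versus-older choice (rev versus the eor/lsr/bic/eor fallback) to HAVE_ARM_ARCH_V6 inside the .S files, while configure.ac builds the renamed *-arm.lo objects for any arm*-*-* host. A condensed sketch of that gate (simplified; blowfish.c additionally requires BLOWFISH_ROUNDS == 16):

    /* Condensed sketch of the USE_ARM_ASM gate used across the glue
     * files above (not verbatim from any single file). */
    #undef USE_ARM_ASM
    #if defined(__ARMEL__)                        /* little-endian ARM        */
    # ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS   /* assembler accepts syntax */
    #  define USE_ARM_ASM 1                       /* no HAVE_ARM_ARCH_V6 test */
    # endif
    #endif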