diff options
-rw-r--r-- | cipher/Makefile.am | 2 | ||||
-rw-r--r-- | cipher/rijndael-armv6.S | 860 | ||||
-rw-r--r-- | cipher/rijndael.c | 48 | ||||
-rw-r--r-- | configure.ac | 32 |
4 files changed, 933 insertions, 9 deletions
diff --git a/cipher/Makefile.am b/cipher/Makefile.am index 75ad9875..931675ce 100644 --- a/cipher/Makefile.am +++ b/cipher/Makefile.am @@ -64,7 +64,7 @@ ecc.c \ idea.c \ md4.c \ md5.c \ -rijndael.c rijndael-tables.h rijndael-amd64.S \ +rijndael.c rijndael-tables.h rijndael-amd64.S rijndael-armv6.S \ rmd160.c \ rsa.c \ salsa20.c \ diff --git a/cipher/rijndael-armv6.S b/cipher/rijndael-armv6.S new file mode 100644 index 00000000..e778a94e --- /dev/null +++ b/cipher/rijndael-armv6.S @@ -0,0 +1,860 @@ +/* rijndael-armv6.S - ARM assembly implementation of AES cipher + * + * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__arm__) && defined(__ARMEL__) && \ + ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ + || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__)) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +/* register macros */ +#define CTX %r0 +#define RTAB %lr +#define RMASK %ip + +#define RA %r4 +#define RB %r5 +#define RC %r6 +#define RD %r7 + +#define RNA %r8 +#define RNB %r9 +#define RNC %r10 +#define RND %r11 + +#define RT0 %r1 +#define RT1 %r2 +#define RT2 %r3 + +/* helper macros */ +#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 0)]; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 3)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 0)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 1)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 2)]; \ + strb rtmp0, [rdst, #((offs) + 3)]; + +/*********************************************************************** + * ARM assembly implementation of the AES cipher + ***********************************************************************/ +#define preload_first_key(round, ra) \ + ldr ra, [CTX, #(((round) * 16) + 0 * 4)]; + +#define dummy(round, ra) /* nothing */ + +#define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldm CTX, {rna, rnb, rnc, rnd}; \ + eor ra, rna; \ + eor rb, rnb; \ + eor rc, rnc; \ + preload_key(1, rna); \ + eor rd, rnd; + +#define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ + \ + and RT0, RMASK, ra, lsl#3; \ + ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ + and RT1, RMASK, ra, lsr#(8 - 3); \ + ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ + and RT2, RMASK, ra, lsr#(16 - 3); \ + ldr RT0, [RTAB, RT0]; \ + and ra, RMASK, ra, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rna, rna, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rd, lsl#3; \ + ldr ra, [RTAB, ra]; \ + \ + eor rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#(8 - 3); \ + eor rnc, rnc, RT2, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 3); \ + eor rnb, rnb, ra, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rd, RMASK, rd, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnd, rnd, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc, lsl#3; \ + ldr rd, [RTAB, rd]; \ + \ + eor rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 3); \ + eor rnb, rnb, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 3); \ + eor rna, rna, rd, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rc, RMASK, rc, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnc, rnc, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rb, lsl#3; \ + ldr rc, [RTAB, rc]; \ + \ + eor rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 3); \ + eor rna, rna, RT2, ror #16; \ + and RT2, RMASK, rb, lsr#(16 - 3); \ + eor rnd, rnd, rc, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rb, RMASK, rb, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnb, rnb, RT0; \ + ldr RT2, [RTAB, RT2]; \ + eor rna, rna, RT1, ror #24; \ + ldr rb, [RTAB, rb]; \ + \ + eor rnd, rnd, RT2, ror #16; \ + preload_key((next_r) + 1, ra); \ + eor rnc, rnc, rb, ror #8; + +#define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + and RT0, RMASK, ra, lsl#3; \ + and RT1, RMASK, ra, lsr#(8 - 3); \ + and RT2, RMASK, ra, lsr#(16 - 3); \ + ldr rna, [RTAB, RT0]; \ + and ra, RMASK, ra, lsr#(24 - 3); \ + ldr rnd, [RTAB, RT1]; \ + and RT0, RMASK, rd, lsl#3; \ + ldr rnc, [RTAB, RT2]; \ + mov rnd, rnd, ror #24; \ + ldr rnb, [RTAB, ra]; \ + and RT1, RMASK, rd, lsr#(8 - 3); \ + mov rnc, rnc, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 3); \ + mov rnb, rnb, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rd, RMASK, rd, lsr#(24 - 3); \ + ldr RT1, [RTAB, RT1]; \ + \ + orr rnd, rnd, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc, lsl#3; \ + ldr rd, [RTAB, rd]; \ + orr rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 3); \ + orr rnb, rnb, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 3); \ + orr rna, rna, rd, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rc, RMASK, rc, lsr#(24 - 3); \ + ldr RT1, [RTAB, RT1]; \ + \ + orr rnc, rnc, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rb, lsl#3; \ + ldr rc, [RTAB, rc]; \ + orr rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 3); \ + orr rna, rna, RT2, ror #16; \ + ldr RT0, [RTAB, RT0]; \ + and RT2, RMASK, rb, lsr#(16 - 3); \ + ldr RT1, [RTAB, RT1]; \ + orr rnd, rnd, rc, ror #8; \ + ldr RT2, [RTAB, RT2]; \ + and rb, RMASK, rb, lsr#(24 - 3); \ + ldr rb, [RTAB, rb]; \ + \ + orr rnb, rnb, RT0; \ + orr rna, rna, RT1, ror #24; \ + orr rnd, rnd, RT2, ror #16; \ + orr rnc, rnc, rb, ror #8; + +#define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \ + do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); + +#define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); + +#define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + add CTX, #(((round) + 1) * 16); \ + add RTAB, #4; \ + do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); + +.align 3 +.global _gcry_aes_armv6_encrypt_block +.type _gcry_aes_armv6_encrypt_block,%function; + +_gcry_aes_armv6_encrypt_block: + /* input: + * %r0: keysched, CTX + * %r1: dst + * %r2: src + * %r3: number of rounds.. 10, 12 or 14 + */ + push {%r4-%r11, %ip, %lr}; + + /* read input block */ +#ifndef __ARM_FEATURE_UNALIGNED + /* test if src is unaligned */ + tst %r2, #3; + beq 1f; + + /* unaligned load */ + ldr_unaligned_le(RA, %r2, 0, RNA); + ldr_unaligned_le(RB, %r2, 4, RNB); + ldr_unaligned_le(RC, %r2, 8, RNA); + ldr_unaligned_le(RD, %r2, 12, RNB); + b 2f; +.ltorg +1: +#endif + /* aligned load */ + ldm %r2, {RA, RB, RC, RD}; +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif +2: + sub %sp, #16; + + ldr RTAB, =.LtableE0; + + str %r1, [%sp, #4]; /* dst */ + mov RMASK, #0xff; + str %r3, [%sp, #8]; /* nrounds */ + mov RMASK, RMASK, lsl#3; /* byte mask */ + + firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND); + encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + + ldr RT0, [%sp, #8]; /* nrounds */ + cmp RT0, #12; + bge .Lenc_not_128; + + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD); + +.Lenc_done: + ldr RT0, [%sp, #4]; /* dst */ + add %sp, #16; + + /* store output block */ +#ifndef __ARM_FEATURE_UNALIGNED + /* test if dst is unaligned */ + tst RT0, #3; + beq 1f; + + /* unaligned store */ + str_unaligned_le(RA, RT0, 0, RNA, RNB); + str_unaligned_le(RB, RT0, 4, RNA, RNB); + str_unaligned_le(RC, RT0, 8, RNA, RNB); + str_unaligned_le(RD, RT0, 12, RNA, RNB); + b 2f; +.ltorg +1: +#endif + /* aligned store */ +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + /* write output block */ + stm RT0, {RA, RB, RC, RD}; +2: + pop {%r4-%r11, %ip, %pc}; + +.ltorg +.Lenc_not_128: + beq .Lenc_192 + + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + b .Lenc_done; + +.ltorg +.Lenc_192: + encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + b .Lenc_done; +.size _gcry_aes_armv6_encrypt_block,.-_gcry_aes_armv6_encrypt_block; + +#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \ + ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \ + eor ra, rna; \ + ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \ + eor rb, rnb; \ + ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \ + eor rc, rnc; \ + preload_first_key((round) - 1, rna); \ + eor rd, rnd; + +#define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \ + \ + and RT0, RMASK, ra, lsl#3; \ + ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \ + and RT1, RMASK, ra, lsr#(8 - 3); \ + ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \ + and RT2, RMASK, ra, lsr#(16 - 3); \ + ldr RT0, [RTAB, RT0]; \ + and ra, RMASK, ra, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rna, rna, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rb, lsl#3; \ + ldr ra, [RTAB, ra]; \ + \ + eor rnb, rnb, RT1, ror #24; \ + and RT1, RMASK, rb, lsr#(8 - 3); \ + eor rnc, rnc, RT2, ror #16; \ + and RT2, RMASK, rb, lsr#(16 - 3); \ + eor rnd, rnd, ra, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rb, RMASK, rb, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnb, rnb, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc, lsl#3; \ + ldr rb, [RTAB, rb]; \ + \ + eor rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 3); \ + eor rnd, rnd, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 3); \ + eor rna, rna, rb, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rc, RMASK, rc, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnc, rnc, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rd, lsl#3; \ + ldr rc, [RTAB, rc]; \ + \ + eor rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#(8 - 3); \ + eor rna, rna, RT2, ror #16; \ + and RT2, RMASK, rd, lsr#(16 - 3); \ + eor rnb, rnb, rc, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rd, RMASK, rd, lsr#(24 - 3); \ + \ + ldr RT1, [RTAB, RT1]; \ + eor rnd, rnd, RT0; \ + ldr RT2, [RTAB, RT2]; \ + eor rna, rna, RT1, ror #24; \ + ldr rd, [RTAB, rd]; \ + \ + eor rnb, rnb, RT2, ror #16; \ + preload_key((next_r) - 1, ra); \ + eor rnc, rnc, rd, ror #8; + +#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + and RT0, RMASK, ra, lsl#3; \ + and RT1, RMASK, ra, lsr#(8 - 3); \ + and RT2, RMASK, ra, lsr#(16 - 3); \ + ldr rna, [RTAB, RT0]; \ + and ra, RMASK, ra, lsr#(24 - 3); \ + ldr rnb, [RTAB, RT1]; \ + and RT0, RMASK, rb, lsl#3; \ + ldr rnc, [RTAB, RT2]; \ + mov rnb, rnb, ror #24; \ + ldr rnd, [RTAB, ra]; \ + and RT1, RMASK, rb, lsr#(8 - 3); \ + mov rnc, rnc, ror #16; \ + and RT2, RMASK, rb, lsr#(16 - 3); \ + mov rnd, rnd, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rb, RMASK, rb, lsr#(24 - 3); \ + ldr RT1, [RTAB, RT1]; \ + \ + orr rnb, rnb, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rc, lsl#3; \ + ldr rb, [RTAB, rb]; \ + orr rnc, rnc, RT1, ror #24; \ + and RT1, RMASK, rc, lsr#(8 - 3); \ + orr rnd, rnd, RT2, ror #16; \ + and RT2, RMASK, rc, lsr#(16 - 3); \ + orr rna, rna, rb, ror #8; \ + ldr RT0, [RTAB, RT0]; \ + and rc, RMASK, rc, lsr#(24 - 3); \ + ldr RT1, [RTAB, RT1]; \ + \ + orr rnc, rnc, RT0; \ + ldr RT2, [RTAB, RT2]; \ + and RT0, RMASK, rd, lsl#3; \ + ldr rc, [RTAB, rc]; \ + orr rnd, rnd, RT1, ror #24; \ + and RT1, RMASK, rd, lsr#(8 - 3); \ + orr rna, rna, RT2, ror #16; \ + ldr RT0, [RTAB, RT0]; \ + and RT2, RMASK, rd, lsr#(16 - 3); \ + ldr RT1, [RTAB, RT1]; \ + orr rnb, rnb, rc, ror #8; \ + ldr RT2, [RTAB, RT2]; \ + and rd, RMASK, rd, lsr#(24 - 3); \ + ldr rd, [RTAB, rd]; \ + \ + orr rnd, rnd, RT0; \ + orr rna, rna, RT1, ror #24; \ + orr rnb, rnb, RT2, ror #16; \ + orr rnc, rnc, rd, ror #8; + +#define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); + +#define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \ + do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key); + +#define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \ + add RTAB, #4; \ + do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \ + addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy); + +.align 3 +.global _gcry_aes_armv6_decrypt_block +.type _gcry_aes_armv6_decrypt_block,%function; + +_gcry_aes_armv6_decrypt_block: + /* input: + * %r0: keysched, CTX + * %r1: dst + * %r2: src + * %r3: number of rounds.. 10, 12 or 14 + */ + push {%r4-%r11, %ip, %lr}; + + /* read input block */ +#ifndef __ARM_FEATURE_UNALIGNED + /* test if src is unaligned */ + tst %r2, #3; + beq 1f; + + /* unaligned load */ + ldr_unaligned_le(RA, %r2, 0, RNA); + ldr_unaligned_le(RB, %r2, 4, RNB); + ldr_unaligned_le(RC, %r2, 8, RNA); + ldr_unaligned_le(RD, %r2, 12, RNB); + b 2f; +.ltorg +1: +#endif + /* aligned load */ + ldm %r2, {RA, RB, RC, RD}; +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif +2: + sub %sp, #16; + + ldr RTAB, =.LtableD0; + + mov RMASK, #0xff; + str %r1, [%sp, #4]; /* dst */ + mov RMASK, RMASK, lsl#3; /* byte mask */ + + cmp %r3, #12; + bge .Ldec_256; + + firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND); +.Ldec_tail: + decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy); + lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD); + + ldr RT0, [%sp, #4]; /* dst */ + add %sp, #16; + + /* store output block */ +#ifndef __ARM_FEATURE_UNALIGNED + /* test if dst is unaligned */ + tst RT0, #3; + beq 1f; + + /* unaligned store */ + str_unaligned_le(RA, RT0, 0, RNA, RNB); + str_unaligned_le(RB, RT0, 4, RNA, RNB); + str_unaligned_le(RC, RT0, 8, RNA, RNB); + str_unaligned_le(RD, RT0, 12, RNA, RNB); + b 2f; +.ltorg +1: +#endif + /* aligned store */ +#ifndef __ARMEL__ + rev RA, RA; + rev RB, RB; + rev RC, RC; + rev RD, RD; +#endif + /* write output block */ + stm RT0, {RA, RB, RC, RD}; +2: + pop {%r4-%r11, %ip, %pc}; + +.ltorg +.Ldec_256: + beq .Ldec_192; + + firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND); + decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + + b .Ldec_tail; + +.ltorg +.Ldec_192: + firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND); + decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key); + decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key); + + b .Ldec_tail; +.size _gcry_aes_armv6_encrypt_block,.-_gcry_aes_armv6_encrypt_block; + +.data + +/* Encryption tables */ +.align 5 +.type .LtableE0, %object +.type .LtableEs0, %object +.LtableE0: +.long 0xa56363c6 +.LtableEs0: +.long 0x00000063, 0x847c7cf8, 0x0000007c +.long 0x997777ee, 0x00000077, 0x8d7b7bf6, 0x0000007b +.long 0x0df2f2ff, 0x000000f2, 0xbd6b6bd6, 0x0000006b +.long 0xb16f6fde, 0x0000006f, 0x54c5c591, 0x000000c5 +.long 0x50303060, 0x00000030, 0x03010102, 0x00000001 +.long 0xa96767ce, 0x00000067, 0x7d2b2b56, 0x0000002b +.long 0x19fefee7, 0x000000fe, 0x62d7d7b5, 0x000000d7 +.long 0xe6abab4d, 0x000000ab, 0x9a7676ec, 0x00000076 +.long 0x45caca8f, 0x000000ca, 0x9d82821f, 0x00000082 +.long 0x40c9c989, 0x000000c9, 0x877d7dfa, 0x0000007d +.long 0x15fafaef, 0x000000fa, 0xeb5959b2, 0x00000059 +.long 0xc947478e, 0x00000047, 0x0bf0f0fb, 0x000000f0 +.long 0xecadad41, 0x000000ad, 0x67d4d4b3, 0x000000d4 +.long 0xfda2a25f, 0x000000a2, 0xeaafaf45, 0x000000af +.long 0xbf9c9c23, 0x0000009c, 0xf7a4a453, 0x000000a4 +.long 0x967272e4, 0x00000072, 0x5bc0c09b, 0x000000c0 +.long 0xc2b7b775, 0x000000b7, 0x1cfdfde1, 0x000000fd +.long 0xae93933d, 0x00000093, 0x6a26264c, 0x00000026 +.long 0x5a36366c, 0x00000036, 0x413f3f7e, 0x0000003f +.long 0x02f7f7f5, 0x000000f7, 0x4fcccc83, 0x000000cc +.long 0x5c343468, 0x00000034, 0xf4a5a551, 0x000000a5 +.long 0x34e5e5d1, 0x000000e5, 0x08f1f1f9, 0x000000f1 +.long 0x937171e2, 0x00000071, 0x73d8d8ab, 0x000000d8 +.long 0x53313162, 0x00000031, 0x3f15152a, 0x00000015 +.long 0x0c040408, 0x00000004, 0x52c7c795, 0x000000c7 +.long 0x65232346, 0x00000023, 0x5ec3c39d, 0x000000c3 +.long 0x28181830, 0x00000018, 0xa1969637, 0x00000096 +.long 0x0f05050a, 0x00000005, 0xb59a9a2f, 0x0000009a +.long 0x0907070e, 0x00000007, 0x36121224, 0x00000012 +.long 0x9b80801b, 0x00000080, 0x3de2e2df, 0x000000e2 +.long 0x26ebebcd, 0x000000eb, 0x6927274e, 0x00000027 +.long 0xcdb2b27f, 0x000000b2, 0x9f7575ea, 0x00000075 +.long 0x1b090912, 0x00000009, 0x9e83831d, 0x00000083 +.long 0x742c2c58, 0x0000002c, 0x2e1a1a34, 0x0000001a +.long 0x2d1b1b36, 0x0000001b, 0xb26e6edc, 0x0000006e +.long 0xee5a5ab4, 0x0000005a, 0xfba0a05b, 0x000000a0 +.long 0xf65252a4, 0x00000052, 0x4d3b3b76, 0x0000003b +.long 0x61d6d6b7, 0x000000d6, 0xceb3b37d, 0x000000b3 +.long 0x7b292952, 0x00000029, 0x3ee3e3dd, 0x000000e3 +.long 0x712f2f5e, 0x0000002f, 0x97848413, 0x00000084 +.long 0xf55353a6, 0x00000053, 0x68d1d1b9, 0x000000d1 +.long 0x00000000, 0x00000000, 0x2cededc1, 0x000000ed +.long 0x60202040, 0x00000020, 0x1ffcfce3, 0x000000fc +.long 0xc8b1b179, 0x000000b1, 0xed5b5bb6, 0x0000005b +.long 0xbe6a6ad4, 0x0000006a, 0x46cbcb8d, 0x000000cb +.long 0xd9bebe67, 0x000000be, 0x4b393972, 0x00000039 +.long 0xde4a4a94, 0x0000004a, 0xd44c4c98, 0x0000004c +.long 0xe85858b0, 0x00000058, 0x4acfcf85, 0x000000cf +.long 0x6bd0d0bb, 0x000000d0, 0x2aefefc5, 0x000000ef +.long 0xe5aaaa4f, 0x000000aa, 0x16fbfbed, 0x000000fb +.long 0xc5434386, 0x00000043, 0xd74d4d9a, 0x0000004d +.long 0x55333366, 0x00000033, 0x94858511, 0x00000085 +.long 0xcf45458a, 0x00000045, 0x10f9f9e9, 0x000000f9 +.long 0x06020204, 0x00000002, 0x817f7ffe, 0x0000007f +.long 0xf05050a0, 0x00000050, 0x443c3c78, 0x0000003c +.long 0xba9f9f25, 0x0000009f, 0xe3a8a84b, 0x000000a8 +.long 0xf35151a2, 0x00000051, 0xfea3a35d, 0x000000a3 +.long 0xc0404080, 0x00000040, 0x8a8f8f05, 0x0000008f +.long 0xad92923f, 0x00000092, 0xbc9d9d21, 0x0000009d +.long 0x48383870, 0x00000038, 0x04f5f5f1, 0x000000f5 +.long 0xdfbcbc63, 0x000000bc, 0xc1b6b677, 0x000000b6 +.long 0x75dadaaf, 0x000000da, 0x63212142, 0x00000021 +.long 0x30101020, 0x00000010, 0x1affffe5, 0x000000ff +.long 0x0ef3f3fd, 0x000000f3, 0x6dd2d2bf, 0x000000d2 +.long 0x4ccdcd81, 0x000000cd, 0x140c0c18, 0x0000000c +.long 0x35131326, 0x00000013, 0x2fececc3, 0x000000ec +.long 0xe15f5fbe, 0x0000005f, 0xa2979735, 0x00000097 +.long 0xcc444488, 0x00000044, 0x3917172e, 0x00000017 +.long 0x57c4c493, 0x000000c4, 0xf2a7a755, 0x000000a7 +.long 0x827e7efc, 0x0000007e, 0x473d3d7a, 0x0000003d +.long 0xac6464c8, 0x00000064, 0xe75d5dba, 0x0000005d +.long 0x2b191932, 0x00000019, 0x957373e6, 0x00000073 +.long 0xa06060c0, 0x00000060, 0x98818119, 0x00000081 +.long 0xd14f4f9e, 0x0000004f, 0x7fdcdca3, 0x000000dc +.long 0x66222244, 0x00000022, 0x7e2a2a54, 0x0000002a +.long 0xab90903b, 0x00000090, 0x8388880b, 0x00000088 +.long 0xca46468c, 0x00000046, 0x29eeeec7, 0x000000ee +.long 0xd3b8b86b, 0x000000b8, 0x3c141428, 0x00000014 +.long 0x79dedea7, 0x000000de, 0xe25e5ebc, 0x0000005e +.long 0x1d0b0b16, 0x0000000b, 0x76dbdbad, 0x000000db +.long 0x3be0e0db, 0x000000e0, 0x56323264, 0x00000032 +.long 0x4e3a3a74, 0x0000003a, 0x1e0a0a14, 0x0000000a +.long 0xdb494992, 0x00000049, 0x0a06060c, 0x00000006 +.long 0x6c242448, 0x00000024, 0xe45c5cb8, 0x0000005c +.long 0x5dc2c29f, 0x000000c2, 0x6ed3d3bd, 0x000000d3 +.long 0xefacac43, 0x000000ac, 0xa66262c4, 0x00000062 +.long 0xa8919139, 0x00000091, 0xa4959531, 0x00000095 +.long 0x37e4e4d3, 0x000000e4, 0x8b7979f2, 0x00000079 +.long 0x32e7e7d5, 0x000000e7, 0x43c8c88b, 0x000000c8 +.long 0x5937376e, 0x00000037, 0xb76d6dda, 0x0000006d +.long 0x8c8d8d01, 0x0000008d, 0x64d5d5b1, 0x000000d5 +.long 0xd24e4e9c, 0x0000004e, 0xe0a9a949, 0x000000a9 +.long 0xb46c6cd8, 0x0000006c, 0xfa5656ac, 0x00000056 +.long 0x07f4f4f3, 0x000000f4, 0x25eaeacf, 0x000000ea +.long 0xaf6565ca, 0x00000065, 0x8e7a7af4, 0x0000007a +.long 0xe9aeae47, 0x000000ae, 0x18080810, 0x00000008 +.long 0xd5baba6f, 0x000000ba, 0x887878f0, 0x00000078 +.long 0x6f25254a, 0x00000025, 0x722e2e5c, 0x0000002e +.long 0x241c1c38, 0x0000001c, 0xf1a6a657, 0x000000a6 +.long 0xc7b4b473, 0x000000b4, 0x51c6c697, 0x000000c6 +.long 0x23e8e8cb, 0x000000e8, 0x7cdddda1, 0x000000dd +.long 0x9c7474e8, 0x00000074, 0x211f1f3e, 0x0000001f +.long 0xdd4b4b96, 0x0000004b, 0xdcbdbd61, 0x000000bd +.long 0x868b8b0d, 0x0000008b, 0x858a8a0f, 0x0000008a +.long 0x907070e0, 0x00000070, 0x423e3e7c, 0x0000003e +.long 0xc4b5b571, 0x000000b5, 0xaa6666cc, 0x00000066 +.long 0xd8484890, 0x00000048, 0x05030306, 0x00000003 +.long 0x01f6f6f7, 0x000000f6, 0x120e0e1c, 0x0000000e +.long 0xa36161c2, 0x00000061, 0x5f35356a, 0x00000035 +.long 0xf95757ae, 0x00000057, 0xd0b9b969, 0x000000b9 +.long 0x91868617, 0x00000086, 0x58c1c199, 0x000000c1 +.long 0x271d1d3a, 0x0000001d, 0xb99e9e27, 0x0000009e +.long 0x38e1e1d9, 0x000000e1, 0x13f8f8eb, 0x000000f8 +.long 0xb398982b, 0x00000098, 0x33111122, 0x00000011 +.long 0xbb6969d2, 0x00000069, 0x70d9d9a9, 0x000000d9 +.long 0x898e8e07, 0x0000008e, 0xa7949433, 0x00000094 +.long 0xb69b9b2d, 0x0000009b, 0x221e1e3c, 0x0000001e +.long 0x92878715, 0x00000087, 0x20e9e9c9, 0x000000e9 +.long 0x49cece87, 0x000000ce, 0xff5555aa, 0x00000055 +.long 0x78282850, 0x00000028, 0x7adfdfa5, 0x000000df +.long 0x8f8c8c03, 0x0000008c, 0xf8a1a159, 0x000000a1 +.long 0x80898909, 0x00000089, 0x170d0d1a, 0x0000000d +.long 0xdabfbf65, 0x000000bf, 0x31e6e6d7, 0x000000e6 +.long 0xc6424284, 0x00000042, 0xb86868d0, 0x00000068 +.long 0xc3414182, 0x00000041, 0xb0999929, 0x00000099 +.long 0x772d2d5a, 0x0000002d, 0x110f0f1e, 0x0000000f +.long 0xcbb0b07b, 0x000000b0, 0xfc5454a8, 0x00000054 +.long 0xd6bbbb6d, 0x000000bb, 0x3a16162c, 0x00000016 + +/* Decryption tables */ +.align 5 +.type .LtableD0, %object +.type .LtableDs0, %object +.LtableD0: +.long 0x50a7f451 +.LtableDs0: +.long 0x00000052, 0x5365417e, 0x00000009 +.long 0xc3a4171a, 0x0000006a, 0x965e273a, 0x000000d5 +.long 0xcb6bab3b, 0x00000030, 0xf1459d1f, 0x00000036 +.long 0xab58faac, 0x000000a5, 0x9303e34b, 0x00000038 +.long 0x55fa3020, 0x000000bf, 0xf66d76ad, 0x00000040 +.long 0x9176cc88, 0x000000a3, 0x254c02f5, 0x0000009e +.long 0xfcd7e54f, 0x00000081, 0xd7cb2ac5, 0x000000f3 +.long 0x80443526, 0x000000d7, 0x8fa362b5, 0x000000fb +.long 0x495ab1de, 0x0000007c, 0x671bba25, 0x000000e3 +.long 0x980eea45, 0x00000039, 0xe1c0fe5d, 0x00000082 +.long 0x02752fc3, 0x0000009b, 0x12f04c81, 0x0000002f +.long 0xa397468d, 0x000000ff, 0xc6f9d36b, 0x00000087 +.long 0xe75f8f03, 0x00000034, 0x959c9215, 0x0000008e +.long 0xeb7a6dbf, 0x00000043, 0xda595295, 0x00000044 +.long 0x2d83bed4, 0x000000c4, 0xd3217458, 0x000000de +.long 0x2969e049, 0x000000e9, 0x44c8c98e, 0x000000cb +.long 0x6a89c275, 0x00000054, 0x78798ef4, 0x0000007b +.long 0x6b3e5899, 0x00000094, 0xdd71b927, 0x00000032 +.long 0xb64fe1be, 0x000000a6, 0x17ad88f0, 0x000000c2 +.long 0x66ac20c9, 0x00000023, 0xb43ace7d, 0x0000003d +.long 0x184adf63, 0x000000ee, 0x82311ae5, 0x0000004c +.long 0x60335197, 0x00000095, 0x457f5362, 0x0000000b +.long 0xe07764b1, 0x00000042, 0x84ae6bbb, 0x000000fa +.long 0x1ca081fe, 0x000000c3, 0x942b08f9, 0x0000004e +.long 0x58684870, 0x00000008, 0x19fd458f, 0x0000002e +.long 0x876cde94, 0x000000a1, 0xb7f87b52, 0x00000066 +.long 0x23d373ab, 0x00000028, 0xe2024b72, 0x000000d9 +.long 0x578f1fe3, 0x00000024, 0x2aab5566, 0x000000b2 +.long 0x0728ebb2, 0x00000076, 0x03c2b52f, 0x0000005b +.long 0x9a7bc586, 0x000000a2, 0xa50837d3, 0x00000049 +.long 0xf2872830, 0x0000006d, 0xb2a5bf23, 0x0000008b +.long 0xba6a0302, 0x000000d1, 0x5c8216ed, 0x00000025 +.long 0x2b1ccf8a, 0x00000072, 0x92b479a7, 0x000000f8 +.long 0xf0f207f3, 0x000000f6, 0xa1e2694e, 0x00000064 +.long 0xcdf4da65, 0x00000086, 0xd5be0506, 0x00000068 +.long 0x1f6234d1, 0x00000098, 0x8afea6c4, 0x00000016 +.long 0x9d532e34, 0x000000d4, 0xa055f3a2, 0x000000a4 +.long 0x32e18a05, 0x0000005c, 0x75ebf6a4, 0x000000cc +.long 0x39ec830b, 0x0000005d, 0xaaef6040, 0x00000065 +.long 0x069f715e, 0x000000b6, 0x51106ebd, 0x00000092 +.long 0xf98a213e, 0x0000006c, 0x3d06dd96, 0x00000070 +.long 0xae053edd, 0x00000048, 0x46bde64d, 0x00000050 +.long 0xb58d5491, 0x000000fd, 0x055dc471, 0x000000ed +.long 0x6fd40604, 0x000000b9, 0xff155060, 0x000000da +.long 0x24fb9819, 0x0000005e, 0x97e9bdd6, 0x00000015 +.long 0xcc434089, 0x00000046, 0x779ed967, 0x00000057 +.long 0xbd42e8b0, 0x000000a7, 0x888b8907, 0x0000008d +.long 0x385b19e7, 0x0000009d, 0xdbeec879, 0x00000084 +.long 0x470a7ca1, 0x00000090, 0xe90f427c, 0x000000d8 +.long 0xc91e84f8, 0x000000ab, 0x00000000, 0x00000000 +.long 0x83868009, 0x0000008c, 0x48ed2b32, 0x000000bc +.long 0xac70111e, 0x000000d3, 0x4e725a6c, 0x0000000a +.long 0xfbff0efd, 0x000000f7, 0x5638850f, 0x000000e4 +.long 0x1ed5ae3d, 0x00000058, 0x27392d36, 0x00000005 +.long 0x64d90f0a, 0x000000b8, 0x21a65c68, 0x000000b3 +.long 0xd1545b9b, 0x00000045, 0x3a2e3624, 0x00000006 +.long 0xb1670a0c, 0x000000d0, 0x0fe75793, 0x0000002c +.long 0xd296eeb4, 0x0000001e, 0x9e919b1b, 0x0000008f +.long 0x4fc5c080, 0x000000ca, 0xa220dc61, 0x0000003f +.long 0x694b775a, 0x0000000f, 0x161a121c, 0x00000002 +.long 0x0aba93e2, 0x000000c1, 0xe52aa0c0, 0x000000af +.long 0x43e0223c, 0x000000bd, 0x1d171b12, 0x00000003 +.long 0x0b0d090e, 0x00000001, 0xadc78bf2, 0x00000013 +.long 0xb9a8b62d, 0x0000008a, 0xc8a91e14, 0x0000006b +.long 0x8519f157, 0x0000003a, 0x4c0775af, 0x00000091 +.long 0xbbdd99ee, 0x00000011, 0xfd607fa3, 0x00000041 +.long 0x9f2601f7, 0x0000004f, 0xbcf5725c, 0x00000067 +.long 0xc53b6644, 0x000000dc, 0x347efb5b, 0x000000ea +.long 0x7629438b, 0x00000097, 0xdcc623cb, 0x000000f2 +.long 0x68fcedb6, 0x000000cf, 0x63f1e4b8, 0x000000ce +.long 0xcadc31d7, 0x000000f0, 0x10856342, 0x000000b4 +.long 0x40229713, 0x000000e6, 0x2011c684, 0x00000073 +.long 0x7d244a85, 0x00000096, 0xf83dbbd2, 0x000000ac +.long 0x1132f9ae, 0x00000074, 0x6da129c7, 0x00000022 +.long 0x4b2f9e1d, 0x000000e7, 0xf330b2dc, 0x000000ad +.long 0xec52860d, 0x00000035, 0xd0e3c177, 0x00000085 +.long 0x6c16b32b, 0x000000e2, 0x99b970a9, 0x000000f9 +.long 0xfa489411, 0x00000037, 0x2264e947, 0x000000e8 +.long 0xc48cfca8, 0x0000001c, 0x1a3ff0a0, 0x00000075 +.long 0xd82c7d56, 0x000000df, 0xef903322, 0x0000006e +.long 0xc74e4987, 0x00000047, 0xc1d138d9, 0x000000f1 +.long 0xfea2ca8c, 0x0000001a, 0x360bd498, 0x00000071 +.long 0xcf81f5a6, 0x0000001d, 0x28de7aa5, 0x00000029 +.long 0x268eb7da, 0x000000c5, 0xa4bfad3f, 0x00000089 +.long 0xe49d3a2c, 0x0000006f, 0x0d927850, 0x000000b7 +.long 0x9bcc5f6a, 0x00000062, 0x62467e54, 0x0000000e +.long 0xc2138df6, 0x000000aa, 0xe8b8d890, 0x00000018 +.long 0x5ef7392e, 0x000000be, 0xf5afc382, 0x0000001b +.long 0xbe805d9f, 0x000000fc, 0x7c93d069, 0x00000056 +.long 0xa92dd56f, 0x0000003e, 0xb31225cf, 0x0000004b +.long 0x3b99acc8, 0x000000c6, 0xa77d1810, 0x000000d2 +.long 0x6e639ce8, 0x00000079, 0x7bbb3bdb, 0x00000020 +.long 0x097826cd, 0x0000009a, 0xf418596e, 0x000000db +.long 0x01b79aec, 0x000000c0, 0xa89a4f83, 0x000000fe +.long 0x656e95e6, 0x00000078, 0x7ee6ffaa, 0x000000cd +.long 0x08cfbc21, 0x0000005a, 0xe6e815ef, 0x000000f4 +.long 0xd99be7ba, 0x0000001f, 0xce366f4a, 0x000000dd +.long 0xd4099fea, 0x000000a8, 0xd67cb029, 0x00000033 +.long 0xafb2a431, 0x00000088, 0x31233f2a, 0x00000007 +.long 0x3094a5c6, 0x000000c7, 0xc066a235, 0x00000031 +.long 0x37bc4e74, 0x000000b1, 0xa6ca82fc, 0x00000012 +.long 0xb0d090e0, 0x00000010, 0x15d8a733, 0x00000059 +.long 0x4a9804f1, 0x00000027, 0xf7daec41, 0x00000080 +.long 0x0e50cd7f, 0x000000ec, 0x2ff69117, 0x0000005f +.long 0x8dd64d76, 0x00000060, 0x4db0ef43, 0x00000051 +.long 0x544daacc, 0x0000007f, 0xdf0496e4, 0x000000a9 +.long 0xe3b5d19e, 0x00000019, 0x1b886a4c, 0x000000b5 +.long 0xb81f2cc1, 0x0000004a, 0x7f516546, 0x0000000d +.long 0x04ea5e9d, 0x0000002d, 0x5d358c01, 0x000000e5 +.long 0x737487fa, 0x0000007a, 0x2e410bfb, 0x0000009f +.long 0x5a1d67b3, 0x00000093, 0x52d2db92, 0x000000c9 +.long 0x335610e9, 0x0000009c, 0x1347d66d, 0x000000ef +.long 0x8c61d79a, 0x000000a0, 0x7a0ca137, 0x000000e0 +.long 0x8e14f859, 0x0000003b, 0x893c13eb, 0x0000004d +.long 0xee27a9ce, 0x000000ae, 0x35c961b7, 0x0000002a +.long 0xede51ce1, 0x000000f5, 0x3cb1477a, 0x000000b0 +.long 0x59dfd29c, 0x000000c8, 0x3f73f255, 0x000000eb +.long 0x79ce1418, 0x000000bb, 0xbf37c773, 0x0000003c +.long 0xeacdf753, 0x00000083, 0x5baafd5f, 0x00000053 +.long 0x146f3ddf, 0x00000099, 0x86db4478, 0x00000061 +.long 0x81f3afca, 0x00000017, 0x3ec468b9, 0x0000002b +.long 0x2c342438, 0x00000004, 0x5f40a3c2, 0x0000007e +.long 0x72c31d16, 0x000000ba, 0x0c25e2bc, 0x00000077 +.long 0x8b493c28, 0x000000d6, 0x41950dff, 0x00000026 +.long 0x7101a839, 0x000000e1, 0xdeb30c08, 0x00000069 +.long 0x9ce4b4d8, 0x00000014, 0x90c15664, 0x00000063 +.long 0x6184cb7b, 0x00000055, 0x70b632d5, 0x00000021 +.long 0x745c6c48, 0x0000000c, 0x4257b8d0, 0x0000007d + +#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ +#endif /*__ARM_ARCH >= 6*/ diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 35e599a3..8855e0cb 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -67,6 +67,21 @@ # define USE_AMD64_ASM 1 #endif +/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */ +#undef USE_ARMV6_ASM +#if defined(__arm__) && defined(__ARMEL__) && \ + ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \ + || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \ + || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__)) +# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS +# define USE_ARMV6_ASM 1 +# endif +#endif + /* USE_PADLOCK indicates whether to compile the padlock specific code. */ #undef USE_PADLOCK @@ -115,6 +130,19 @@ extern void _gcry_aes_amd64_decrypt_block(const void *keysched_dec, int rounds); #endif /*USE_AMD64_ASM*/ +#ifdef USE_ARMV6_ASM +/* ARMv6 assembly implementations of AES */ +extern void _gcry_aes_armv6_encrypt_block(const void *keysched_enc, + unsigned char *out, + const unsigned char *in, + int rounds); + +extern void _gcry_aes_armv6_decrypt_block(const void *keysched_dec, + unsigned char *out, + const unsigned char *in, + int rounds); +#endif /*USE_ARMV6_ASM*/ + /* Our context object. */ @@ -546,7 +574,9 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx, { #ifdef USE_AMD64_ASM _gcry_aes_amd64_encrypt_block(ctx->keyschenc, b, a, ctx->rounds); -#else /*!USE_AMD64_ASM*/ +#elif defined(USE_ARMV6_ASM) + _gcry_aes_armv6_encrypt_block(ctx->keyschenc, b, a, ctx->rounds); +#else #define rk (ctx->keyschenc) int rounds = ctx->rounds; int r; @@ -628,7 +658,7 @@ do_encrypt_aligned (const RIJNDAEL_context *ctx, *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]); *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]); #undef rk -#endif /*!USE_AMD64_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ } @@ -636,7 +666,7 @@ static void do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx, const unsigned char *ax) { -#ifndef USE_AMD64_ASM +#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM) /* BX and AX are not necessary correctly aligned. Thus we might need to copy them here. We try to align to a 16 bytes. */ if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) @@ -657,7 +687,7 @@ do_encrypt (const RIJNDAEL_context *ctx, memcpy (bx, b.b, 16); } else -#endif /*!USE_AMD64_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ { do_encrypt_aligned (ctx, bx, ax); } @@ -1667,7 +1697,9 @@ do_decrypt_aligned (RIJNDAEL_context *ctx, { #ifdef USE_AMD64_ASM _gcry_aes_amd64_decrypt_block(ctx->keyschdec, b, a, ctx->rounds); -#else /*!USE_AMD64_ASM*/ +#elif defined(USE_ARMV6_ASM) + _gcry_aes_armv6_decrypt_block(ctx->keyschdec, b, a, ctx->rounds); +#else #define rk (ctx->keyschdec) int rounds = ctx->rounds; int r; @@ -1750,7 +1782,7 @@ do_decrypt_aligned (RIJNDAEL_context *ctx, *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]); *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]); #undef rk -#endif /*!USE_AMD64_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ } @@ -1765,7 +1797,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) ctx->decryption_prepared = 1; } -#ifndef USE_AMD64_ASM +#if !defined(USE_AMD64_ASM) && !defined(USE_ARMV6_ASM) /* BX and AX are not necessary correctly aligned. Thus we might need to copy them here. We try to align to a 16 bytes. */ if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) @@ -1786,7 +1818,7 @@ do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) memcpy (bx, b.b, 16); } else -#endif /*!USE_AMD64_ASM*/ +#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/ { do_decrypt_aligned (ctx, bx, ax); } diff --git a/configure.ac b/configure.ac index 06c0b790..04dee7e2 100644 --- a/configure.ac +++ b/configure.ac @@ -964,6 +964,34 @@ if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then fi +# +# Check whether GCC assembler supports features needed for our ARM +# implementations +# +AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly implementations], + [gcry_cv_gcc_arm_platform_as_ok], + [gcry_cv_gcc_arm_platform_as_ok=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[__asm__( + /* Test if assembler supports UAL syntax. */ + ".syntax unified\n\t" + ".thumb\n\t" /* thumb-2 in UAL, thumb-1 otherwise. */ + ".code 16\n\t" + /* Following causes error if assembler ignored '.syntax unified'. */ + "asmfunc:\n\t" + "add.w %r0, %r4, %r8, ror #12;\n\t" + + /* Test if '.type' and '.size' are supported. */ + ".size asmfunc,.-asmfunc;\n\t" + ".type asmfunc,%function;\n\t" + );]])], + [gcry_cv_gcc_arm_platform_as_ok=yes])]) +if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then + AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1, + [Defined if underlying assembler is compatible with ARM assembly implementations]) +fi + + ####################################### #### Checks for library functions. #### ####################################### @@ -1284,6 +1312,10 @@ if test "$found" = "1" ; then # Build with the assembly implementation GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-amd64.lo" ;; + arm*-*-*) + # Build with the assembly implementation + GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael-armv6.lo" + ;; esac fi |