From 0b39fce7e3ce6761d6bd5195d093ec6857edb7c2 Mon Sep 17 00:00:00 2001
From: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
Date: Wed, 23 Oct 2013 18:36:10 +0300
Subject: mpi: enable assembler on all arm architectures

* mpi/config.links: remove check for arm >= v6
* mpi/armv6 => mpi/arm: rename directory to reflect that it is generic
  enough

--
MPI ARM assembly does not depend on CPU being armv6. Verified on PXA255:

Before:
Algorithm         generate  100*sign  100*verify
------------------------------------------------
RSA 1024 bit        3990ms   57980ms      1680ms
RSA 2048 bit       59620ms   389430ms      5690ms
RSA 3072 bit       535850ms   1223200ms     12000ms
RSA 4096 bit       449350ms   2707370ms     20050ms

After:
Algorithm         generate  100*sign  100*verify
------------------------------------------------
RSA 1024 bit        2190ms   13730ms       320ms
RSA 2048 bit       12750ms   67640ms       810ms
RSA 3072 bit       110520ms   166100ms      1350ms
RSA 4096 bit       100870ms   357560ms      2170ms

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
[ jk: ARMv6 => ARM in header comments ]
---
 mpi/arm/mpi-asm-defs.h |   4 ++
 mpi/arm/mpih-add1.S    |  76 +++++++++++++++++++++++++++++++++++++
 mpi/arm/mpih-mul1.S    |  80 +++++++++++++++++++++++++++++++++++++++
 mpi/arm/mpih-mul2.S    |  94 ++++++++++++++++++++++++++++++++++++++++++++++
 mpi/arm/mpih-mul3.S    | 100 +++++++++++++++++++++++++++++++++++++++++++++++++
 mpi/arm/mpih-sub1.S    |  77 +++++++++++++++++++++++++++++++++++++
 6 files changed, 431 insertions(+)
 create mode 100644 mpi/arm/mpi-asm-defs.h
 create mode 100644 mpi/arm/mpih-add1.S
 create mode 100644 mpi/arm/mpih-mul1.S
 create mode 100644 mpi/arm/mpih-mul2.S
 create mode 100644 mpi/arm/mpih-mul3.S
 create mode 100644 mpi/arm/mpih-sub1.S

(limited to 'mpi/arm')

diff --git a/mpi/arm/mpi-asm-defs.h b/mpi/arm/mpi-asm-defs.h
new file mode 100644
index 00000000..047d1f5a
--- /dev/null
+++ b/mpi/arm/mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* Basic constants for the MPI machinery.  They have to be chosen on a
+ * per-CPU basis, which is why they are kept in this per-CPU file.
+ * BYTES_PER_MPI_LIMB forwards SIZEOF_UNSIGNED_LONG (defined elsewhere).  */
+#define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG)
diff --git a/mpi/arm/mpih-add1.S b/mpi/arm/mpih-add1.S
new file mode 100644
index 00000000..de6d5ede
--- /dev/null
+++ b/mpi/arm/mpih-add1.S
@@ -0,0 +1,76 @@
+/* ARM add_n -- Add two limb vectors of the same length > 0 and store
+ *              sum in a third limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ *  mpi_limb_t					returns carry out (0 or 1) in %r0
+ *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	%r0
+ *		   mpi_ptr_t s1_ptr,		%r1
+ *		   mpi_ptr_t s2_ptr,		%r2
+ *		   mpi_size_t size)		%r3
+ */
+
+.text
+
+.globl _gcry_mpih_add_n
+.type  _gcry_mpih_add_n,%function
+_gcry_mpih_add_n:
+	push	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr}; /* save work regs and return address */
+	cmn	%r0, #0; /* clear carry flag (r0 + 0 never carries) */
+
+	tst	%r3, #3; /* is size a multiple of 4? */
+	beq	.Large_loop; /* yes: go straight to the 4-limb loop */
+
+.Loop: /* one limb per pass until the remaining size is a multiple of 4 */
+	ldr	%r4, [%r1], #4; /* r4 = *s1_ptr++ */
+	sub	%r3, #1; /* size--; non-flag-setting, carry stays intact */
+	ldr	%lr, [%r2], #4; /* lr = *s2_ptr++ */
+	adcs	%r4, %lr; /* r4 = r4 + lr + C; C = carry out */
+	tst	%r3, #3; /* sets Z only; C is left alone for the next adcs */
+	str	%r4, [%r0], #4; /* *res_ptr++ = r4 */
+	bne	.Loop;
+
+	teq	%r3, #0; /* anything left for the 4-limb loop? */
+	beq	.Lend;
+
+.Large_loop: /* four limbs per pass; the carry chains through C */
+	ldm	%r1!, {%r4, %r6, %r8, %r10}; /* next four limbs of s1 */
+	ldm	%r2!, {%r5, %r7, %r9, %lr}; /* next four limbs of s2 */
+	sub	%r3, #4; /* size -= 4 without touching flags */
+	adcs	%r4, %r5;
+	adcs	%r6, %r7;
+	adcs	%r8, %r9;
+	adcs	%r10, %lr;
+	teq	%r3, #0; /* done? (Z only, C preserved) */
+	stm	%r0!, {%r4, %r6, %r8, %r10}; /* store four result limbs */
+	bne	.Large_loop;
+
+.Lend:
+	adc	%r0, %r3, #0; /* r3 == 0 here, so r0 = final carry */
+	pop	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc}; /* restore regs and return */
+.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;
diff --git a/mpi/arm/mpih-mul1.S b/mpi/arm/mpih-mul1.S
new file mode 100644
index 00000000..9e6f361c
--- /dev/null
+++ b/mpi/arm/mpih-mul1.S
@@ -0,0 +1,80 @@
+/* ARM mul_1 -- Multiply a limb vector with a limb and store the result in
+ *              a second limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t					returns the final carry limb in %r0
+ * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,		%r0
+ *		  mpi_ptr_t s1_ptr,		%r1
+ *		  mpi_size_t s1_size,		%r2
+ *		  mpi_limb_t s2_limb)		%r3
+ */
+
+.text
+
+.globl _gcry_mpih_mul_1
+.type  _gcry_mpih_mul_1,%function
+_gcry_mpih_mul_1:
+	push	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %lr}; /* save work regs and return address */
+	mov	%r4, #0; /* r4 = carry limb, initially 0 */
+
+	tst	%r2, #3; /* is s1_size a multiple of 4? */
+	beq	.Large_loop; /* yes: go straight to the 4-limb loop */
+
+.Loop: /* one limb per pass until the remaining size is a multiple of 4 */
+	ldr	%r5, [%r1], #4; /* r5 = *s1_ptr++ */
+	mov	%lr, #0; /* lr = 0: high word of the accumulator */
+	umlal	%r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + carry limb */
+	sub	%r2, #1; /* s1_size-- */
+	str	%r4, [%r0], #4; /* *res_ptr++ = low word */
+	tst	%r2, #3; /* remaining size a multiple of 4 yet? */
+	mov	%r4, %lr; /* high word becomes the next carry limb */
+	bne	.Loop;
+
+	teq	%r2, #0; /* anything left for the 4-limb loop? */
+	beq	.Lend;
+
+.Large_loop: /* four limbs per pass; high words chain r4 -> r9 -> r10 -> r11 -> r4 */
+	ldm	%r1!, {%r5, %r6, %r7, %r8}; /* next four limbs of s1 */
+	mov	%r9, #0;
+	mov	%r10, #0;
+	umlal	%r4, %r9, %r5, %r3; /* r9:r4 = r5 * s2_limb + carry limb */
+	mov	%r11, #0;
+	umlal	%r9, %r10, %r6, %r3; /* r10:r9 = r6 * s2_limb + r9 */
+	str	%r4, [%r0], #4; /* store result limb 0 */
+	mov	%r4, #0;
+	umlal	%r10, %r11, %r7, %r3; /* r11:r10 = r7 * s2_limb + r10 */
+	subs	%r2, #4; /* s1_size -= 4; Z feeds the bne below */
+	umlal	%r11, %r4, %r8, %r3; /* r4:r11 = r8 * s2_limb + r11; r4 = carry out */
+	stm	%r0!, {%r9, %r10, %r11}; /* store result limbs 1..3 */
+	bne	.Large_loop;
+
+.Lend:
+	mov	%r0, %r4; /* return the last carry limb */
+	pop	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %pc}; /* restore regs and return */
+.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;
diff --git a/mpi/arm/mpih-mul2.S b/mpi/arm/mpih-mul2.S
new file mode 100644
index 00000000..2063be54
--- /dev/null
+++ b/mpi/arm/mpih-mul2.S
@@ -0,0 +1,94 @@
+/* ARM mul_2 -- Multiply a limb vector with a limb and add the result to
+ *              a second limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t					returns the final carry limb in %r0
+ * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,	%r0
+ *		     mpi_ptr_t s1_ptr,		%r1
+ *		     mpi_size_t s1_size,	%r2
+ *		     mpi_limb_t s2_limb)	%r3
+ */
+
+.text
+
+.globl _gcry_mpih_addmul_1
+.type  _gcry_mpih_addmul_1,%function
+_gcry_mpih_addmul_1:
+	push	{%r4, %r5, %r6, %r8, %r10, %lr}; /* save work regs and return address */
+	mov	%lr, #0; /* lr = high word carried between limbs, initially 0 */
+	cmn	%r0, #0; /* clear carry flag */
+
+	tst	%r2, #3; /* is s1_size a multiple of 4? */
+	beq	.Large_loop; /* yes: go straight to the 4-limb loop */
+.Loop: /* one limb per pass until the remaining size is a multiple of 4 */
+	ldr	%r5, [%r1], #4; /* r5 = *s1_ptr++ */
+	ldr	%r4, [%r0]; /* r4 = *res_ptr (pointer advanced by the str below) */
+	sub	%r2, #1; /* s1_size--; non-flag-setting, C stays intact */
+	adcs	%r4, %lr; /* r4 += previous high word + C; C = carry out */
+	mov	%lr, #0;
+	umlal	%r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + r4 */
+	tst	%r2, #3; /* sets Z only; C is kept for the next adcs */
+	str	%r4, [%r0], #4; /* *res_ptr++ = r4 */
+	bne	.Loop;
+
+	teq	%r2, #0; /* anything left for the 4-limb loop? */
+	beq	.Lend;
+
+.Large_loop: /* four limbs per pass; same step as .Loop repeated on r4, r6, r8, r10 */
+	ldr	%r5, [%r1], #4; /* r5 = *s1_ptr++ */
+	ldm	%r0, {%r4, %r6, %r8, %r10}; /* load four result limbs, no writeback */
+
+	sub	%r2, #4; /* s1_size -= 4 without touching flags */
+	adcs	%r4, %lr; /* r4 += previous high word + C */
+	mov	%lr, #0;
+	umlal	%r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + r4 */
+
+	ldr	%r5, [%r1], #4;
+	adcs	%r6, %lr;
+	mov	%lr, #0;
+	umlal	%r6, %lr, %r5, %r3;
+
+	ldr	%r5, [%r1], #4;
+	adcs	%r8, %lr;
+	mov	%lr, #0;
+	umlal	%r8, %lr, %r5, %r3;
+
+	ldr	%r5, [%r1], #4;
+	adcs	%r10, %lr;
+	mov	%lr, #0;
+	umlal	%r10, %lr, %r5, %r3;
+
+	teq	%r2, #0; /* done? (Z only, C preserved) */
+	stm	%r0!, {%r4, %r6, %r8, %r10}; /* store four limbs and advance res_ptr */
+	bne	.Large_loop;
+
+.Lend:
+	adc	%r0, %lr, #0; /* r0 = last high word + pending carry */
+	pop	{%r4, %r5, %r6, %r8, %r10, %pc}; /* restore regs and return */
+.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;
diff --git a/mpi/arm/mpih-mul3.S b/mpi/arm/mpih-mul3.S
new file mode 100644
index 00000000..be2c5e63
--- /dev/null
+++ b/mpi/arm/mpih-mul3.S
@@ -0,0 +1,100 @@
+/* ARM mul_3 -- Multiply a limb vector with a limb and subtract the result
+ *              from a second limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t					returns last high word + borrow in %r0
+ * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,	%r0
+ *		     mpi_ptr_t s1_ptr,		%r1
+ *		     mpi_size_t s1_size,	%r2
+ *		     mpi_limb_t s2_limb)	%r3
+ */
+
+.text
+
+.globl _gcry_mpih_submul_1
+.type  _gcry_mpih_submul_1,%function
+_gcry_mpih_submul_1:
+	push	{%r4, %r5, %r6, %r8, %r9, %r10, %lr}; /* save work regs and return address */
+	mov	%lr, #0; /* lr = high word carried between limbs, initially 0 */
+	cmp	%r0, #0; /* prepare carry flag for sbc (C set = no borrow) */
+
+	tst	%r2, #3; /* is s1_size a multiple of 4? */
+	beq	.Large_loop; /* yes: go straight to the 4-limb loop */
+.Loop: /* one limb per pass until the remaining size is a multiple of 4 */
+	ldr	%r5, [%r1], #4; /* r5 = *s1_ptr++ */
+	mov	%r4, %lr; /* previous high word seeds the low accumulator word */
+	mov	%lr, #0;
+	ldr	%r6, [%r0]; /* r6 = *res_ptr (pointer advanced by the str below) */
+	umlal	%r4, %lr, %r5, %r3; /* lr:r4 = r5 * s2_limb + r4 */
+	sub	%r2, #1; /* s1_size--; non-flag-setting, C stays intact */
+	sbcs	%r4, %r6, %r4; /* r4 = r6 - r4 - borrow; C updated */
+	tst	%r2, #3; /* sets Z only; C is kept for the next sbcs */
+	str	%r4, [%r0], #4; /* *res_ptr++ = r4 */
+	bne	.Loop;
+
+	teq	%r2, #0; /* anything left for the 4-limb loop? */
+	beq	.Lend;
+
+.Large_loop: /* four limbs per pass; lr and r9 alternate as low/high accumulator words */
+	ldr	%r5, [%r1], #4; /* r5 = *s1_ptr++ */
+	mov	%r9, #0;
+	ldr	%r4, [%r0, #0]; /* r4 = res limb 0 */
+
+	umlal	%lr, %r9, %r5, %r3; /* r9:lr = r5 * s2_limb + lr */
+	ldr	%r6, [%r0, #4]; /* r6 = res limb 1 */
+	ldr	%r5, [%r1], #4;
+	sbcs	%r4, %r4, %lr; /* limb 0 -= low word + borrow */
+
+	mov	%lr, #0;
+	umlal	%r9, %lr, %r5, %r3; /* lr:r9 = r5 * s2_limb + r9 */
+	ldr	%r8, [%r0, #8]; /* r8 = res limb 2 */
+	ldr	%r5, [%r1], #4;
+	sbcs	%r6, %r6, %r9; /* limb 1 -= low word + borrow */
+
+	mov	%r9, #0;
+	umlal	%lr, %r9, %r5, %r3; /* r9:lr = r5 * s2_limb + lr */
+	ldr	%r10, [%r0, #12]; /* r10 = res limb 3 */
+	ldr	%r5, [%r1], #4;
+	sbcs	%r8, %r8, %lr; /* limb 2 -= low word + borrow */
+
+	mov	%lr, #0;
+	umlal	%r9, %lr, %r5, %r3; /* lr:r9 = r5 * s2_limb + r9; lr carries out */
+	sub	%r2, #4; /* s1_size -= 4 without touching flags */
+	sbcs	%r10, %r10, %r9; /* limb 3 -= low word + borrow */
+
+	teq	%r2, #0; /* done? (Z only, C preserved) */
+	stm	%r0!, {%r4, %r6, %r8, %r10}; /* store four limbs and advance res_ptr */
+	bne	.Large_loop;
+
+.Lend:
+	it	cc /* IT marker for unified syntax; file is assembled as .arm */
+	movcc	%r2, #1; /* r2 is 0 here; set to 1 if a borrow is pending (C clear) */
+	add	%r0, %lr, %r2; /* r0 = last high word + borrow */
+	pop	{%r4, %r5, %r6, %r8, %r9, %r10, %pc}; /* restore regs and return */
+.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;
diff --git a/mpi/arm/mpih-sub1.S b/mpi/arm/mpih-sub1.S
new file mode 100644
index 00000000..a573efff
--- /dev/null
+++ b/mpi/arm/mpih-sub1.S
@@ -0,0 +1,77 @@
+/* ARM sub_n -- Subtract two limb vectors of the same length > 0 and store
+ *              difference in a third limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ *  mpi_limb_t					returns borrow out (0 or 1) in %r0
+ *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	%r0
+ *		   mpi_ptr_t s1_ptr,		%r1
+ *		   mpi_ptr_t s2_ptr,		%r2
+ *		   mpi_size_t size)		%r3
+ */
+
+.text
+
+.globl _gcry_mpih_sub_n
+.type  _gcry_mpih_sub_n,%function
+_gcry_mpih_sub_n:
+	push	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr}; /* save work regs and return address */
+	cmp	%r0, #0; /* prepare carry flag for sub (C set = no borrow) */
+
+	tst	%r3, #3; /* is size a multiple of 4? */
+	beq	.Large_loop; /* yes: go straight to the 4-limb loop */
+
+.Loop: /* one limb per pass until the remaining size is a multiple of 4 */
+	ldr	%r4, [%r1], #4; /* r4 = *s1_ptr++ */
+	sub	%r3, #1; /* size--; non-flag-setting, C stays intact */
+	ldr	%lr, [%r2], #4; /* lr = *s2_ptr++ */
+	sbcs	%r4, %lr; /* r4 = r4 - lr - borrow; C updated */
+	tst	%r3, #3; /* sets Z only; C is kept for the next sbcs */
+	str	%r4, [%r0], #4; /* *res_ptr++ = r4 */
+	bne	.Loop;
+
+	teq	%r3, #0; /* anything left for the 4-limb loop? */
+	beq	.Lend;
+
+.Large_loop: /* four limbs per pass; the borrow chains through C */
+	ldm	%r1!, {%r4, %r6, %r8, %r10}; /* next four limbs of s1 */
+	sub	%r3, #4; /* size -= 4 without touching flags */
+	ldm	%r2!, {%r5, %r7, %r9, %lr}; /* next four limbs of s2 */
+	sbcs	%r4, %r5;
+	sbcs	%r6, %r7;
+	sbcs	%r8, %r9;
+	sbcs	%r10, %lr;
+	teq	%r3, #0; /* done? (Z only, C preserved) */
+	stm	%r0!, {%r4, %r6, %r8, %r10}; /* store four result limbs */
+	bne	.Large_loop;
+
+.Lend:
+	sbc	%r0, %r3, #0; /* r3 == 0 here: r0 = 0 without borrow, -1 with borrow */
+	neg	%r0, %r0; /* turn 0 / -1 into 0 / 1 */
+	pop	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc}; /* restore regs and return */
+.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;
-- 
cgit v1.2.1