mpi: add ARMv6 assembly

* mpi/armv6/mpi-asm-defs.h: New. * mpi/armv6/mpih-add1.S: New. * mpi/armv6/mpih-mul1.S: New. * mpi/armv6/mpih-mul2.S: New. * mpi/armv6/mpih-mul3.S: New. * mpi/armv6/mpih-sub1.S: New. * mpi/config.links [arm]: Enable ARMv6 assembly. -- Add mpi assembly for ARMv6 (or later). These are partly based on ARM assembly found in GMP 4.2.1. Old vs new (Cortex-A8, 1Ghz): Algorithm generate 100*sign 100*verify ------------------------------------------------ ECDSA 192 bit 1.14x 1.10x 1.13x ECDSA 224 bit 1.11x 1.12x 1.12x ECDSA 256 bit 1.20x 1.13x 1.14x ECDSA 384 bit 1.13x 1.21x 1.21x ECDSA 521 bit 1.17x 1.20x 1.22x Algorithm generate 100*sign 100*verify ------------------------------------------------ RSA 1024 bit - 1.31x 1.60x RSA 2048 bit - 1.41x 1.47x RSA 3072 bit - 1.50x 1.63x RSA 4096 bit - 1.50x 1.57x Algorithm generate 100*sign 100*verify ------------------------------------------------ DSA 1024/160 - 1.39x 1.38x DSA 2048/224 - 1.50x 1.51x DSA 3072/256 - 1.59x 1.64x NEW: Algorithm generate 100*sign 100*verify ------------------------------------------------ ECDSA 192 bit 70ms 1750ms 3170ms ECDSA 224 bit 90ms 2210ms 4250ms ECDSA 256 bit 100ms 2710ms 5170ms ECDSA 384 bit 230ms 5670ms 11040ms ECDSA 521 bit 540ms 13370ms 25870ms Algorithm generate 100*sign 100*verify ------------------------------------------------ RSA 1024 bit 360ms 2200ms 50ms RSA 2048 bit 2770ms 11900ms 150ms RSA 3072 bit 6680ms 32530ms 270ms RSA 4096 bit 10320ms 69440ms 460ms Algorithm generate 100*sign 100*verify ------------------------------------------------ DSA 1024/160 - 990ms 910ms DSA 2048/224 - 3830ms 3410ms DSA 3072/256 - 8270ms 7030ms OLD: Algorithm generate 100*sign 100*verify ------------------------------------------------ ECDSA 192 bit 80ms 1920ms 3580ms ECDSA 224 bit 100ms 2470ms 4760ms ECDSA 256 bit 120ms 3050ms 5870ms ECDSA 384 bit 260ms 6840ms 13330ms ECDSA 521 bit 630ms 16080ms 31500ms Algorithm generate 100*sign 100*verify ------------------------------------------------ RSA 1024 bit 450ms 2890ms 80ms RSA 2048 bit 2320ms 16760ms 220ms RSA 3072 bit 26300ms 48650ms 440ms RSA 4096 bit 15700ms 103910ms 720ms Algorithm generate 100*sign 100*verify ------------------------------------------------ DSA 1024/160 - 1380ms 1260ms DSA 2048/224 - 5740ms 5140ms DSA 3072/256 - 13130ms 11510ms Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-08-17 13:41:03 +0300
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-08-20 12:33:34 +0300
commit: da327aef3fe24fdf98fffbc8aea69de42ed12456 (patch)
tree: 7e93726e8d066523df3e839ed8644b6b78463a2f /mpi
parent: 151f1e518be2d16bed748ba832384b0472ddcf9b (diff)
download: libgcrypt-da327aef3fe24fdf98fffbc8aea69de42ed12456.tar.gz
7 files changed, 443 insertions, 0 deletions
diff --git a/mpi/armv6/mpi-asm-defs.h b/mpi/armv6/mpi-asm-defs.h
new file mode 100644
index 00000000..047d1f5a
--- /dev/null
+++ b/mpi/armv6/mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* This file defines some basic constants for the MPI machinery.  We
+ * need to define the types on a per-CPU basis, so it is done with
+ * this file here.  */
+#define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG)
diff --git a/mpi/armv6/mpih-add1.S b/mpi/armv6/mpih-add1.S
new file mode 100644
index 00000000..60ea4c32
--- /dev/null
+++ b/mpi/armv6/mpih-add1.S
@@ -0,0 +1,76 @@
+/* ARMv6 add_n -- Add two limb vectors of the same length > 0 and store
+ *                sum in a third limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ *  mpi_limb_t
+ *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,	%r0
+ *		   mpi_ptr_t s1_ptr,		%r1
+ *		   mpi_ptr_t s2_ptr,		%r2
+ *		   mpi_size_t size)		%r3
+ */
+
+.text
+
+.globl _gcry_mpih_add_n
+.type  _gcry_mpih_add_n,%function
+_gcry_mpih_add_n:
+	push	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr};
+	cmn	%r0, #0; /* clear carry flag */
+
+	tst	%r3, #3;
+	beq	.Large_loop;
+
+.Loop:
+	ldr	%r4, [%r1], #4;
+	sub	%r3, #1;
+	ldr	%lr, [%r2], #4;
+	adcs	%r4, %lr;
+	tst	%r3, #3;
+	str	%r4, [%r0], #4;
+	bne	.Loop;
+
+	teq	%r3, #0;
+	beq	.Lend;
+
+.Large_loop:
+	ldm	%r1!, {%r4, %r6, %r8, %r10};
+	ldm	%r2!, {%r5, %r7, %r9, %lr};
+	sub	%r3, #4;
+	adcs	%r4, %r5;
+	adcs	%r6, %r7;
+	adcs	%r8, %r9;
+	adcs	%r10, %lr;
+	teq	%r3, #0;
+	stm	%r0!, {%r4, %r6, %r8, %r10};
+	bne	.Large_loop;
+
+.Lend:
+	adc	%r0, %r3, #0;
+	pop	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;
diff --git a/mpi/armv6/mpih-mul1.S b/mpi/armv6/mpih-mul1.S
new file mode 100644
index 00000000..ae19a153
--- /dev/null
+++ b/mpi/armv6/mpih-mul1.S
@@ -0,0 +1,80 @@
+/* ARMv6 mul_1 -- Multiply a limb vector with a limb and store the result in
+ *                a second limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,		%r0
+ *		  mpi_ptr_t s1_ptr,		%r1
+ *		  mpi_size_t s1_size,		%r2
+ *		  mpi_limb_t s2_limb)		%r3
+ */
+
+.text
+
+.globl _gcry_mpih_mul_1
+.type  _gcry_mpih_mul_1,%function
+_gcry_mpih_mul_1:
+	push	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %lr};
+	mov	%r4, #0;
+
+	tst	%r2, #3;
+	beq	.Large_loop;
+
+.Loop:
+	ldr	%r5, [%r1], #4;
+	mov	%lr, #0;
+	umlal	%r4, %lr, %r5, %r3;
+	sub	%r2, #1;
+	str	%r4, [%r0], #4;
+	tst	%r2, #3;
+	mov	%r4, %lr;
+	bne	.Loop;
+
+	teq	%r2, #0;
+	beq	.Lend;
+
+.Large_loop:
+	mov	%r9, #0;
+	ldm	%r1!, {%r5, %r6, %r7, %r8};
+	mov	%r10, #0;
+	umlal	%r4, %r9, %r5, %r3;
+	mov	%r11, #0;
+	umlal	%r9, %r10, %r6, %r3;
+	mov	%lr, #0;
+	umlal	%r10, %r11, %r7, %r3;
+	subs	%r2, #4;
+	umlal	%r11, %lr, %r8, %r3;
+	stm	%r0!, {%r4, %r9, %r10, %r11};
+	mov	%r4, %lr;
+	bne	.Large_loop;
+
+.Lend:
+	mov	%r0, %r4;
+	pop	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %pc};
+.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;
diff --git a/mpi/armv6/mpih-mul2.S b/mpi/armv6/mpih-mul2.S
new file mode 100644
index 00000000..02f7c07e
--- /dev/null
+++ b/mpi/armv6/mpih-mul2.S
@@ -0,0 +1,94 @@
+/* ARMv6 mul_2 -- Multiply a limb vector with a limb and add the result to
+ *                a second limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,	%r0
+ *		     mpi_ptr_t s1_ptr,		%r1
+ *		     mpi_size_t s1_size,	%r2
+ *		     mpi_limb_t s2_limb)	%r3
+ */
+
+.text
+
+.globl _gcry_mpih_addmul_1
+.type  _gcry_mpih_addmul_1,%function
+_gcry_mpih_addmul_1:
+	push	{%r4, %r5, %r6, %r8, %r10, %lr};
+	mov	%lr, #0;
+	cmn	%r0, #0; /* clear carry flag */
+
+	tst	%r2, #3;
+	beq	.Large_loop;
+.Loop:
+	ldr	%r5, [%r1], #4;
+	ldr	%r4, [%r0];
+	sub	%r2, #1;
+	adcs	%r4, %lr;
+	mov	%lr, #0;
+	umlal	%r4, %lr, %r5, %r3;
+	tst	%r2, #3;
+	str	%r4, [%r0], #4;
+	bne	.Loop;
+
+	teq	%r2, #0;
+	beq	.Lend;
+
+.Large_loop:
+	ldm	%r0, {%r4, %r6, %r8, %r10};
+	ldr	%r5, [%r1], #4;
+
+	sub	%r2, #4;
+	adcs	%r4, %lr;
+	mov	%lr, #0;
+	umlal	%r4, %lr, %r5, %r3;
+
+	ldr	%r5, [%r1], #4;
+	adcs	%r6, %lr;
+	mov	%lr, #0;
+	umlal	%r6, %lr, %r5, %r3;
+
+	ldr	%r5, [%r1], #4;
+	adcs	%r8, %lr;
+	mov	%lr, #0;
+	umlal	%r8, %lr, %r5, %r3;
+
+	ldr	%r5, [%r1], #4;
+	adcs	%r10, %lr;
+	mov	%lr, #0;
+	umlal	%r10, %lr, %r5, %r3;
+
+	teq	%r2, #0;
+	stm	%r0!, {%r4, %r6, %r8, %r10};
+	bne	.Large_loop;
+
+.Lend:
+	adc	%r0, %lr, #0;
+	pop	{%r4, %r5, %r6, %r8, %r10, %pc};
+.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;
diff --git a/mpi/armv6/mpih-mul3.S b/mpi/armv6/mpih-mul3.S
new file mode 100644
index 00000000..e42fc304
--- /dev/null
+++ b/mpi/armv6/mpih-mul3.S
@@ -0,0 +1,97 @@
+/* ARMv6 mul_3 -- Multiply a limb vector with a limb and subtract the result
+ *                from a second limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,	%r0
+ *		     mpi_ptr_t s1_ptr,		%r1
+ *		     mpi_size_t s1_size,	%r2
+ *		     mpi_limb_t s2_limb)	%r3
+ */
+
+.text
+
+.globl _gcry_mpih_submul_1
+.type  _gcry_mpih_submul_1,%function
+_gcry_mpih_submul_1:
+	push	{%r4, %r5, %r6, %r8, %r9, %r10, %lr};
+	mov	%lr, #0;
+	cmp	%r0, #0; /* prepare carry flag for sbc */
+
+	tst	%r2, #3;
+	beq	.Large_loop;
+.Loop:
+	ldr	%r5, [%r1], #4;
+	mov	%r4, %lr;
+	mov	%lr, #0;
+	ldr	%r6, [%r0];
+	umlal	%r4, %lr, %r5, %r3;
+	sub	%r2, #1;
+	sbcs	%r4, %r6, %r4;
+	tst	%r2, #3;
+	str	%r4, [%r0], #4;
+	bne	.Loop;
+
+	teq	%r2, #0;
+	beq	.Lend;
+
+.Large_loop:
+	ldr	%r5, [%r1], #4;
+	ldm	%r0, {%r4, %r6, %r8, %r10};
+
+	mov	%r9, #0;
+	umlal	%lr, %r9, %r5, %r3;
+	ldr	%r5, [%r1], #4;
+	sbcs	%r4, %r4, %lr;
+
+	mov	%lr, #0;
+	umlal	%r9, %lr, %r5, %r3;
+	ldr	%r5, [%r1], #4;
+	sbcs	%r6, %r6, %r9;
+
+	mov	%r9, #0;
+	umlal	%lr, %r9, %r5, %r3;
+	ldr	%r5, [%r1], #4;
+	sbcs	%r8, %r8, %lr;
+
+	mov	%lr, #0;
+	umlal	%r9, %lr, %r5, %r3;
+	sub	%r2, #4;
+	sbcs	%r10, %r10, %r9;
+
+	teq	%r2, #0;
+	stm	%r0!, {%r4, %r6, %r8, %r10};
+	bne	.Large_loop;
+
+.Lend:
+	it	cc
+	movcc	%r2, #1;
+	add	%r0, %lr, %r2;
+	pop	{%r4, %r5, %r6, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;
diff --git a/mpi/armv6/mpih-sub1.S b/mpi/armv6/mpih-sub1.S
new file mode 100644
index 00000000..77d05eb6
--- /dev/null
+++ b/mpi/armv6/mpih-sub1.S
@@ -0,0 +1,77 @@
+/* ARMv6 sub_n -- Subtract two limb vectors of the same length > 0 and store
+ *		  sum in a third limb vector.
+ *
+ *      Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ *  mpi_limb_t
+ *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,	%r0
+ *		   mpi_ptr_t s1_ptr,		%r1
+ *		   mpi_ptr_t s2_ptr,		%r2
+ *		   mpi_size_t size)		%r3
+ */
+
+.text
+
+.globl _gcry_mpih_sub_n
+.type  _gcry_mpih_sub_n,%function
+_gcry_mpih_sub_n:
+	push	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr};
+	cmp	%r0, #0; /* prepare carry flag for sub */
+
+	tst	%r3, #3;
+	beq	.Large_loop;
+
+.Loop:
+	ldr	%r4, [%r1], #4;
+	sub	%r3, #1;
+	ldr	%lr, [%r2], #4;
+	sbcs	%r4, %lr;
+	tst	%r3, #3;
+	str	%r4, [%r0], #4;
+	bne	.Loop;
+
+	teq	%r3, #0;
+	beq	.Lend;
+
+.Large_loop:
+	ldm	%r1!, {%r4, %r6, %r8, %r10};
+	sub	%r3, #4;
+	ldm	%r2!, {%r5, %r7, %r9, %lr};
+	sbcs	%r4, %r5;
+	sbcs	%r6, %r7;
+	sbcs	%r8, %r9;
+	sbcs	%r10, %lr;
+	teq	%r3, #0;
+	stm	%r0!, {%r4, %r6, %r8, %r10};
+	bne	.Large_loop;
+
+.Lend:
+	sbc	%r0, %r3, #0;
+	neg	%r0, %r0;
+	pop	{%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;
diff --git a/mpi/config.links b/mpi/config.links
index bcc6e3ea..f3002557 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -136,6 +136,21 @@ case "${host}" in
 	mpi_extra_modules="udiv-qrnnd"
         mpi_cpu_arch="alpha"
 	;;
+    arm*-*-*)
+	if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then
+	  if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then
+	    echo '/* configured for armv6 */' >>./mpi/asm-syntax.h
+	    path="armv6"
+	    mpi_cpu_arch="armv6"
+	  else
+	    echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+	    path=""
+	  fi
+	else
+	  echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+	  path=""
+	fi
+	;;
     hppa7000*-*-*)
 	echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h
 	path="hppa1.1 hppa"
author	Jussi Kivilinna <jussi.kivilinna@iki.fi>	2013-08-17 13:41:03 +0300
committer	Jussi Kivilinna <jussi.kivilinna@iki.fi>	2013-08-20 12:33:34 +0300
commit	da327aef3fe24fdf98fffbc8aea69de42ed12456 (patch)
tree	7e93726e8d066523df3e839ed8644b6b78463a2f /mpi
parent	151f1e518be2d16bed748ba832384b0472ddcf9b (diff)
download	libgcrypt-da327aef3fe24fdf98fffbc8aea69de42ed12456.tar.gz