add-key works

author: Werner Koch <wk@gnupg.org> 1998-05-26 13:37:59 +0000
committer: Werner Koch <wk@gnupg.org> 1998-05-26 13:37:59 +0000
commit: 525b0bc632ccfef3b5d5b5829a082f5a2b6150ff (patch)
tree: 5ca7ba86848678c3198daadc917de67891e81798 /mpi
parent: 17e97734857f1032785583d760f19d0f23a431f7 (diff)
download: libgcrypt-525b0bc632ccfef3b5d5b5829a082f5a2b6150ff.tar.gz
32 files changed, 1413 insertions, 653 deletions
diff --git a/mpi/ChangeLog b/mpi/ChangeLog
index 86d7413b..15da105f 100644
--- a/mpi/ChangeLog
+++ b/mpi/ChangeLog
@@ -1,3 +1,9 @@
+Mon May 18 13:47:06 1998  Werner Koch  (wk@isil.d.shuttle.de)
+
+	* config.links: split mpih-shift into mpih-[lr]shift and
+	changed all implementations.
+	* mpi/alpha: add some new assembler stuff.
+
 Wed May 13 11:04:29 1998  Werner Koch  (wk@isil.d.shuttle.de)
 
 	* config.links: Add support for MIPS
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
index e7a50d28..ead60a68 100644
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -7,7 +7,7 @@ SUFFIXES = .S .s
 
 EXTRA_DIST = config.links
 DISTCLEANFILES = mpih-add1.S mpih-mul1.S mpih-mul2.S mpih-mul3.S  \
-		 mpih-shift.S mpih-sub1.S asm-syntax.h sysdep.h
+		 mpih-lshift.S mpih-rshift.S mpih-sub1.S asm-syntax.h sysdep.h
 
 
 noinst_LIBRARIES = libmpi.a
@@ -43,7 +43,8 @@ common_asm_objects = mpih-mul1.o    \
 		     mpih-mul3.o    \
 		     mpih-add1.o    \
 		     mpih-sub1.o    \
-		     mpih-shift.o
+		     mpih-lshift.o  \
+		     mpih-rshift.o
 
 libmpi_a_DEPENDENCIES = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
 libmpi_a_LIBADD = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
index c0db398a..cd14c384 100644
--- a/mpi/Makefile.in
+++ b/mpi/Makefile.in
@@ -98,7 +98,7 @@ SUFFIXES = .S .s
 
 EXTRA_DIST = config.links
 DISTCLEANFILES = mpih-add1.S mpih-mul1.S mpih-mul2.S mpih-mul3.S  \
-		 mpih-shift.S mpih-sub1.S asm-syntax.h sysdep.h
+		 mpih-lshift.S mpih-rshift.S mpih-sub1.S asm-syntax.h sysdep.h
 
 noinst_LIBRARIES = libmpi.a
 # noinst_HEADERS   =
@@ -131,7 +131,8 @@ common_asm_objects = mpih-mul1.o    \
 		     mpih-mul3.o    \
 		     mpih-add1.o    \
 		     mpih-sub1.o    \
-		     mpih-shift.o
+		     mpih-lshift.o  \
+		     mpih-rshift.o
 
 libmpi_a_DEPENDENCIES = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
 libmpi_a_LIBADD = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
diff --git a/mpi/alpha/distfiles b/mpi/alpha/distfiles
index e92d183d..f2ab9fc3 100644
--- a/mpi/alpha/distfiles
+++ b/mpi/alpha/distfiles
@@ -1,6 +1,11 @@
 README
 mpih-add1.S
-mpih-shift.S
+mpih-sub1.S
+mpih-mul1.S
+mpih-mul2.S
+mpih-mul3.S
+mpih-lshift.S
+mpih-rshift.S
 
 udiv-qrnnd.S
 
diff --git a/mpi/alpha/mpih-add1.S b/mpi/alpha/mpih-add1.S
index 54cec43f..dc3bcfbb 100644
--- a/mpi/alpha/mpih-add1.S
+++ b/mpi/alpha/mpih-add1.S
@@ -19,14 +19,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *	 Actually it's the same code with only minor changes in the
- *	 way the data is stored; this is to support the abstraction
- *	 of an optional secure memory allocation which may be used
- *	 to avoid revealing of sensitive data due to paging etc.
- *	 The GNU MP Library itself is published under the LGPL;
- *	 however I decided to publish this code under the plain GPL.
  */
 
 
diff --git a/mpi/alpha/mpih-shift.S b/mpi/alpha/mpih-lshift.S
index 8bbd10cd..9688588f 100644
--- a/mpi/alpha/mpih-shift.S
+++ b/mpi/alpha/mpih-lshift.S
@@ -1,4 +1,4 @@
-/* alpha    rshift, lshift
+/* alpha    lshift
  *	Copyright (C) 1994, 1995 Free Software Foundation, Inc.
  *	Copyright (C) 1998 Free Software Foundation, Inc.
  *
@@ -120,94 +120,3 @@ mpihelp_lshift:
 	.end	mpihelp_lshift
 
 
-
-
-
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp,	(r16)
- *		   mpi_ptr_t up,	(r17)
- *		   mpi_size_t usize,	(r18)
- *		   unsigned cnt)	(r19)
- *
- * This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
- * it would take 4 cycles/limb.  It should be possible to get down to 3
- * cycles/limb since both ldq and stq can be paired with the other used
- * instructions.  But there are many restrictions in the 21064 pipeline that
- * makes it hard, if not impossible, to get down to 3 cycles/limb:
- *
- * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
- * 2. Only aligned instruction pairs can be paired.
- * 3. The store buffer or silo might not be able to deal with the bandwidth.
- */
-
-	.set	noreorder
-	.set	noat
-.text
-	.align	3
-	.globl	mpihelp_rshift
-	.ent	mpihelp_rshift
-mpihelp_rshift:
-	.frame	$30,0,$26,0
-
-	ldq	$4,0($17)	# load first limb
-	addq	$17,8,$17
-	subq	$31,$19,$7
-	subq	$18,1,$18
-	and	$18,4-1,$20	# number of limbs in first loop
-	sll	$4,$7,$0	# compute function result
-
-	beq	$20,.R0
-	subq	$18,$20,$18
-
-	.align	3
-.Roop0:
-	ldq	$3,0($17)
-	addq	$16,8,$16
-	addq	$17,8,$17
-	subq	$20,1,$20
-	srl	$4,$19,$5
-	sll	$3,$7,$6
-	bis	$3,$3,$4
-	bis	$5,$6,$8
-	stq	$8,-8($16)
-	bne	$20,.Roop0
-
-.R0:	beq	$18,.Rend
-
-	.align	3
-.Roop:	ldq	$3,0($17)
-	addq	$16,32,$16
-	subq	$18,4,$18
-	srl	$4,$19,$5
-	sll	$3,$7,$6
-
-	ldq	$4,8($17)
-	srl	$3,$19,$1
-	bis	$5,$6,$8
-	stq	$8,-32($16)
-	sll	$4,$7,$2
-
-	ldq	$3,16($17)
-	srl	$4,$19,$5
-	bis	$1,$2,$8
-	stq	$8,-24($16)
-	sll	$3,$7,$6
-
-	ldq	$4,24($17)
-	srl	$3,$19,$1
-	bis	$5,$6,$8
-	stq	$8,-16($16)
-	sll	$4,$7,$2
-
-	addq	$17,32,$17
-	bis	$1,$2,$8
-	stq	$8,-8($16)
-
-	bgt	$18,.Roop
-
-.Rend:	srl	$4,$19,$8
-	stq	$8,0($16)
-	ret	$31,($26),1
-	.end	mpihelp_rshift
-
diff --git a/mpi/alpha/mpih-mul1.S b/mpi/alpha/mpih-mul1.S
new file mode 100644
index 00000000..5b24d98d
--- /dev/null
+++ b/mpi/alpha/mpih-mul1.S
@@ -0,0 +1,89 @@
+/* Alpha 21064 mpih-mul1.S -- Multiply a limb vector with a limb and store
+ *			      the result in a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_mul_1( mpi_ptr_t res_ptr,	(r16)
+ *		  mpi_ptr_t s1_ptr,	(r17)
+ *		  mpi_size_t s1_size,	(r18)
+ *		  mpi_limb_t s2_limb)	(r19)
+ *
+ * This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
+ *
+ * To improve performance for long multiplications, we would use
+ * 'fetch' for S1 and 'fetch_m' for RES.  It's not obvious how to use
+ * these instructions without slowing down the general code: 1. We can
+ * only have two prefetches in operation at any time in the Alpha
+ * architecture.  2. There will seldom be any special alignment
+ * between RES_PTR and S1_PTR.	Maybe we can simply divide the current
+ * loop into an inner and outer loop, having the inner loop handle
+ * exactly one prefetch block?
+ */
+
+	.set	noreorder	# assembler must keep the instruction order below
+	.set	noat		# assembler must not use the $at temporary itself
+.text
+	.align	3
+	.globl	mpihelp_mul_1
+	.ent	mpihelp_mul_1 2
+mpihelp_mul_1:
+	.frame	$30,0,$26	# no stack frame; return address is in $26
+
+	ldq	$2,0($17)	# $2 = s1_ptr[0]
+	subq	$18,1,$18	# size--
+	mulq	$2,$19,$3	# $3 = low half of s1_ptr[0] * s2_limb
+	bic	$31,$31,$4	# $4 = 0: nothing to carry in yet
+	umulh	$2,$19,$0	# $0 = high half of s1_ptr[0] * s2_limb
+	beq	$18,Lend1	# size was 1: store $3, return $0
+	ldq	$2,8($17)	# $2 = s1_ptr[1]
+	subq	$18,1,$18	# size--
+	stq	$3,0($16)	# res_ptr[0] = low half of the first product
+	beq	$18,Lend2	# size was 2: skip the loop
+
+	.align	3
+Loop:	mulq	$2,$19,$3	# $3 = low half of the current product
+	addq	$4,$0,$0	# $0 = carry-in limb ($4 + $0 from the previous limb)
+	subq	$18,1,$18	# size--
+	umulh	$2,$19,$4	# $4 = high half of the current product
+	ldq	$2,16($17)	# $2 = s1 limb for the next pass (s1_ptr lags by two)
+	addq	$17,8,$17	# s1_ptr++
+	addq	$3,$0,$3	# $3 = low half + carry-in limb
+	stq	$3,8($16)	# store result limb (res_ptr lags by one)
+	cmpult	$3,$0,$0	# $0 = 1 if that addition wrapped, else 0
+	addq	$16,8,$16	# res_ptr++
+	bne	$18,Loop	# repeat until only the last limb is left
+
+Lend2:	mulq	$2,$19,$3	# $3 = low half of the last product
+	addq	$4,$0,$0	# $0 = carry-in limb ($4 + $0 from the previous limb)
+	umulh	$2,$19,$4	# $4 = high half of the last product
+	addq	$3,$0,$3	# $3 = low half + carry-in limb
+	cmpult	$3,$0,$0	# $0 = 1 if that addition wrapped, else 0
+	stq	$3,8($16)	# store the last result limb
+	addq	$4,$0,$0	# return value = last high half + last carry
+	ret	$31,($26),1	# return $0
+Lend1:	stq	$3,0($16)	# size == 1: res_ptr[0] = low half
+	ret	$31,($26),1	# return $0 = high half of the only product
+
+	.end	mpihelp_mul_1
+
+
diff --git a/mpi/alpha/mpih-mul2.S b/mpi/alpha/mpih-mul2.S
new file mode 100644
index 00000000..0c8d361c
--- /dev/null
+++ b/mpi/alpha/mpih-mul2.S
@@ -0,0 +1,96 @@
+/* Alpha 21064 addmul_1 -- Multiply a limb vector with a limb and add
+ *			   the result to a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_addmul_1( mpi_ptr_t res_ptr,      (r16)
+ *		     mpi_ptr_t s1_ptr,	     (r17)
+ *		     mpi_size_t s1_size,     (r18)
+ *		     mpi_limb_t s2_limb)     (r19)
+ *
+ * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+ */
+
+
+	.set	noreorder	# assembler must keep the instruction order below
+	.set	noat		# assembler must not use the $at temporary itself
+.text
+	.align	3
+	.globl	mpihelp_addmul_1
+	.ent	mpihelp_addmul_1 2
+mpihelp_addmul_1:
+	.frame	$30,0,$26	# no stack frame; return address is in $26
+
+	ldq	$2,0($17)	# $2 = first s1 limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	mulq	$2,$19,$3	# $3 = low half of the first product
+	ldq	$5,0($16)	# $5 = *res_ptr
+	umulh	$2,$19,$0	# $0 = high half of the first product
+	beq	$18,.Lend1	# size was 1: finish at .Lend1
+	ldq	$2,0($17)	# $2 = second s1 limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	addq	$5,$3,$3	# $3 = *res_ptr + low half
+	cmpult	$3,$5,$4	# $4 = 1 if that addition wrapped, else 0
+	stq	$3,0($16)	# *res_ptr = updated limb
+	addq	$16,8,$16	# res_ptr++
+	beq	$18,.Lend2	# size was 2: skip the loop
+
+	.align	3
+.Loop:	mulq	$2,$19,$3	# $3 = low half of the current product
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# $0 = carry-in limb ($4 + $0 from the previous limb)
+	subq	$18,1,$18	# size--
+	umulh	$2,$19,$4	# $4 = high half of the current product
+	ldq	$2,0($17)	# $2 = s1 limb for the next pass
+	addq	$17,8,$17	# s1_ptr++
+	addq	$3,$0,$3	# $3 = low half + carry-in limb
+	cmpult	$3,$0,$0	# $0 = 1 if that addition wrapped, else 0
+	addq	$5,$3,$3	# $3 += *res_ptr
+	cmpult	$3,$5,$5	# $5 = 1 if that addition wrapped, else 0
+	stq	$3,0($16)	# *res_ptr = updated limb
+	addq	$16,8,$16	# res_ptr++
+	addq	$5,$0,$0	# $0 = sum of the two carry bits
+	bne	$18,.Loop	# repeat until only the last limb is left
+
+.Lend2: mulq	$2,$19,$3	# $3 = low half of the last product
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# $0 = carry-in limb ($4 + $0 from the previous limb)
+	umulh	$2,$19,$4	# $4 = high half of the last product
+	addq	$3,$0,$3	# $3 = low half + carry-in limb
+	cmpult	$3,$0,$0	# $0 = 1 if that addition wrapped, else 0
+	addq	$5,$3,$3	# $3 += *res_ptr
+	cmpult	$3,$5,$5	# $5 = 1 if that addition wrapped, else 0
+	stq	$3,0($16)	# *res_ptr = last updated limb
+	addq	$5,$0,$0	# $0 = sum of the two carry bits
+	addq	$4,$0,$0	# return value = last high half + carries
+	ret	$31,($26),1	# return $0
+.Lend1: addq	$5,$3,$3	# size == 1: $3 = *res_ptr + low half
+	cmpult	$3,$5,$5	# $5 = 1 if that addition wrapped, else 0
+	stq	$3,0($16)	# *res_ptr = updated limb
+	addq	$0,$5,$0	# return value = high half + carry
+	ret	$31,($26),1	# return $0
+
+	.end	mpihelp_addmul_1
+
diff --git a/mpi/alpha/mpih-mul3.S b/mpi/alpha/mpih-mul3.S
new file mode 100644
index 00000000..bdf16b57
--- /dev/null
+++ b/mpi/alpha/mpih-mul3.S
@@ -0,0 +1,94 @@
+/* Alpha 21064	submul_1 -- Multiply a limb vector with a limb and
+ *			    subtract the result from a second limb vector.
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_submul_1( mpi_ptr_t res_ptr,      (r16   )
+ *		     mpi_ptr_t s1_ptr,	     (r17   )
+ *		     mpi_size_t s1_size,     (r18   )
+ *		     mpi_limb_t s2_limb)     (r19   )
+ *
+ * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+ */
+
+	.set	noreorder	# assembler must keep the instruction order below
+	.set	noat		# assembler must not use the $at temporary itself
+.text
+	.align	3
+	.globl	mpihelp_submul_1
+	.ent	mpihelp_submul_1 2
+mpihelp_submul_1:
+	.frame	$30,0,$26	# no stack frame; return address is in $26
+
+	ldq	$2,0($17)	# $2 = first s1 limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	mulq	$2,$19,$3	# $3 = low half of the first product
+	ldq	$5,0($16)	# $5 = *res_ptr
+	umulh	$2,$19,$0	# $0 = high half of the first product
+	beq	$18,.Lend1	# size was 1: finish at .Lend1
+	ldq	$2,0($17)	# $2 = second s1 limb
+	addq	$17,8,$17	# s1_ptr++
+	subq	$18,1,$18	# size--
+	subq	$5,$3,$3	# $3 = *res_ptr - low half
+	cmpult	$5,$3,$4	# $4 = 1 if that subtraction borrowed, else 0
+	stq	$3,0($16)	# *res_ptr = updated limb
+	addq	$16,8,$16	# res_ptr++
+	beq	$18,.Lend2	# size was 2: skip the loop
+
+	.align	3
+.Loop:	mulq	$2,$19,$3	# $3 = low half of the current product
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# $0 = carry-in limb ($4 + $0 from the previous limb)
+	subq	$18,1,$18	# size--
+	umulh	$2,$19,$4	# $4 = high half of the current product
+	ldq	$2,0($17)	# $2 = s1 limb for the next pass
+	addq	$17,8,$17	# s1_ptr++
+	addq	$3,$0,$3	# $3 = low half + carry-in limb
+	cmpult	$3,$0,$0	# $0 = 1 if that addition wrapped, else 0
+	subq	$5,$3,$3	# $3 = *res_ptr - $3
+	cmpult	$5,$3,$5	# $5 = 1 if that subtraction borrowed, else 0
+	stq	$3,0($16)	# *res_ptr = updated limb
+	addq	$16,8,$16	# res_ptr++
+	addq	$5,$0,$0	# $0 = carry bit + borrow bit
+	bne	$18,.Loop	# repeat until only the last limb is left
+
+.Lend2: mulq	$2,$19,$3	# $3 = low half of the last product
+	ldq	$5,0($16)	# $5 = *res_ptr
+	addq	$4,$0,$0	# $0 = carry-in limb ($4 + $0 from the previous limb)
+	umulh	$2,$19,$4	# $4 = high half of the last product
+	addq	$3,$0,$3	# $3 = low half + carry-in limb
+	cmpult	$3,$0,$0	# $0 = 1 if that addition wrapped, else 0
+	subq	$5,$3,$3	# $3 = *res_ptr - $3
+	cmpult	$5,$3,$5	# $5 = 1 if that subtraction borrowed, else 0
+	stq	$3,0($16)	# *res_ptr = last updated limb
+	addq	$5,$0,$0	# $0 = carry bit + borrow bit
+	addq	$4,$0,$0	# return value = last high half + carry/borrow
+	ret	$31,($26),1	# return $0
+.Lend1: subq	$5,$3,$3	# size == 1: $3 = *res_ptr - low half
+	cmpult	$5,$3,$5	# $5 = 1 if that subtraction borrowed, else 0
+	stq	$3,0($16)	# *res_ptr = updated limb
+	addq	$0,$5,$0	# return value = high half + borrow
+	ret	$31,($26),1	# return $0
+
+	.end	mpihelp_submul_1
+
diff --git a/mpi/alpha/mpih-rshift.S b/mpi/alpha/mpih-rshift.S
new file mode 100644
index 00000000..e93315ae
--- /dev/null
+++ b/mpi/alpha/mpih-rshift.S
@@ -0,0 +1,120 @@
+/* alpha    rshift
+ *	Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+ *	Copyright (C) 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp,	(r16)
+ *		   mpi_ptr_t up,	(r17)
+ *		   mpi_size_t usize,	(r18)
+ *		   unsigned cnt)	(r19)
+ *
+ * This code runs at 4.8 cycles/limb on the 21064.  With infinite unrolling,
+ * it would take 4 cycles/limb.  It should be possible to get down to 3
+ * cycles/limb since both ldq and stq can be paired with the other used
+ * instructions.  But there are many restrictions in the 21064 pipeline that
+ * make it hard, if not impossible, to get down to 3 cycles/limb:
+ *
+ * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ * 2. Only aligned instruction pairs can be paired.
+ * 3. The store buffer or silo might not be able to deal with the bandwidth.
+ */
+
+	.set	noreorder	# assembler must keep the instruction order below
+	.set	noat		# assembler must not use the $at temporary itself
+.text
+	.align	3
+	.globl	mpihelp_rshift
+	.ent	mpihelp_rshift
+mpihelp_rshift:
+	.frame	$30,0,$26,0	# no stack frame; return address is in $26
+
+	ldq	$4,0($17)	# $4 = first source limb (the "current" limb)
+	addq	$17,8,$17	# up++
+	subq	$31,$19,$7	# $7 = -cnt: count for the complementary left shifts
+	subq	$18,1,$18	# usize-1: the last limb is finished at .Rend
+	and	$18,4-1,$20	# $20 = (usize-1) mod 4: limbs for the first loop
+	sll	$4,$7,$0	# return value: bits shifted out of the first limb
+
+	beq	$20,.R0		# nothing for the first loop
+	subq	$18,$20,$18	# $18 = limbs left for the unrolled loop
+
+	.align	3
+.Roop0:
+	ldq	$3,0($17)	# $3 = next source limb
+	addq	$16,8,$16	# wp++ (the store below uses offset -8)
+	addq	$17,8,$17	# up++
+	subq	$20,1,$20	# one limb less for this loop
+	srl	$4,$19,$5	# $5 = current limb >> cnt
+	sll	$3,$7,$6	# $6 = bits of the next limb that move into it
+	bis	$3,$3,$4	# next limb becomes the current limb
+	bis	$5,$6,$8	# $8 = combined result limb
+	stq	$8,-8($16)	# store result limb
+	bne	$20,.Roop0
+
+.R0:	beq	$18,.Rend	# no group of four left
+
+	.align	3
+.Roop:	ldq	$3,0($17)	# 4x unrolled; $3/$4 alternate as current/next limb
+	addq	$16,32,$16	# wp += 4 limbs (stores below use negative offsets)
+	subq	$18,4,$18	# four limbs less to do
+	srl	$4,$19,$5	# $5 = current limb >> cnt
+	sll	$3,$7,$6	# $6 = bits of the next limb that move into it
+
+	ldq	$4,8($17)	# load the limb after that
+	srl	$3,$19,$1	# start result limb 1 ($1/$2 are its two parts)
+	bis	$5,$6,$8	# combine parts of result limb 0
+	stq	$8,-32($16)	# store result limb 0 of this group
+	sll	$4,$7,$2
+
+	ldq	$3,16($17)
+	srl	$4,$19,$5	# start result limb 2 ($5/$6 are its two parts)
+	bis	$1,$2,$8	# combine parts of result limb 1
+	stq	$8,-24($16)	# store result limb 1 of this group
+	sll	$3,$7,$6
+
+	ldq	$4,24($17)	# this limb stays in $4 as the next current limb
+	srl	$3,$19,$1	# start result limb 3
+	bis	$5,$6,$8	# combine parts of result limb 2
+	stq	$8,-16($16)	# store result limb 2 of this group
+	sll	$4,$7,$2
+
+	addq	$17,32,$17	# up += 4 limbs
+	bis	$1,$2,$8	# combine parts of result limb 3
+	stq	$8,-8($16)	# store result limb 3 of this group
+
+	bgt	$18,.Roop	# loop while limbs remain
+
+.Rend:	srl	$4,$19,$8	# last limb: only zeros are shifted in from above
+	stq	$8,0($16)	# store the last result limb
+	ret	$31,($26),1	# return $0
+	.end	mpihelp_rshift
+
diff --git a/mpi/alpha/mpih-sub1.S b/mpi/alpha/mpih-sub1.S
new file mode 100644
index 00000000..bf614309
--- /dev/null
+++ b/mpi/alpha/mpih-sub1.S
@@ -0,0 +1,123 @@
+/* Alpha  sub_n -- Subtract two limb vectors of the same length > 0 and
+ *		  store difference in a third limb vector.
+ * Copyright (C) 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ *  mpi_limb_t
+ *  mpihelp_sub_n( mpi_ptr_t res_ptr,	(r16)
+ *		   mpi_ptr_t s1_ptr,	(r17)
+ *		   mpi_ptr_t s2_ptr,	(r18)
+ *		   mpi_size_t size)	(r19)
+ */
+
+	.set	noreorder	# assembler must keep the instruction order below
+	.set	noat		# assembler must not use the $at temporary itself
+.text
+	.align	3
+	.globl	mpihelp_sub_n
+	.ent	mpihelp_sub_n
+mpihelp_sub_n:
+	.frame	$30,0,$26,0	# no stack frame; return address is in $26
+
+	ldq	$3,0($17)	# $3 = s1_ptr[0]
+	ldq	$4,0($18)	# $4 = s2_ptr[0]
+
+	subq	$19,1,$19	# size-1: the last limb is finished at .Lend
+	and	$19,4-1,$2	# $2 = (size-1) mod 4: limbs for the first loop
+	bis	$31,$31,$0	# $0 = 0: no borrow yet
+	beq	$2,.L0		# (size-1) is a multiple of 4: skip first loop
+
+	subq	$19,$2,$19	# $19 = limbs left for the unrolled loop
+
+.Loop0: subq	$2,1,$2	# one limb less for this loop
+	ldq	$5,8($17)	# $5 = next s1 limb
+	addq	$4,$0,$4	# $4 = s2 limb + borrow-in
+	ldq	$6,8($18)	# $6 = next s2 limb
+	cmpult	$4,$0,$1	# $1 = 1 if s2 limb + borrow-in wrapped
+	subq	$3,$4,$4	# $4 = s1 limb - (s2 limb + borrow-in)
+	cmpult	$3,$4,$0	# $0 = 1 if that subtraction borrowed
+	stq	$4,0($16)	# store difference limb
+	or	$0,$1,$0	# $0 = borrow-out
+
+	addq	$17,8,$17	# s1_ptr++
+	addq	$18,8,$18	# s2_ptr++
+	bis	$5,$5,$3	# next s1 limb becomes the current one
+	bis	$6,$6,$4	# next s2 limb becomes the current one
+	addq	$16,8,$16	# res_ptr++
+	bne	$2,.Loop0
+
+.L0:	beq	$19,.Lend	# no group of four left
+
+	.align	3
+.Loop:	subq	$19,4,$19	# 4x unrolled; ($3,$4) and ($5,$6) alternate
+
+	ldq	$5,8($17)	# preload s1 limb 1
+	addq	$4,$0,$4	# limb 0: s2 limb + borrow-in
+	ldq	$6,8($18)	# preload s2 limb 1
+	cmpult	$4,$0,$1	# $1 = 1 if that addition wrapped
+	subq	$3,$4,$4	# difference limb 0
+	cmpult	$3,$4,$0	# $0 = 1 if that subtraction borrowed
+	stq	$4,0($16)	# store difference limb 0
+	or	$0,$1,$0	# $0 = borrow-out
+
+	ldq	$3,16($17)	# preload s1 limb 2
+	addq	$6,$0,$6	# limb 1: s2 limb + borrow-in
+	ldq	$4,16($18)	# preload s2 limb 2
+	cmpult	$6,$0,$1	# $1 = 1 if that addition wrapped
+	subq	$5,$6,$6	# difference limb 1
+	cmpult	$5,$6,$0	# $0 = 1 if that subtraction borrowed
+	stq	$6,8($16)	# store difference limb 1
+	or	$0,$1,$0	# $0 = borrow-out
+
+	ldq	$5,24($17)	# preload s1 limb 3
+	addq	$4,$0,$4	# limb 2: s2 limb + borrow-in
+	ldq	$6,24($18)	# preload s2 limb 3
+	cmpult	$4,$0,$1	# $1 = 1 if that addition wrapped
+	subq	$3,$4,$4	# difference limb 2
+	cmpult	$3,$4,$0	# $0 = 1 if that subtraction borrowed
+	stq	$4,16($16)	# store difference limb 2
+	or	$0,$1,$0	# $0 = borrow-out
+
+	ldq	$3,32($17)	# preload s1 limb for the next pass or .Lend
+	addq	$6,$0,$6	# limb 3: s2 limb + borrow-in
+	ldq	$4,32($18)	# preload s2 limb for the next pass or .Lend
+	cmpult	$6,$0,$1	# $1 = 1 if that addition wrapped
+	subq	$5,$6,$6	# difference limb 3
+	cmpult	$5,$6,$0	# $0 = 1 if that subtraction borrowed
+	stq	$6,24($16)	# store difference limb 3
+	or	$0,$1,$0	# $0 = borrow-out
+
+	addq	$17,32,$17	# s1_ptr += 4 limbs
+	addq	$18,32,$18	# s2_ptr += 4 limbs
+	addq	$16,32,$16	# res_ptr += 4 limbs
+	bne	$19,.Loop
+
+.Lend:	addq	$4,$0,$4	# last limb: s2 limb + borrow-in
+	cmpult	$4,$0,$1	# $1 = 1 if that addition wrapped
+	subq	$3,$4,$4	# last difference limb
+	cmpult	$3,$4,$0	# $0 = 1 if that subtraction borrowed
+	stq	$4,0($16)	# store the last difference limb
+	or	$0,$1,$0	# return value = final borrow (0 or 1)
+	ret	$31,($26),1	# return $0
+
+	.end	mpihelp_sub_n
+
+
diff --git a/mpi/config.links b/mpi/config.links
index 699ca858..bd525abc 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -150,7 +150,7 @@ fi
 
 # fixme: grep these modules from Makefile.in
 mpi_ln_modules="${mpi_extra_modules} mpih-add1 mpih-mul1 mpih-mul2 mpih-mul3 \
-		 mpih-shift mpih-sub1"
+		 mpih-lshift mpih-rshift mpih-sub1"
 
 mpi_ln_objects=
 mpi_ln_src=
diff --git a/mpi/generic/distfiles b/mpi/generic/distfiles
index 1febb49d..02a1ca4c 100644
--- a/mpi/generic/distfiles
+++ b/mpi/generic/distfiles
@@ -2,6 +2,7 @@ mpih-add1.c
 mpih-mul1.c
 mpih-mul2.c
 mpih-mul3.c
-mpih-shift.c
+mpih-lshift.c
+mpih-rshift.c
 mpih-sub1.c
 
diff --git a/mpi/hppa/distfiles b/mpi/hppa/distfiles
index 83c59ea6..7f24205d 100644
--- a/mpi/hppa/distfiles
+++ b/mpi/hppa/distfiles
@@ -2,5 +2,6 @@ README
 udiv-qrnnd.S
 mpih-add1.S
 mpih-sub1.S
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
 
diff --git a/mpi/hppa/mpih-shift.S b/mpi/hppa/mpih-lshift.S
index 153fbd7f..ada09f59 100644
--- a/mpi/hppa/mpih-shift.S
+++ b/mpi/hppa/mpih-lshift.S
@@ -1,6 +1,6 @@
-/* hppa   rshift, lshift
- *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
- *	Copyright (C) 1998 Free Software Foundation, Inc.
+/* hppa   lshift
+ *
+ * Copyright (C) 1992, 1994, 1998 Free Software Foundation, Inc.
  *
  * This file is part of GNUPG.
  *
@@ -17,14 +17,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *	 Actually it's the same code with only minor changes in the
- *	 way the data is stored; this is to support the abstraction
- *	 of an optional secure memory allocation which may be used
- *	 to avoid revealing of sensitive data due to paging etc.
- *	 The GNU MP Library itself is published under the LGPL;
- *	 however I decided to publish this code under the plain GPL.
  */
 
 
@@ -78,47 +70,3 @@ L$0004	vshd		%r22,%r0,%r20
 
 
 
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp,       (gr26)
- *		   mpi_ptr_t up,       (gr25)
- *		   mpi_size_t usize,   (gr24)
- *		   unsigned cnt)       (gr23)
- */
-
-	.code
-	.export 	mpihelp_rshift
-mpihelp_rshift
-	.proc
-	.callinfo	frame=64,no_calls
-	.entry
-
-	ldws,ma 	4(0,%r25),%r22
-	mtsar		%r23
-	addib,= 	-1,%r24,L$r004
-	vshd		%r22,%r0,%r28		; compute carry out limb
-	ldws,ma 	4(0,%r25),%r29
-	addib,= 	-1,%r24,L$r002
-	vshd		%r29,%r22,%r20
-
-L$roop	ldws,ma 	4(0,%r25),%r22
-	stws,ma 	%r20,4(0,%r26)
-	addib,= 	-1,%r24,L$r003
-	vshd		%r22,%r29,%r20
-	ldws,ma 	4(0,%r25),%r29
-	stws,ma 	%r20,4(0,%r26)
-	addib,<>	-1,%r24,L$roop
-	vshd		%r29,%r22,%r20
-
-L$r002	stws,ma 	%r20,4(0,%r26)
-	vshd		%r0,%r29,%r20
-	bv		0(%r2)
-	stw		%r20,0(0,%r26)
-L$r003	stws,ma 	%r20,4(0,%r26)
-L$r004	vshd		%r0,%r22,%r20
-	bv		0(%r2)
-	stw		%r20,0(0,%r26)
-
-	.exit
-	.procend
-
diff --git a/mpi/hppa/mpih-rshift.S b/mpi/hppa/mpih-rshift.S
new file mode 100644
index 00000000..0299d2e2
--- /dev/null
+++ b/mpi/hppa/mpih-rshift.S
@@ -0,0 +1,68 @@
+/* hppa   rshift
+ *
+ * Copyright (C) 1992, 1994, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp,       (gr26)
+ *		   mpi_ptr_t up,       (gr25)
+ *		   mpi_size_t usize,   (gr24)
+ *		   unsigned cnt)       (gr23)
+ */
+
+	.code
+	.export 	mpihelp_rshift
+mpihelp_rshift
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldws,ma 	4(0,%r25),%r22		; r22 = first source limb, up += 4
+	mtsar		%r23			; shift amount register = cnt
+	addib,= 	-1,%r24,L$r004		; usize--; single limb: finish at L$r004
+	vshd		%r22,%r0,%r28		; compute carry out limb (delay slot)
+	ldws,ma 	4(0,%r25),%r29		; r29 = second source limb, up += 4
+	addib,= 	-1,%r24,L$r002		; usize--; two limbs: finish at L$r002
+	vshd		%r29,%r22,%r20		; r20 = result limb from r29:r22 (delay slot)
+
+L$roop	ldws,ma 	4(0,%r25),%r22		; r22 = next source limb, up += 4
+	stws,ma 	%r20,4(0,%r26)		; store pending result limb, wp += 4
+	addib,= 	-1,%r24,L$r003		; usize--; r22 was the last limb: leave
+	vshd		%r22,%r29,%r20		; r20 = result limb from r22:r29 (delay slot)
+	ldws,ma 	4(0,%r25),%r29		; r29 = next source limb, up += 4
+	stws,ma 	%r20,4(0,%r26)		; store pending result limb, wp += 4
+	addib,<>	-1,%r24,L$roop		; usize--; loop while limbs remain
+	vshd		%r29,%r22,%r20		; r20 = result limb from r29:r22 (delay slot)
+
+L$r002	stws,ma 	%r20,4(0,%r26)		; last limb is in r29: store pending limb
+	vshd		%r0,%r29,%r20		; r20 = last limb shifted, zeros shifted in
+	bv		0(%r2)			; return to the caller
+	stw		%r20,0(0,%r26)		; store last result limb (delay slot)
+L$r003	stws,ma 	%r20,4(0,%r26)		; last limb is in r22: store pending limb
+L$r004	vshd		%r0,%r22,%r20		; r20 = last limb shifted, zeros shifted in
+	bv		0(%r2)			; return to the caller
+	stw		%r20,0(0,%r26)		; store last result limb (delay slot)
+
+	.exit
+	.procend
+
diff --git a/mpi/i386/distfiles b/mpi/i386/distfiles
index 34de9157..88d2a30c 100644
--- a/mpi/i386/distfiles
+++ b/mpi/i386/distfiles
@@ -2,7 +2,8 @@ mpih-add1.S
 mpih-mul1.S
 mpih-mul2.S
 mpih-mul3.S
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
 mpih-sub1.S
 syntax.h
 
diff --git a/mpi/i386/mpih-lshift.S b/mpi/i386/mpih-lshift.S
new file mode 100644
index 00000000..a84f6b1f
--- /dev/null
+++ b/mpi/i386/mpih-lshift.S
@@ -0,0 +1,96 @@
+/* i80386   lshift
+ *	Copyright (C) 1998 Free Software Foundation, Inc.
+ *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_lshift( mpi_ptr_t wp,	(sp + 4)
+ *		   mpi_ptr_t up,	(sp + 8)
+ *		   mpi_size_t usize,	(sp + 12)
+ *		   unsigned cnt)	(sp + 16)
+ */
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift:)
+	pushl	%edi			/* save the registers used below */
+	pushl	%esi
+	pushl	%ebx
+
+	movl	16(%esp),%edi		/* res_ptr (args are 12 bytes up after 3 pushes) */
+	movl	20(%esp),%esi		/* s_ptr */
+	movl	24(%esp),%edx		/* size */
+	movl	28(%esp),%ecx		/* cnt */
+
+	subl	$4,%esi 		/* bias s_ptr: index N now reads limb N-1 */
+
+	movl	(%esi,%edx,4),%ebx	/* read most significant limb */
+	xorl	%eax,%eax
+	shldl	%cl,%ebx,%eax		/* carry limb = bits shifted out at the top */
+	decl	%edx
+	jz	Lend			/* single limb: no loop needed */
+	pushl	%eax			/* push carry limb onto stack */
+	testb	$1,%dl			/* low bit of the remaining count (byte op needs %dl) */
+	jnz	L1			/* odd count: enter loop in the middle */
+	movl	%ebx,%eax		/* Loop expects the upper limb in %eax */
+
+	ALIGN (3)
+Loop:	movl	(%esi,%edx,4),%ebx	/* load next lower limb */
+	shldl	%cl,%ebx,%eax		/* compute result limb */
+	movl	%eax,(%edi,%edx,4)	/* store it */
+	decl	%edx
+L1:	movl	(%esi,%edx,4),%eax	/* same step with %eax and %ebx swapped */
+	shldl	%cl,%eax,%ebx		/* compute result limb */
+	movl	%ebx,(%edi,%edx,4)	/* store it */
+	decl	%edx
+	jnz	Loop			/* two limbs per pass until index 0 is left */
+
+	shll	%cl,%eax		/* compute least significant limb */
+	movl	%eax,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb: this is the return value */
+
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+Lend:	shll	%cl,%ebx		/* compute least significant limb */
+	movl	%ebx,(%edi)		/* store it */
+
+	popl	%ebx			/* %eax still holds the carry limb here */
+	popl	%esi
+	popl	%edi
+	ret
+
diff --git a/mpi/i386/mpih-shift.S b/mpi/i386/mpih-rshift.S
index f10f268e..3dd9caf7 100644
--- a/mpi/i386/mpih-shift.S
+++ b/mpi/i386/mpih-rshift.S
@@ -1,4 +1,4 @@
-/* i80386   rshift, lshift
+/* i80386   rshift
  *	Copyright (C) 1998 Free Software Foundation, Inc.
  *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
  *
@@ -34,68 +34,6 @@
 
 /*******************
  * mpi_limb_t
- * mpihelp_lshift( mpi_ptr_t wp,	(sp + 4)
- *		   mpi_ptr_t up,	(sp + 8)
- *		   mpi_size_t usize,	(sp + 12)
- *		   unsigned cnt)	(sp + 16)
- */
-
-.text
-	ALIGN (3)
-	.globl C_SYMBOL_NAME(mpihelp_lshift)
-C_SYMBOL_NAME(mpihelp_lshift:)
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-
-	movl	16(%esp),%edi		/* res_ptr */
-	movl	20(%esp),%esi		/* s_ptr */
-	movl	24(%esp),%edx		/* size */
-	movl	28(%esp),%ecx		/* cnt */
-
-	subl	$4,%esi 		/* adjust s_ptr */
-
-	movl	(%esi,%edx,4),%ebx	/* read most significant limb */
-	xorl	%eax,%eax
-	shldl	%cl,%ebx,%eax		/* compute carry limb */
-	decl	%edx
-	jz	Lend
-	pushl	%eax			/* push carry limb onto stack */
-	testb	$1,%edx
-	jnz	L1			/* enter loop in the middle */
-	movl	%ebx,%eax
-
-	ALIGN (3)
-Loop:	movl	(%esi,%edx,4),%ebx	/* load next lower limb */
-	shldl	%cl,%ebx,%eax		/* compute result limb */
-	movl	%eax,(%edi,%edx,4)	/* store it */
-	decl	%edx
-L1:	movl	(%esi,%edx,4),%eax
-	shldl	%cl,%eax,%ebx
-	movl	%ebx,(%edi,%edx,4)
-	decl	%edx
-	jnz	Loop
-
-	shll	%cl,%eax		/* compute least significant limb */
-	movl	%eax,(%edi)		/* store it */
-
-	popl	%eax			/* pop carry limb */
-
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret
-
-Lend:	shll	%cl,%ebx		/* compute least significant limb */
-	movl	%ebx,(%edi)		/* store it */
-
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret
-
-/*******************
- * mpi_limb_t
  * mpihelp_rshift( mpi_ptr_t wp,	(sp + 4)
  *		   mpi_ptr_t up,	(sp + 8)
  *		   mpi_size_t usize,	(sp + 12)
diff --git a/mpi/i586/distfiles b/mpi/i586/distfiles
index 951480fd..8f821fbf 100644
--- a/mpi/i586/distfiles
+++ b/mpi/i586/distfiles
@@ -2,7 +2,8 @@ mpih-add1.S
 mpih-mul1.S
 mpih-mul2.S
 mpih-mul3.S
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
 mpih-sub1.S
 README
 
diff --git a/mpi/i586/mpih-lshift.S b/mpi/i586/mpih-lshift.S
new file mode 100644
index 00000000..9e0cb029
--- /dev/null
+++ b/mpi/i586/mpih-lshift.S
@@ -0,0 +1,230 @@
+/* i80586   lshift
+ *	Copyright (C) 1998 Free Software Foundation, Inc.
+ *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ *	 The GNU MP Library itself is published under the LGPL;
+ *	 however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_lshift( mpi_ptr_t wp,	(sp + 4)
+ *		   mpi_ptr_t up,	(sp + 8)
+ *		   mpi_size_t usize,	(sp + 12)
+ *		   unsigned cnt)	(sp + 16)
+ */
+
+.text
+	ALIGN (3)
+	.globl C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift:)
+
+	pushl	%edi
+	pushl	%esi
+	pushl	%ebx
+	pushl	%ebp			/* 4 saved regs + return address: args start at 20(%esp) */
+
+	movl	20(%esp),%edi		/* res_ptr */
+	movl	24(%esp),%esi		/* s_ptr */
+	movl	28(%esp),%ebp		/* size */
+	movl	32(%esp),%ecx		/* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions.  */
+	cmp	$1,%ecx
+	jne	Lnormal			/* cnt != 1: always use the shldl path */
+	leal	4(%esi),%eax
+	cmpl	%edi,%eax
+	jnc	Lspecial		/* jump if s_ptr + 1 >= res_ptr */
+	leal	(%esi,%ebp,4),%eax
+	cmpl	%eax,%edi
+	jnc	Lspecial		/* jump if res_ptr >= s_ptr + size */
+
+Lnormal:
+	leal	-4(%edi,%ebp,4),%edi	/* point at most significant result limb */
+	leal	-4(%esi,%ebp,4),%esi	/* point at most significant source limb */
+
+	movl	(%esi),%edx		/* edx = most significant source limb */
+	subl	$4,%esi
+	xorl	%eax,%eax
+	shldl	%cl,%edx,%eax		/* compute carry limb */
+	pushl	%eax			/* push carry limb onto stack */
+
+	decl	%ebp
+	pushl	%ebp			/* keep size-1 for the remainder loop */
+	shrl	$3,%ebp			/* number of 8-limb groups */
+	jz	Lend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+Loop:	movl	-28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx		/* ebx = limb pending from the previous step */
+
+	movl	(%esi),%eax
+	movl	-4(%esi),%edx
+	shldl	%cl,%eax,%ebx		/* shift in the top bits of the next lower limb */
+	shldl	%cl,%edx,%eax
+	movl	%ebx,(%edi)
+	movl	%eax,-4(%edi)
+
+	movl	-8(%esi),%ebx
+	movl	-12(%esi),%eax
+	shldl	%cl,%ebx,%edx
+	shldl	%cl,%eax,%ebx
+	movl	%edx,-8(%edi)
+	movl	%ebx,-12(%edi)
+
+	movl	-16(%esi),%edx
+	movl	-20(%esi),%ebx
+	shldl	%cl,%edx,%eax
+	shldl	%cl,%ebx,%edx
+	movl	%eax,-16(%edi)
+	movl	%edx,-20(%edi)
+
+	movl	-24(%esi),%eax
+	movl	-28(%esi),%edx		/* edx stays pending for the next iteration */
+	shldl	%cl,%eax,%ebx
+	shldl	%cl,%edx,%eax
+	movl	%ebx,-24(%edi)
+	movl	%eax,-28(%edi)
+
+	subl	$32,%esi		/* 8 limbs done: step both pointers down */
+	subl	$32,%edi
+	decl	%ebp
+	jnz	Loop
+
+Lend:	popl	%ebp			/* reload size-1 */
+	andl	$7,%ebp			/* limbs left over after the 8-limb groups */
+	jz	Lend2
+Loop2:	movl	(%esi),%eax		/* one limb per iteration */
+	shldl	%cl,%eax,%edx
+	movl	%edx,(%edi)
+	movl	%eax,%edx
+	subl	$4,%esi
+	subl	$4,%edi
+	decl	%ebp
+	jnz	Loop2
+
+Lend2:	shll	%cl,%edx		/* compute least significant limb */
+	movl	%edx,(%edi)		/* store it */
+
+	popl	%eax			/* pop carry limb */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+/* Shift-by-1 path.  We loop from the least significant end of the arrays,
+   which is only permissible when an overlap cannot clobber unread source
+   limbs; the function is documented to work for overlapping operands, so
+   this path is entered only via the pointer checks at the function start. */
+
+Lspecial:
+	movl	(%esi),%edx		/* edx = least significant source limb */
+	addl	$4,%esi
+
+	decl	%ebp
+	pushl	%ebp			/* keep size-1 for the remainder loop */
+	shrl	$3,%ebp			/* number of 8-limb groups */
+
+	addl	%edx,%edx		/* shift left by one; CF = bit shifted out */
+	incl	%ebp			/* inc/dec leave CF alone but set ZF from ebp */
+	decl	%ebp
+	jz	LLend
+
+	movl	(%edi),%eax		/* fetch destination cache line */
+
+	ALIGN	(2)
+LLoop:	movl	28(%edi),%eax		/* fetch destination cache line */
+	movl	%edx,%ebx		/* ebx = already shifted limb waiting to be stored */
+
+	movl	(%esi),%eax
+	movl	4(%esi),%edx
+	adcl	%eax,%eax		/* shift left by one through the carry flag */
+	movl	%ebx,(%edi)
+	adcl	%edx,%edx
+	movl	%eax,4(%edi)
+
+	movl	8(%esi),%ebx
+	movl	12(%esi),%eax
+	adcl	%ebx,%ebx
+	movl	%edx,8(%edi)
+	adcl	%eax,%eax
+	movl	%ebx,12(%edi)
+
+	movl	16(%esi),%edx
+	movl	20(%esi),%ebx
+	adcl	%edx,%edx
+	movl	%eax,16(%edi)
+	adcl	%ebx,%ebx
+	movl	%edx,20(%edi)
+
+	movl	24(%esi),%eax
+	movl	28(%esi),%edx
+	adcl	%eax,%eax
+	movl	%ebx,24(%edi)
+	adcl	%edx,%edx
+	movl	%eax,28(%edi)
+
+	leal	32(%esi),%esi		/* use leal not to clobber carry */
+	leal	32(%edi),%edi
+	decl	%ebp
+	jnz	LLoop
+
+LLend:	popl	%ebp			/* reload size-1 */
+	sbbl	%eax,%eax		/* save carry in %eax */
+	andl	$7,%ebp			/* leftover limbs; andl clobbers the carry flag */
+	jz	LLend2
+	addl	%eax,%eax		/* restore carry from eax */
+LLoop2: movl	%edx,%ebx
+	movl	(%esi),%edx
+	adcl	%edx,%edx
+	movl	%ebx,(%edi)
+
+	leal	4(%esi),%esi		/* use leal not to clobber carry */
+	leal	4(%edi),%edi
+	decl	%ebp
+	jnz	LLoop2
+
+	jmp	LL1
+LLend2: addl	%eax,%eax		/* restore carry from eax */
+LL1:	movl	%edx,(%edi)		/* store last limb */
+
+	sbbl	%eax,%eax		/* eax = 0 or -1 depending on the final carry */
+	negl	%eax			/* return value: the bit shifted out (0 or 1) */
+
+	popl	%ebp
+	popl	%ebx
+	popl	%esi
+	popl	%edi
+	ret
+
+
diff --git a/mpi/i586/mpih-shift.S b/mpi/i586/mpih-rshift.S
index 07d5fbc8..aec26e64 100644
--- a/mpi/i586/mpih-shift.S
+++ b/mpi/i586/mpih-rshift.S
@@ -1,4 +1,4 @@
-/* i80586   rshift, lshift
+/* i80586   rshift
  *	Copyright (C) 1998 Free Software Foundation, Inc.
  *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
  *
@@ -32,203 +32,6 @@
 #include "asm-syntax.h"
 
 
-/*******************
- * mpi_limb_t
- * mpihelp_lshift( mpi_ptr_t wp,	(sp + 4)
- *		   mpi_ptr_t up,	(sp + 8)
- *		   mpi_size_t usize,	(sp + 12)
- *		   unsigned cnt)	(sp + 16)
- */
-
-.text
-	ALIGN (3)
-	.globl C_SYMBOL_NAME(mpihelp_lshift)
-C_SYMBOL_NAME(mpihelp_lshift:)
-
-	pushl	%edi
-	pushl	%esi
-	pushl	%ebx
-	pushl	%ebp
-
-	movl	20(%esp),%edi		/* res_ptr */
-	movl	24(%esp),%esi		/* s_ptr */
-	movl	28(%esp),%ebp		/* size */
-	movl	32(%esp),%ecx		/* cnt */
-
-/* We can use faster code for shift-by-1 under certain conditions.  */
-	cmp	$1,%ecx
-	jne	Lnormal
-	leal	4(%esi),%eax
-	cmpl	%edi,%eax
-	jnc	Lspecial		/* jump if s_ptr + 1 >= res_ptr */
-	leal	(%esi,%ebp,4),%eax
-	cmpl	%eax,%edi
-	jnc	Lspecial		/* jump if res_ptr >= s_ptr + size */
-
-Lnormal:
-	leal	-4(%edi,%ebp,4),%edi
-	leal	-4(%esi,%ebp,4),%esi
-
-	movl	(%esi),%edx
-	subl	$4,%esi
-	xorl	%eax,%eax
-	shldl	%cl,%edx,%eax		/* compute carry limb */
-	pushl	%eax			/* push carry limb onto stack */
-
-	decl	%ebp
-	pushl	%ebp
-	shrl	$3,%ebp
-	jz	Lend
-
-	movl	(%edi),%eax		/* fetch destination cache line */
-
-	ALIGN	(2)
-Loop:	movl	-28(%edi),%eax		/* fetch destination cache line */
-	movl	%edx,%ebx
-
-	movl	(%esi),%eax
-	movl	-4(%esi),%edx
-	shldl	%cl,%eax,%ebx
-	shldl	%cl,%edx,%eax
-	movl	%ebx,(%edi)
-	movl	%eax,-4(%edi)
-
-	movl	-8(%esi),%ebx
-	movl	-12(%esi),%eax
-	shldl	%cl,%ebx,%edx
-	shldl	%cl,%eax,%ebx
-	movl	%edx,-8(%edi)
-	movl	%ebx,-12(%edi)
-
-	movl	-16(%esi),%edx
-	movl	-20(%esi),%ebx
-	shldl	%cl,%edx,%eax
-	shldl	%cl,%ebx,%edx
-	movl	%eax,-16(%edi)
-	movl	%edx,-20(%edi)
-
-	movl	-24(%esi),%eax
-	movl	-28(%esi),%edx
-	shldl	%cl,%eax,%ebx
-	shldl	%cl,%edx,%eax
-	movl	%ebx,-24(%edi)
-	movl	%eax,-28(%edi)
-
-	subl	$32,%esi
-	subl	$32,%edi
-	decl	%ebp
-	jnz	Loop
-
-Lend:	popl	%ebp
-	andl	$7,%ebp
-	jz	Lend2
-Loop2:	movl	(%esi),%eax
-	shldl	%cl,%eax,%edx
-	movl	%edx,(%edi)
-	movl	%eax,%edx
-	subl	$4,%esi
-	subl	$4,%edi
-	decl	%ebp
-	jnz	Loop2
-
-Lend2:	shll	%cl,%edx		/* compute least significant limb */
-	movl	%edx,(%edi)		/* store it */
-
-	popl	%eax			/* pop carry limb */
-
-	popl	%ebp
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret
-
-/* We loop from least significant end of the arrays, which is only
-   permissable if the source and destination don't overlap, since the
-   function is documented to work for overlapping source and destination.
-*/
-
-Lspecial:
-	movl	(%esi),%edx
-	addl	$4,%esi
-
-	decl	%ebp
-	pushl	%ebp
-	shrl	$3,%ebp
-
-	addl	%edx,%edx
-	incl	%ebp
-	decl	%ebp
-	jz	LLend
-
-	movl	(%edi),%eax		/* fetch destination cache line */
-
-	ALIGN	(2)
-LLoop:	movl	28(%edi),%eax		/* fetch destination cache line */
-	movl	%edx,%ebx
-
-	movl	(%esi),%eax
-	movl	4(%esi),%edx
-	adcl	%eax,%eax
-	movl	%ebx,(%edi)
-	adcl	%edx,%edx
-	movl	%eax,4(%edi)
-
-	movl	8(%esi),%ebx
-	movl	12(%esi),%eax
-	adcl	%ebx,%ebx
-	movl	%edx,8(%edi)
-	adcl	%eax,%eax
-	movl	%ebx,12(%edi)
-
-	movl	16(%esi),%edx
-	movl	20(%esi),%ebx
-	adcl	%edx,%edx
-	movl	%eax,16(%edi)
-	adcl	%ebx,%ebx
-	movl	%edx,20(%edi)
-
-	movl	24(%esi),%eax
-	movl	28(%esi),%edx
-	adcl	%eax,%eax
-	movl	%ebx,24(%edi)
-	adcl	%edx,%edx
-	movl	%eax,28(%edi)
-
-	leal	32(%esi),%esi		/* use leal not to clobber carry */
-	leal	32(%edi),%edi
-	decl	%ebp
-	jnz	LLoop
-
-LLend:	popl	%ebp
-	sbbl	%eax,%eax		/* save carry in %eax */
-	andl	$7,%ebp
-	jz	LLend2
-	addl	%eax,%eax		/* restore carry from eax */
-LLoop2: movl	%edx,%ebx
-	movl	(%esi),%edx
-	adcl	%edx,%edx
-	movl	%ebx,(%edi)
-
-	leal	4(%esi),%esi		/* use leal not to clobber carry */
-	leal	4(%edi),%edi
-	decl	%ebp
-	jnz	LLoop2
-
-	jmp	LL1
-LLend2: addl	%eax,%eax		/* restore carry from eax */
-LL1:	movl	%edx,(%edi)		/* store last limb */
-
-	sbbl	%eax,%eax
-	negl	%eax
-
-	popl	%ebp
-	popl	%ebx
-	popl	%esi
-	popl	%edi
-	ret
-
-
-
 
 /*******************
  * mpi_limb_t
diff --git a/mpi/m68k/distfiles b/mpi/m68k/distfiles
index c69629e7..4c0967b8 100644
--- a/mpi/m68k/distfiles
+++ b/mpi/m68k/distfiles
@@ -1,5 +1,6 @@
 syntax.h
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
 mpih-add1.S
 mpih-sub1.S
 
diff --git a/mpi/m68k/mpih-shift.S b/mpi/m68k/mpih-lshift.S
index 09b1bcd5..af305c84 100644
--- a/mpi/m68k/mpih-shift.S
+++ b/mpi/m68k/mpih-lshift.S
@@ -1,15 +1,15 @@
-/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
+/* mc68020 lshift -- Shift left a low-level natural-number integer.
 
-Copyright (C) 1996 Free Software Foundation, Inc.
+Copyright (C) 1996, 1998 Free Software Foundation, Inc.
 
-This file is part of the GNU MP Library.
+This file is part of GNUPG.
 
-The GNU MP Library is free software; you can redistribute it and/or modify
+GNUPG is free software; you can redistribute it and/or modify
 it under the terms of the GNU Library General Public License as published by
 the Free Software Foundation; either version 2 of the License, or (at your
 option) any later version.
 
-The GNU MP Library is distributed in the hope that it will be useful, but
+GNUPG is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 License for more details.
@@ -17,7 +17,8 @@ License for more details.
 You should have received a copy of the GNU Library General Public License
 along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
 the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-MA 02111-1307, USA. */
+MA 02111-1307, USA.
+*/
 
 #include "sysdep.h"
 #include "asm-syntax.h"
@@ -154,133 +155,4 @@ EPILOG(mpihelp_lshift)
 
 
 
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp,	(sp + 4)
- *		   mpi_ptr_t up,	(sp + 8)
- *		   mpi_size_t usize,	(sp + 12)
- *		   unsigned cnt)	(sp + 16)
- */
-
-#define res_ptr a1
-#define s_ptr a0
-#define s_size d6
-#define cnt d4
-
-	TEXT
-	ALIGN
-	GLOBL	C_SYMBOL_NAME(mpihelp_rshift)
-
-C_SYMBOL_NAME(mpihelp_rshift:)
-PROLOG(mpihelp_rshift)
-	/* Save used registers on the stack.  */
-	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
-
-	/* Copy the arguments to registers.  */
-	movel	MEM_DISP(sp,28),R(res_ptr)
-	movel	MEM_DISP(sp,32),R(s_ptr)
-	movel	MEM_DISP(sp,36),R(s_size)
-	movel	MEM_DISP(sp,40),R(cnt)
-
-	moveql	#1,R(d5)
-	cmpl	R(d5),R(cnt)
-	bne	L(Rnormal)
-	cmpl	R(res_ptr),R(s_ptr)
-	bls	L(Rspecial)		/* jump if res_ptr >= s_ptr */
-#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
-	lea	MEM_INDX1(res_ptr,s_size,l,4),R(a2)
-#else /* not mc68020 */
-	movel	R(s_size),R(d0)
-	asll	#2,R(d0)
-	lea	MEM_INDX(res_ptr,d0,l),R(a2)
-#endif
-	cmpl	R(s_ptr),R(a2)
-	bls	L(Rspecial)		/* jump if s_ptr >= res_ptr + s_size */
-
-L(Rnormal:)
-	moveql	#32,R(d5)
-	subl	R(cnt),R(d5)
-	movel	MEM_POSTINC(s_ptr),R(d2)
-	movel	R(d2),R(d0)
-	lsll	R(d5),R(d0)		/* compute carry limb */
-
-	lsrl	R(cnt),R(d2)
-	movel	R(d2),R(d1)
-	subql	#1,R(s_size)
-	beq	L(Rend)
-	lsrl	#1,R(s_size)
-	bcs	L(R1)
-	subql	#1,R(s_size)
-
-L(Roop:)
-	movel	MEM_POSTINC(s_ptr),R(d2)
-	movel	R(d2),R(d3)
-	lsll	R(d5),R(d3)
-	orl	R(d3),R(d1)
-	movel	R(d1),MEM_POSTINC(res_ptr)
-	lsrl	R(cnt),R(d2)
-L(R1:)
-	movel	MEM_POSTINC(s_ptr),R(d1)
-	movel	R(d1),R(d3)
-	lsll	R(d5),R(d3)
-	orl	R(d3),R(d2)
-	movel	R(d2),MEM_POSTINC(res_ptr)
-	lsrl	R(cnt),R(d1)
-
-	dbf	R(s_size),L(Roop)
-	subl	#0x10000,R(s_size)
-	bcc	L(Roop)
-
-L(Rend:)
-	movel	R(d1),MEM(res_ptr) /* store most significant limb */
-
-/* Restore used registers from stack frame.  */
-	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
-	rts
-
-/* We loop from most significant end of the arrays, which is only
-   permissable if the source and destination don't overlap, since the
-   function is documented to work for overlapping source and destination.  */
-
-L(Rspecial:)
-#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
-	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
-	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
-#else /* not mc68000 */
-	movel	R(s_size),R(d0)
-	asll	#2,R(d0)
-	addl	R(s_size),R(s_ptr)
-	addl	R(s_size),R(res_ptr)
-#endif
-
-	clrl	R(d0)			/* initialize carry */
-	eorw	#1,R(s_size)
-	lsrl	#1,R(s_size)
-	bcc	L(LR1)
-	subql	#1,R(s_size)
-
-L(LRoop:)
-	movel	MEM_PREDEC(s_ptr),R(d2)
-	roxrl	#1,R(d2)
-	movel	R(d2),MEM_PREDEC(res_ptr)
-L(LR1:)
-	movel	MEM_PREDEC(s_ptr),R(d2)
-	roxrl	#1,R(d2)
-	movel	R(d2),MEM_PREDEC(res_ptr)
-
-	dbf	R(s_size),L(LRoop)
-	roxrl	#1,R(d0)		/* save cy in msb */
-	subl	#0x10000,R(s_size)
-	bcs	L(LRend)
-	addl	R(d0),R(d0)		/* restore cy */
-	bra	L(LRoop)
-
-L(LRend:)
-/* Restore used registers from stack frame.  */
-	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
-	rts
-EPILOG(mpihelp_rshift)
-
-
-
 
diff --git a/mpi/m68k/mpih-rshift.S b/mpi/m68k/mpih-rshift.S
new file mode 100644
index 00000000..5670888c
--- /dev/null
+++ b/mpi/m68k/mpih-rshift.S
@@ -0,0 +1,156 @@
+/* mc68020 rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of GNUPG.
+
+GNUPG is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+GNUPG is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp,	(sp + 4)
+ *		   mpi_ptr_t up,	(sp + 8)
+ *		   mpi_size_t usize,	(sp + 12)
+ *		   unsigned cnt)	(sp + 16)
+ */
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+	TEXT
+	ALIGN
+	GLOBL	C_SYMBOL_NAME(mpihelp_rshift)
+
+C_SYMBOL_NAME(mpihelp_rshift:)
+PROLOG(mpihelp_rshift)
+	/* Save used registers on the stack (6 registers = 24 bytes).  */
+	moveml	R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+	/* Copy the arguments to registers (24 saved bytes + return address).  */
+	movel	MEM_DISP(sp,28),R(res_ptr)
+	movel	MEM_DISP(sp,32),R(s_ptr)
+	movel	MEM_DISP(sp,36),R(s_size)
+	movel	MEM_DISP(sp,40),R(cnt)
+
+	moveql	#1,R(d5)
+	cmpl	R(d5),R(cnt)
+	bne	L(Rnormal)		/* cnt != 1: always use the general path */
+	cmpl	R(res_ptr),R(s_ptr)
+	bls	L(Rspecial)		/* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)		/* d0 = s_size * 4 = length in bytes */
+	lea	MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+	cmpl	R(s_ptr),R(a2)
+	bls	L(Rspecial)		/* jump if s_ptr >= res_ptr + s_size */
+
+L(Rnormal:)
+	moveql	#32,R(d5)
+	subl	R(cnt),R(d5)		/* d5 = 32 - cnt */
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	movel	R(d2),R(d0)
+	lsll	R(d5),R(d0)		/* compute carry limb */
+
+	lsrl	R(cnt),R(d2)
+	movel	R(d2),R(d1)		/* d1 and d2 both hold the pending partial limb */
+	subql	#1,R(s_size)
+	beq	L(Rend)			/* single limb: nothing left to combine */
+	lsrl	#1,R(s_size)		/* the loop handles two limbs per pass */
+	bcs	L(R1)			/* odd remainder: enter in the middle */
+	subql	#1,R(s_size)
+
+L(Roop:)
+	movel	MEM_POSTINC(s_ptr),R(d2)
+	movel	R(d2),R(d3)
+	lsll	R(d5),R(d3)		/* low bits of the next limb move to the top */
+	orl	R(d3),R(d1)
+	movel	R(d1),MEM_POSTINC(res_ptr)
+	lsrl	R(cnt),R(d2)
+L(R1:)
+	movel	MEM_POSTINC(s_ptr),R(d1)
+	movel	R(d1),R(d3)
+	lsll	R(d5),R(d3)
+	orl	R(d3),R(d2)
+	movel	R(d2),MEM_POSTINC(res_ptr)
+	lsrl	R(cnt),R(d1)
+
+	dbf	R(s_size),L(Roop)	/* dbf only counts the low 16 bits */
+	subl	#0x10000,R(s_size)	/* so step the high word by hand */
+	bcc	L(Roop)
+
+L(Rend:)
+	movel	R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+
+/* Shift-by-1 path.  We loop from the most significant end of the arrays,
+   which is only permissible when an overlap cannot clobber unread source
+   limbs; it is entered only via the pointer checks at the function start.  */
+
+L(Rspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+	lea	MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+	lea	MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68020 */
+	movel	R(s_size),R(d0)
+	asll	#2,R(d0)		/* d0 = s_size * 4 = length in bytes */
+	addl	R(d0),R(s_ptr)		/* advance by bytes, not by limb count */
+	addl	R(d0),R(res_ptr)
+#endif
+
+	clrl	R(d0)			/* initialize carry */
+	eorw	#1,R(s_size)		/* choose the loop entry from the parity of s_size */
+	lsrl	#1,R(s_size)
+	bcc	L(LR1)
+	subql	#1,R(s_size)
+
+L(LRoop:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	roxrl	#1,R(d2)		/* shift right by one through the X flag */
+	movel	R(d2),MEM_PREDEC(res_ptr)
+L(LR1:)
+	movel	MEM_PREDEC(s_ptr),R(d2)
+	roxrl	#1,R(d2)
+	movel	R(d2),MEM_PREDEC(res_ptr)
+
+	dbf	R(s_size),L(LRoop)
+	roxrl	#1,R(d0)		/* save cy in msb */
+	subl	#0x10000,R(s_size)	/* step the high word of the counter */
+	bcs	L(LRend)
+	addl	R(d0),R(d0)		/* restore cy */
+	bra	L(LRoop)
+
+L(LRend:)
+/* Restore used registers from stack frame.  */
+	moveml	MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+	rts
+EPILOG(mpihelp_rshift)
+
+
+
+
diff --git a/mpi/pa7100/distfiles b/mpi/pa7100/distfiles
index d468fa20..fece9431 100644
--- a/mpi/pa7100/distfiles
+++ b/mpi/pa7100/distfiles
@@ -1,2 +1,3 @@
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
 
diff --git a/mpi/pa7100/mpih-shift.S b/mpi/pa7100/mpih-lshift.S
index 1287f30f..e17e1b70 100644
--- a/mpi/pa7100/mpih-shift.S
+++ b/mpi/pa7100/mpih-lshift.S
@@ -1,4 +1,4 @@
-/* hppa   rshift, lshift
+/* hppa   lshift
  *	  optimized for the PA7100, where is runs at 3.25 cycles/limb
  *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
  *	Copyright (C) 1998 Free Software Foundation, Inc.
@@ -18,14 +18,6 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *	 Actually it's the same code with only minor changes in the
- *	 way the data is stored; this is to support the abstraction
- *	 of an optional secure memory allocation which may be used
- *	 to avoid revealing of sensitive data due to paging etc.
- *	 The GNU MP Library itself is published under the LGPL;
- *	 however I decided to publish this code under the plain GPL.
  */
 
 
@@ -95,64 +87,3 @@ L$0004	vshd		%r22,%r0,%r20
 
 
 
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp,       (gr26)
- *		   mpi_ptr_t up,       (gr25)
- *		   mpi_size_t usize,   (gr24)
- *		   unsigned cnt)       (gr23)
- */
-
-	.code
-	.export 	mpihelp_rshift
-mpihelp_rshift
-	.proc
-	.callinfo	frame=64,no_calls
-	.entry
-
-	ldws,ma 	4(0,%r25),%r22
-	mtsar		%r23
-	addib,= 	-1,%r24,L$r004
-	vshd		%r22,%r0,%r28		; compute carry out limb
-	ldws,ma 	4(0,%r25),%r29
-	addib,<=	-5,%r24,L$rrest
-	vshd		%r29,%r22,%r20
-
-L$roop	ldws,ma 	4(0,%r25),%r22
-	stws,ma 	%r20,4(0,%r26)
-	vshd		%r22,%r29,%r20
-	ldws,ma 	4(0,%r25),%r29
-	stws,ma 	%r20,4(0,%r26)
-	vshd		%r29,%r22,%r20
-	ldws,ma 	4(0,%r25),%r22
-	stws,ma 	%r20,4(0,%r26)
-	vshd		%r22,%r29,%r20
-	ldws,ma 	4(0,%r25),%r29
-	stws,ma 	%r20,4(0,%r26)
-	addib,> 	-4,%r24,L$roop
-	vshd		%r29,%r22,%r20
-
-L$rrest addib,= 	4,%r24,L$rend1
-	nop
-L$eroop ldws,ma 	4(0,%r25),%r22
-	stws,ma 	%r20,4(0,%r26)
-	addib,<=	-1,%r24,L$rend2
-	vshd		%r22,%r29,%r20
-	ldws,ma 	4(0,%r25),%r29
-	stws,ma 	%r20,4(0,%r26)
-	addib,> 	-1,%r24,L$eroop
-	vshd		%r29,%r22,%r20
-
-L$rend1  stws,ma	 %r20,4(0,%r26)
-	vshd		%r0,%r29,%r20
-	bv		0(%r2)
-	stw		%r20,0(0,%r26)
-L$rend2  stws,ma	 %r20,4(0,%r26)
-L$r004	vshd		%r0,%r22,%r20
-	bv		0(%r2)
-	stw		%r20,0(0,%r26)
-
-	.exit
-	.procend
-
-
diff --git a/mpi/pa7100/mpih-rshift.S b/mpi/pa7100/mpih-rshift.S
new file mode 100644
index 00000000..b409de04
--- /dev/null
+++ b/mpi/pa7100/mpih-rshift.S
@@ -0,0 +1,85 @@
+/* hppa   rshift
+ *	  optimized for the PA7100, where it runs at 3.25 cycles/limb
+ *	Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *	Copyright (C) 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp,       (gr26)
+ *		   mpi_ptr_t up,       (gr25)
+ *		   mpi_size_t usize,   (gr24)
+ *		   unsigned cnt)       (gr23)
+ */
+
+	.code
+	.export 	mpihelp_rshift
+mpihelp_rshift
+	.proc
+	.callinfo	frame=64,no_calls
+	.entry
+
+	ldws,ma 	4(0,%r25),%r22		; first source limb, post-increment up
+	mtsar		%r23			; shift amount register = cnt
+	addib,= 	-1,%r24,L$r004		; single limb: finish at L$r004
+	vshd		%r22,%r0,%r28		; compute carry out limb
+	ldws,ma 	4(0,%r25),%r29		; second source limb
+	addib,<=	-5,%r24,L$rrest		; too few limbs left for the unrolled loop
+	vshd		%r29,%r22,%r20		; first result limb, stored later
+
+L$roop	ldws,ma 	4(0,%r25),%r22		; unrolled: four limbs per pass
+	stws,ma 	%r20,4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,ma 	4(0,%r25),%r29
+	stws,ma 	%r20,4(0,%r26)
+	vshd		%r29,%r22,%r20
+	ldws,ma 	4(0,%r25),%r22
+	stws,ma 	%r20,4(0,%r26)
+	vshd		%r22,%r29,%r20
+	ldws,ma 	4(0,%r25),%r29
+	stws,ma 	%r20,4(0,%r26)
+	addib,> 	-4,%r24,L$roop
+	vshd		%r29,%r22,%r20		; follows the branch (delay slot)
+
+L$rrest addib,= 	4,%r24,L$rend1		; undo the bias from above; zero left?
+	nop
+L$eroop ldws,ma 	4(0,%r25),%r22		; remainder loop, registers alternate
+	stws,ma 	%r20,4(0,%r26)
+	addib,<=	-1,%r24,L$rend2
+	vshd		%r22,%r29,%r20
+	ldws,ma 	4(0,%r25),%r29
+	stws,ma 	%r20,4(0,%r26)
+	addib,> 	-1,%r24,L$eroop
+	vshd		%r29,%r22,%r20
+
+L$rend1  stws,ma	 %r20,4(0,%r26)		; last loaded limb is in r29
+	vshd		%r0,%r29,%r20		; most significant result limb
+	bv		0(%r2)			; return through r2
+	stw		%r20,0(0,%r26)		; store follows the branch (delay slot)
+L$rend2  stws,ma	 %r20,4(0,%r26)		; last loaded limb is in r22
+L$r004	vshd		%r0,%r22,%r20		; most significant result limb
+	bv		0(%r2)			; return through r2
+	stw		%r20,0(0,%r26)		; store follows the branch (delay slot)
+
+	.exit
+	.procend
+
+
diff --git a/mpi/sparc32/distfiles b/mpi/sparc32/distfiles
index 7933edc8..95ff4288 100644
--- a/mpi/sparc32/distfiles
+++ b/mpi/sparc32/distfiles
@@ -1,4 +1,6 @@
 
+mpih-lshift.S
+mpih-rshift.S
 mpih-add1.S
 udiv.S
 
diff --git a/mpi/sparc32/mpih-add1.S b/mpi/sparc32/mpih-add1.S
index 04315d10..b90d9d60 100644
--- a/mpi/sparc32/mpih-add1.S
+++ b/mpi/sparc32/mpih-add1.S
@@ -1,24 +1,25 @@
-! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
-! sum in a third limb vector.
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Library General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE.	See the GNU Library General Public
-! License for more details.
+/* SPARC _add_n -- Add two limb vectors of the same length > 0 and store
+ *		   sum in a third limb vector.
+ *
+ * Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
 
-! You should have received a copy of the GNU Library General Public License
-! along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-! MA 02111-1307, USA.
 
 
 /*******************
diff --git a/mpi/sparc32/mpih-lshift.S b/mpi/sparc32/mpih-lshift.S
new file mode 100644
index 00000000..5348a157
--- /dev/null
+++ b/mpi/sparc32/mpih-lshift.S
@@ -0,0 +1,96 @@
+/* sparc lshift
+ *
+ * Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift):
+	sll	%o2,2,%g1	! size in bytes
+	add	%o1,%g1,%o1	! make %o1 point at end of src
+	ld	[%o1-4],%g2	! load first limb
+	sub	%g0,%o3,%o5	! negate shift count
+	add	%o0,%g1,%o0	! make %o0 point at end of res
+	add	%o2,-1,%o2	! limbs left after the first one
+	andcc	%o2,4-1,%g4	! number of limbs in first loop
+	srl	%g2,%o5,%g1	! compute function result
+	be	L0		! if multiple of 4 limbs, skip first loop
+	st	%g1,[%sp+80]	! delay slot: park the result on the stack
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+Loop0:	ld	[%o1-8],%g3	! next lower source limb
+	add	%o0,-4,%o0
+	add	%o1,-4,%o1
+	addcc	%g4,-1,%g4
+	sll	%g2,%o3,%o4	! high part from the current limb
+	srl	%g3,%o5,%g1	! low part from the next lower limb
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	bne	Loop0
+	 st	%o4,[%o0+0]	! delay slot: store the combined limb
+
+L0:	tst	%o2
+	be	Lend		! no groups of four left
+	 nop
+
+Loop:	ld	[%o1-8],%g3	! main loop: four limbs per pass
+	add	%o0,-16,%o0
+	addcc	%o2,-4,%o2	! sets the flags tested by bne below
+	sll	%g2,%o3,%o4
+	srl	%g3,%o5,%g1
+
+	ld	[%o1-12],%g2
+	sll	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0+12]
+	srl	%g2,%o5,%g1
+
+	ld	[%o1-16],%g3
+	sll	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	st	%g4,[%o0+8]
+	srl	%g3,%o5,%g1
+
+	ld	[%o1-20],%g2
+	sll	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0+4]
+	srl	%g2,%o5,%g1
+
+	add	%o1,-16,%o1
+	or	%g4,%g1,%g4
+	bne	Loop
+	 st	%g4,[%o0+0]	! delay slot: store the fourth limb
+
+Lend:	sll	%g2,%o3,%g2	! least significant limb gets zeros shifted in
+	st	%g2,[%o0-4]
+	retl
+	ld	[%sp+80],%o0	! delay slot: reload the function result
+
diff --git a/mpi/sparc32/mpih-rshift.S b/mpi/sparc32/mpih-rshift.S
new file mode 100644
index 00000000..00996266
--- /dev/null
+++ b/mpi/sparc32/mpih-rshift.S
@@ -0,0 +1,92 @@
+/* sparc rshift
+ *
+ * Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+! INPUT PARAMETERS
+! res_ptr	%o0
+! src_ptr	%o1
+! size		%o2
+! cnt		%o3
+
+#include "sysdep.h"
+
+	.text
+	.align	4
+	.global C_SYMBOL_NAME(mpihelp_rshift)	! must name the label defined below
+C_SYMBOL_NAME(mpihelp_rshift):
+	ld	[%o1],%g2	! load first limb
+	sub	%g0,%o3,%o5	! negate shift count
+	add	%o2,-1,%o2	! limbs left after the first one
+	andcc	%o2,4-1,%g4	! number of limbs in first loop
+	sll	%g2,%o5,%g1	! compute function result
+	be	L0		! if multiple of 4 limbs, skip first loop
+	st	%g1,[%sp+80]	! delay slot: park the result on the stack
+
+	sub	%o2,%g4,%o2	! adjust count for main loop
+
+Loop0:	ld	[%o1+4],%g3	! next higher source limb
+	add	%o0,4,%o0
+	add	%o1,4,%o1
+	addcc	%g4,-1,%g4
+	srl	%g2,%o3,%o4	! low part from the current limb
+	sll	%g3,%o5,%g1	! high part from the next higher limb
+	mov	%g3,%g2
+	or	%o4,%g1,%o4
+	bne	Loop0
+	 st	%o4,[%o0-4]	! delay slot: store the combined limb
+
+L0:	tst	%o2
+	be	Lend		! no groups of four left
+	 nop
+
+Loop:	ld	[%o1+4],%g3	! main loop: four limbs per pass
+	add	%o0,16,%o0
+	addcc	%o2,-4,%o2	! sets the flags tested by bne below
+	srl	%g2,%o3,%o4
+	sll	%g3,%o5,%g1
+
+	ld	[%o1+8],%g2
+	srl	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0-16]
+	sll	%g2,%o5,%g1
+
+	ld	[%o1+12],%g3
+	srl	%g2,%o3,%o4
+	or	%g4,%g1,%g4
+	st	%g4,[%o0-12]
+	sll	%g3,%o5,%g1
+
+	ld	[%o1+16],%g2
+	srl	%g3,%o3,%g4
+	or	%o4,%g1,%o4
+	st	%o4,[%o0-8]
+	sll	%g2,%o5,%g1
+
+	add	%o1,16,%o1
+	or	%g4,%g1,%g4
+	bne	Loop
+	 st	%g4,[%o0-4]	! delay slot: store the fourth limb
+
+Lend:	srl	%g2,%o3,%g2	! most significant limb gets zeros shifted in
+	st	%g2,[%o0-0]
+	retl
+	ld	[%sp+80],%o0	! delay slot: reload the function result
+
author	Werner Koch <wk@gnupg.org>	1998-05-26 13:37:59 +0000
committer	Werner Koch <wk@gnupg.org>	1998-05-26 13:37:59 +0000
commit	525b0bc632ccfef3b5d5b5829a082f5a2b6150ff (patch)
tree	5ca7ba86848678c3198daadc917de67891e81798 /mpi
parent	17e97734857f1032785583d760f19d0f23a431f7 (diff)
download	libgcrypt-525b0bc632ccfef3b5d5b5829a082f5a2b6150ff.tar.gz