summaryrefslogtreecommitdiff
path: root/mpi
diff options
context:
space:
mode:
authorWerner Koch <wk@gnupg.org>1998-05-26 13:37:59 +0000
committerWerner Koch <wk@gnupg.org>1998-05-26 13:37:59 +0000
commit525b0bc632ccfef3b5d5b5829a082f5a2b6150ff (patch)
tree5ca7ba86848678c3198daadc917de67891e81798 /mpi
parent17e97734857f1032785583d760f19d0f23a431f7 (diff)
downloadlibgcrypt-525b0bc632ccfef3b5d5b5829a082f5a2b6150ff.tar.gz
add-key works
Diffstat (limited to 'mpi')
-rw-r--r--mpi/ChangeLog6
-rw-r--r--mpi/Makefile.am5
-rw-r--r--mpi/Makefile.in5
-rw-r--r--mpi/alpha/distfiles7
-rw-r--r--mpi/alpha/mpih-add1.S8
-rw-r--r--mpi/alpha/mpih-lshift.S (renamed from mpi/alpha/mpih-shift.S)93
-rw-r--r--mpi/alpha/mpih-mul1.S89
-rw-r--r--mpi/alpha/mpih-mul2.S96
-rw-r--r--mpi/alpha/mpih-mul3.S94
-rw-r--r--mpi/alpha/mpih-rshift.S120
-rw-r--r--mpi/alpha/mpih-sub1.S123
-rw-r--r--mpi/config.links2
-rw-r--r--mpi/generic/distfiles3
-rw-r--r--mpi/hppa/distfiles3
-rw-r--r--mpi/hppa/mpih-lshift.S (renamed from mpi/hppa/mpih-shift.S)58
-rw-r--r--mpi/hppa/mpih-rshift.S68
-rw-r--r--mpi/i386/distfiles3
-rw-r--r--mpi/i386/mpih-lshift.S96
-rw-r--r--mpi/i386/mpih-rshift.S (renamed from mpi/i386/mpih-shift.S)64
-rw-r--r--mpi/i586/distfiles3
-rw-r--r--mpi/i586/mpih-lshift.S230
-rw-r--r--mpi/i586/mpih-rshift.S (renamed from mpi/i586/mpih-shift.S)199
-rw-r--r--mpi/m68k/distfiles3
-rw-r--r--mpi/m68k/mpih-lshift.S (renamed from mpi/m68k/mpih-shift.S)142
-rw-r--r--mpi/m68k/mpih-rshift.S156
-rw-r--r--mpi/pa7100/distfiles3
-rw-r--r--mpi/pa7100/mpih-lshift.S (renamed from mpi/pa7100/mpih-shift.S)71
-rw-r--r--mpi/pa7100/mpih-rshift.S85
-rw-r--r--mpi/sparc32/distfiles2
-rw-r--r--mpi/sparc32/mpih-add1.S41
-rw-r--r--mpi/sparc32/mpih-lshift.S96
-rw-r--r--mpi/sparc32/mpih-rshift.S92
32 files changed, 1413 insertions, 653 deletions
diff --git a/mpi/ChangeLog b/mpi/ChangeLog
index 86d7413b..15da105f 100644
--- a/mpi/ChangeLog
+++ b/mpi/ChangeLog
@@ -1,3 +1,9 @@
+Mon May 18 13:47:06 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * config.links: split mpih-shift into mpih-[lr]shift and
+ changed all implementations.
+ * mpi/alpha: add some new assembler stuff.
+
Wed May 13 11:04:29 1998 Werner Koch (wk@isil.d.shuttle.de)
* config.links: Add support for MIPS
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
index e7a50d28..ead60a68 100644
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -7,7 +7,7 @@ SUFFIXES = .S .s
EXTRA_DIST = config.links
DISTCLEANFILES = mpih-add1.S mpih-mul1.S mpih-mul2.S mpih-mul3.S \
- mpih-shift.S mpih-sub1.S asm-syntax.h sysdep.h
+ mpih-lshift.S mpih-rshift.S mpih-sub1.S asm-syntax.h sysdep.h
noinst_LIBRARIES = libmpi.a
@@ -43,7 +43,8 @@ common_asm_objects = mpih-mul1.o \
mpih-mul3.o \
mpih-add1.o \
mpih-sub1.o \
- mpih-shift.o
+ mpih-lshift.o \
+ mpih-rshift.o
libmpi_a_DEPENDENCIES = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
libmpi_a_LIBADD = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
index c0db398a..cd14c384 100644
--- a/mpi/Makefile.in
+++ b/mpi/Makefile.in
@@ -98,7 +98,7 @@ SUFFIXES = .S .s
EXTRA_DIST = config.links
DISTCLEANFILES = mpih-add1.S mpih-mul1.S mpih-mul2.S mpih-mul3.S \
- mpih-shift.S mpih-sub1.S asm-syntax.h sysdep.h
+ mpih-lshift.S mpih-rshift.S mpih-sub1.S asm-syntax.h sysdep.h
noinst_LIBRARIES = libmpi.a
# noinst_HEADERS =
@@ -131,7 +131,8 @@ common_asm_objects = mpih-mul1.o \
mpih-mul3.o \
mpih-add1.o \
mpih-sub1.o \
- mpih-shift.o
+ mpih-lshift.o \
+ mpih-rshift.o
libmpi_a_DEPENDENCIES = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
libmpi_a_LIBADD = $(common_asm_objects) @MPI_EXTRA_ASM_OBJS@
diff --git a/mpi/alpha/distfiles b/mpi/alpha/distfiles
index e92d183d..f2ab9fc3 100644
--- a/mpi/alpha/distfiles
+++ b/mpi/alpha/distfiles
@@ -1,6 +1,11 @@
README
mpih-add1.S
-mpih-shift.S
+mpih-sub1.S
+mpih-mul1.S
+mpih-mul2.S
+mpih-mul3.S
+mpih-lshift.S
+mpih-rshift.S
udiv-qrnnd.S
diff --git a/mpi/alpha/mpih-add1.S b/mpi/alpha/mpih-add1.S
index 54cec43f..dc3bcfbb 100644
--- a/mpi/alpha/mpih-add1.S
+++ b/mpi/alpha/mpih-add1.S
@@ -19,14 +19,6 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- * Actually it's the same code with only minor changes in the
- * way the data is stored; this is to support the abstraction
- * of an optional secure memory allocation which may be used
- * to avoid revealing of sensitive data due to paging etc.
- * The GNU MP Library itself is published under the LGPL;
- * however I decided to publish this code under the plain GPL.
*/
diff --git a/mpi/alpha/mpih-shift.S b/mpi/alpha/mpih-lshift.S
index 8bbd10cd..9688588f 100644
--- a/mpi/alpha/mpih-shift.S
+++ b/mpi/alpha/mpih-lshift.S
@@ -1,4 +1,4 @@
-/* alpha rshift, lshift
+/* alpha lshift
* Copyright (C) 1994, 1995 Free Software Foundation, Inc.
* Copyright (C) 1998 Free Software Foundation, Inc.
*
@@ -120,94 +120,3 @@ mpihelp_lshift:
.end mpihelp_lshift
-
-
-
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp, (r16)
- * mpi_ptr_t up, (r17)
- * mpi_size_t usize, (r18)
- * unsigned cnt) (r19)
- *
- * This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
- * it would take 4 cycles/limb. It should be possible to get down to 3
- * cycles/limb since both ldq and stq can be paired with the other used
- * instructions. But there are many restrictions in the 21064 pipeline that
- * makes it hard, if not impossible, to get down to 3 cycles/limb:
- *
- * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
- * 2. Only aligned instruction pairs can be paired.
- * 3. The store buffer or silo might not be able to deal with the bandwidth.
- */
-
- .set noreorder
- .set noat
-.text
- .align 3
- .globl mpihelp_rshift
- .ent mpihelp_rshift
-mpihelp_rshift:
- .frame $30,0,$26,0
-
- ldq $4,0($17) # load first limb
- addq $17,8,$17
- subq $31,$19,$7
- subq $18,1,$18
- and $18,4-1,$20 # number of limbs in first loop
- sll $4,$7,$0 # compute function result
-
- beq $20,.R0
- subq $18,$20,$18
-
- .align 3
-.Roop0:
- ldq $3,0($17)
- addq $16,8,$16
- addq $17,8,$17
- subq $20,1,$20
- srl $4,$19,$5
- sll $3,$7,$6
- bis $3,$3,$4
- bis $5,$6,$8
- stq $8,-8($16)
- bne $20,.Roop0
-
-.R0: beq $18,.Rend
-
- .align 3
-.Roop: ldq $3,0($17)
- addq $16,32,$16
- subq $18,4,$18
- srl $4,$19,$5
- sll $3,$7,$6
-
- ldq $4,8($17)
- srl $3,$19,$1
- bis $5,$6,$8
- stq $8,-32($16)
- sll $4,$7,$2
-
- ldq $3,16($17)
- srl $4,$19,$5
- bis $1,$2,$8
- stq $8,-24($16)
- sll $3,$7,$6
-
- ldq $4,24($17)
- srl $3,$19,$1
- bis $5,$6,$8
- stq $8,-16($16)
- sll $4,$7,$2
-
- addq $17,32,$17
- bis $1,$2,$8
- stq $8,-8($16)
-
- bgt $18,.Roop
-
-.Rend: srl $4,$19,$8
- stq $8,0($16)
- ret $31,($26),1
- .end mpihelp_rshift
-
diff --git a/mpi/alpha/mpih-mul1.S b/mpi/alpha/mpih-mul1.S
new file mode 100644
index 00000000..5b24d98d
--- /dev/null
+++ b/mpi/alpha/mpih-mul1.S
@@ -0,0 +1,89 @@
+/* Alpha 21064 mpih-mul1.S -- Multiply a limb vector with a limb and store
+ * the result in a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_mul_1( mpi_ptr_t res_ptr, (r16)
+ * mpi_ptr_t s1_ptr, (r17)
+ * mpi_size_t s1_size, (r18)
+ * mpi_limb_t s2_limb) (r19)
+ *
+ * This code runs at 42 cycles/limb on the EV4 and 18 cycles/limb on the EV5.
+ *
+ * To improve performance for long multiplications, we would use
+ * 'fetch' for S1 and 'fetch_m' for RES. It's not obvious how to use
+ * these instructions without slowing down the general code: 1. We can
+ * only have two prefetches in operation at any time in the Alpha
+ * architecture. 2. There will seldom be any special alignment
+ * between RES_PTR and S1_PTR. Maybe we can simply divide the current
+ * loop into an inner and outer loop, having the inner loop handle
+ * exactly one prefetch block?
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_mul_1
+ .ent mpihelp_mul_1 2
+mpihelp_mul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ bic $31,$31,$4 # clear cy_limb
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,Lend1 # jump if size was == 1
+ ldq $2,8($17) # $2 = s1_limb
+ subq $18,1,$18 # size--
+ stq $3,0($16)
+ beq $18,Lend2 # jump if size was == 2
+
+ .align 3
+Loop: mulq $2,$19,$3 # $3 = prod_low
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,16($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ stq $3,8($16)
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $16,8,$16 # res_ptr++
+ bne $18,Loop
+
+Lend2: mulq $2,$19,$3 # $3 = prod_low
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ stq $3,8($16)
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+Lend1: stq $3,0($16)
+ ret $31,($26),1
+
+ .end mpihelp_mul_1
+
+
diff --git a/mpi/alpha/mpih-mul2.S b/mpi/alpha/mpih-mul2.S
new file mode 100644
index 00000000..0c8d361c
--- /dev/null
+++ b/mpi/alpha/mpih-mul2.S
@@ -0,0 +1,96 @@
+/* Alpha 21064 addmul_1 -- Multiply a limb vector with a limb and add
+ * the result to a second limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_addmul_1( mpi_ptr_t res_ptr, (r16)
+ * mpi_ptr_t s1_ptr, (r17)
+ * mpi_size_t s1_size, (r18)
+ * mpi_limb_t s2_limb) (r19)
+ *
+ * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+ */
+
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_addmul_1
+ .ent mpihelp_addmul_1 2
+mpihelp_addmul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,.Lend1 # jump if size was == 1
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ addq $5,$3,$3
+ cmpult $3,$5,$4
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ beq $18,.Lend2 # jump if size was == 2
+
+ .align 3
+.Loop: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ addq $5,$0,$0 # combine carries
+ bne $18,.Loop
+
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $5,$0,$0 # combine carries
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+.Lend1: addq $5,$3,$3
+ cmpult $3,$5,$5
+ stq $3,0($16)
+ addq $0,$5,$0
+ ret $31,($26),1
+
+ .end mpihelp_addmul_1
+
diff --git a/mpi/alpha/mpih-mul3.S b/mpi/alpha/mpih-mul3.S
new file mode 100644
index 00000000..bdf16b57
--- /dev/null
+++ b/mpi/alpha/mpih-mul3.S
@@ -0,0 +1,94 @@
+/* Alpha 21064 submul_1 -- Multiply a limb vector with a limb and
+ * subtract the result from a second limb vector.
+ * Copyright (C) 1992, 1994, 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_submul_1( mpi_ptr_t res_ptr, (r16 )
+ * mpi_ptr_t s1_ptr, (r17 )
+ * mpi_size_t s1_size, (r18 )
+ * mpi_limb_t s2_limb) (r19 )
+ *
+ * This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_submul_1
+ .ent mpihelp_submul_1 2
+mpihelp_submul_1:
+ .frame $30,0,$26
+
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ umulh $2,$19,$0 # $0 = prod_high
+ beq $18,.Lend1 # jump if size was == 1
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ subq $18,1,$18 # size--
+ subq $5,$3,$3
+ cmpult $5,$3,$4
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ beq $18,.Lend2 # jump if size was == 2
+
+ .align 3
+.Loop: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ subq $18,1,$18 # size--
+ umulh $2,$19,$4 # $4 = cy_limb
+ ldq $2,0($17) # $2 = s1_limb
+ addq $17,8,$17 # s1_ptr++
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $16,8,$16 # res_ptr++
+ addq $5,$0,$0 # combine carries
+ bne $18,.Loop
+
+.Lend2: mulq $2,$19,$3 # $3 = prod_low
+ ldq $5,0($16) # $5 = *res_ptr
+ addq $4,$0,$0 # cy_limb = cy_limb + 'cy'
+ umulh $2,$19,$4 # $4 = cy_limb
+ addq $3,$0,$3 # $3 = cy_limb + prod_low
+ cmpult $3,$0,$0 # $0 = carry from (cy_limb + prod_low)
+ subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $5,$0,$0 # combine carries
+ addq $4,$0,$0 # cy_limb = prod_high + cy
+ ret $31,($26),1
+.Lend1: subq $5,$3,$3
+ cmpult $5,$3,$5
+ stq $3,0($16)
+ addq $0,$5,$0
+ ret $31,($26),1
+
+ .end mpihelp_submul_1
+
diff --git a/mpi/alpha/mpih-rshift.S b/mpi/alpha/mpih-rshift.S
new file mode 100644
index 00000000..e93315ae
--- /dev/null
+++ b/mpi/alpha/mpih-rshift.S
@@ -0,0 +1,120 @@
+/* alpha rshift
+ * Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+ * Copyright (C) 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp, (r16)
+ * mpi_ptr_t up, (r17)
+ * mpi_size_t usize, (r18)
+ * unsigned cnt) (r19)
+ *
+ * This code runs at 4.8 cycles/limb on the 21064. With infinite unrolling,
+ * it would take 4 cycles/limb. It should be possible to get down to 3
+ * cycles/limb since both ldq and stq can be paired with the other used
+ * instructions. But there are many restrictions in the 21064 pipeline that
+ * makes it hard, if not impossible, to get down to 3 cycles/limb:
+ *
+ * 1. ldq has a 3 cycle delay, srl and sll have a 2 cycle delay.
+ * 2. Only aligned instruction pairs can be paired.
+ * 3. The store buffer or silo might not be able to deal with the bandwidth.
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_rshift
+ .ent mpihelp_rshift
+mpihelp_rshift:
+ .frame $30,0,$26,0
+
+ ldq $4,0($17) # load first limb
+ addq $17,8,$17
+ subq $31,$19,$7
+ subq $18,1,$18
+ and $18,4-1,$20 # number of limbs in first loop
+ sll $4,$7,$0 # compute function result
+
+ beq $20,.R0
+ subq $18,$20,$18
+
+ .align 3
+.Roop0:
+ ldq $3,0($17)
+ addq $16,8,$16
+ addq $17,8,$17
+ subq $20,1,$20
+ srl $4,$19,$5
+ sll $3,$7,$6
+ bis $3,$3,$4
+ bis $5,$6,$8
+ stq $8,-8($16)
+ bne $20,.Roop0
+
+.R0: beq $18,.Rend
+
+ .align 3
+.Roop: ldq $3,0($17)
+ addq $16,32,$16
+ subq $18,4,$18
+ srl $4,$19,$5
+ sll $3,$7,$6
+
+ ldq $4,8($17)
+ srl $3,$19,$1
+ bis $5,$6,$8
+ stq $8,-32($16)
+ sll $4,$7,$2
+
+ ldq $3,16($17)
+ srl $4,$19,$5
+ bis $1,$2,$8
+ stq $8,-24($16)
+ sll $3,$7,$6
+
+ ldq $4,24($17)
+ srl $3,$19,$1
+ bis $5,$6,$8
+ stq $8,-16($16)
+ sll $4,$7,$2
+
+ addq $17,32,$17
+ bis $1,$2,$8
+ stq $8,-8($16)
+
+ bgt $18,.Roop
+
+.Rend: srl $4,$19,$8
+ stq $8,0($16)
+ ret $31,($26),1
+ .end mpihelp_rshift
+
diff --git a/mpi/alpha/mpih-sub1.S b/mpi/alpha/mpih-sub1.S
new file mode 100644
index 00000000..bf614309
--- /dev/null
+++ b/mpi/alpha/mpih-sub1.S
@@ -0,0 +1,123 @@
+/* Alpha sub_n -- Subtract two limb vectors of the same length > 0 and
+ * store difference in a third limb vector.
+ * Copyright (C) 1995, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_sub_n( mpi_ptr_t res_ptr, (r16)
+ * mpi_ptr_t s1_ptr, (r17)
+ * mpi_ptr_t s2_ptr, (r18)
+ * mpi_size_t size) (r19)
+ */
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl mpihelp_sub_n
+ .ent mpihelp_sub_n
+mpihelp_sub_n:
+ .frame $30,0,$26,0
+
+ ldq $3,0($17)
+ ldq $4,0($18)
+
+ subq $19,1,$19
+ and $19,4-1,$2 # number of limbs in first loop
+ bis $31,$31,$0
+ beq $2,.L0 # if multiple of 4 limbs, skip first loop
+
+ subq $19,$2,$19
+
+.Loop0: subq $2,1,$2
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ addq $17,8,$17
+ addq $18,8,$18
+ bis $5,$5,$3
+ bis $6,$6,$4
+ addq $16,8,$16
+ bne $2,.Loop0
+
+.L0: beq $19,.Lend
+
+ .align 3
+.Loop: subq $19,4,$19
+
+ ldq $5,8($17)
+ addq $4,$0,$4
+ ldq $6,8($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+
+ ldq $3,16($17)
+ addq $6,$0,$6
+ ldq $4,16($18)
+ cmpult $6,$0,$1
+ subq $5,$6,$6
+ cmpult $5,$6,$0
+ stq $6,8($16)
+ or $0,$1,$0
+
+ ldq $5,24($17)
+ addq $4,$0,$4
+ ldq $6,24($18)
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,16($16)
+ or $0,$1,$0
+
+ ldq $3,32($17)
+ addq $6,$0,$6
+ ldq $4,32($18)
+ cmpult $6,$0,$1
+ subq $5,$6,$6
+ cmpult $5,$6,$0
+ stq $6,24($16)
+ or $0,$1,$0
+
+ addq $17,32,$17
+ addq $18,32,$18
+ addq $16,32,$16
+ bne $19,.Loop
+
+.Lend: addq $4,$0,$4
+ cmpult $4,$0,$1
+ subq $3,$4,$4
+ cmpult $3,$4,$0
+ stq $4,0($16)
+ or $0,$1,$0
+ ret $31,($26),1
+
+ .end mpihelp_sub_n
+
+
diff --git a/mpi/config.links b/mpi/config.links
index 699ca858..bd525abc 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -150,7 +150,7 @@ fi
# fixme: grep these modules from Makefile.in
mpi_ln_modules="${mpi_extra_modules} mpih-add1 mpih-mul1 mpih-mul2 mpih-mul3 \
- mpih-shift mpih-sub1"
+ mpih-lshift mpih-rshift mpih-sub1"
mpi_ln_objects=
mpi_ln_src=
diff --git a/mpi/generic/distfiles b/mpi/generic/distfiles
index 1febb49d..02a1ca4c 100644
--- a/mpi/generic/distfiles
+++ b/mpi/generic/distfiles
@@ -2,6 +2,7 @@ mpih-add1.c
mpih-mul1.c
mpih-mul2.c
mpih-mul3.c
-mpih-shift.c
+mpih-lshift.c
+mpih-rshift.c
mpih-sub1.c
diff --git a/mpi/hppa/distfiles b/mpi/hppa/distfiles
index 83c59ea6..7f24205d 100644
--- a/mpi/hppa/distfiles
+++ b/mpi/hppa/distfiles
@@ -2,5 +2,6 @@ README
udiv-qrnnd.S
mpih-add1.S
mpih-sub1.S
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
diff --git a/mpi/hppa/mpih-shift.S b/mpi/hppa/mpih-lshift.S
index 153fbd7f..ada09f59 100644
--- a/mpi/hppa/mpih-shift.S
+++ b/mpi/hppa/mpih-lshift.S
@@ -1,6 +1,6 @@
-/* hppa rshift, lshift
- * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
- * Copyright (C) 1998 Free Software Foundation, Inc.
+/* hppa lshift
+*
+ * Copyright (C) 1992, 1994, 1998 Free Software Foundation, Inc.
*
* This file is part of GNUPG.
*
@@ -17,14 +17,6 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- * Actually it's the same code with only minor changes in the
- * way the data is stored; this is to support the abstraction
- * of an optional secure memory allocation which may be used
- * to avoid revealing of sensitive data due to paging etc.
- * The GNU MP Library itself is published under the LGPL;
- * however I decided to publish this code under the plain GPL.
*/
@@ -78,47 +70,3 @@ L$0004 vshd %r22,%r0,%r20
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp, (gr26)
- * mpi_ptr_t up, (gr25)
- * mpi_size_t usize, (gr24)
- * unsigned cnt) (gr23)
- */
-
- .code
- .export mpihelp_rshift
-mpihelp_rshift
- .proc
- .callinfo frame=64,no_calls
- .entry
-
- ldws,ma 4(0,%r25),%r22
- mtsar %r23
- addib,= -1,%r24,L$r004
- vshd %r22,%r0,%r28 ; compute carry out limb
- ldws,ma 4(0,%r25),%r29
- addib,= -1,%r24,L$r002
- vshd %r29,%r22,%r20
-
-L$roop ldws,ma 4(0,%r25),%r22
- stws,ma %r20,4(0,%r26)
- addib,= -1,%r24,L$r003
- vshd %r22,%r29,%r20
- ldws,ma 4(0,%r25),%r29
- stws,ma %r20,4(0,%r26)
- addib,<> -1,%r24,L$roop
- vshd %r29,%r22,%r20
-
-L$r002 stws,ma %r20,4(0,%r26)
- vshd %r0,%r29,%r20
- bv 0(%r2)
- stw %r20,0(0,%r26)
-L$r003 stws,ma %r20,4(0,%r26)
-L$r004 vshd %r0,%r22,%r20
- bv 0(%r2)
- stw %r20,0(0,%r26)
-
- .exit
- .procend
-
diff --git a/mpi/hppa/mpih-rshift.S b/mpi/hppa/mpih-rshift.S
new file mode 100644
index 00000000..0299d2e2
--- /dev/null
+++ b/mpi/hppa/mpih-rshift.S
@@ -0,0 +1,68 @@
+/* hppa rshift
+*
+ * Copyright (C) 1992, 1994, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp, (gr26)
+ * mpi_ptr_t up, (gr25)
+ * mpi_size_t usize, (gr24)
+ * unsigned cnt) (gr23)
+ */
+
+ .code
+ .export mpihelp_rshift
+mpihelp_rshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r22
+ mtsar %r23
+ addib,= -1,%r24,L$r004
+ vshd %r22,%r0,%r28 ; compute carry out limb
+ ldws,ma 4(0,%r25),%r29
+ addib,= -1,%r24,L$r002
+ vshd %r29,%r22,%r20
+
+L$roop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ addib,= -1,%r24,L$r003
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,<> -1,%r24,L$roop
+ vshd %r29,%r22,%r20
+
+L$r002 stws,ma %r20,4(0,%r26)
+ vshd %r0,%r29,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+L$r003 stws,ma %r20,4(0,%r26)
+L$r004 vshd %r0,%r22,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+
+ .exit
+ .procend
+
diff --git a/mpi/i386/distfiles b/mpi/i386/distfiles
index 34de9157..88d2a30c 100644
--- a/mpi/i386/distfiles
+++ b/mpi/i386/distfiles
@@ -2,7 +2,8 @@ mpih-add1.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
mpih-sub1.S
syntax.h
diff --git a/mpi/i386/mpih-lshift.S b/mpi/i386/mpih-lshift.S
new file mode 100644
index 00000000..a84f6b1f
--- /dev/null
+++ b/mpi/i386/mpih-lshift.S
@@ -0,0 +1,96 @@
+/* i80386 lshift
+ * Copyright (C) 1998 Free Software Foundation, Inc.
+ * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
+ * mpi_ptr_t up, (sp + 8)
+ * mpi_size_t usize, (sp + 12)
+ * unsigned cnt) (sp + 16)
+ */
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ movl 16(%esp),%edi /* res_ptr */
+ movl 20(%esp),%esi /* s_ptr */
+ movl 24(%esp),%edx /* size */
+ movl 28(%esp),%ecx /* cnt */
+
+ subl $4,%esi /* adjust s_ptr */
+
+ movl (%esi,%edx,4),%ebx /* read most significant limb */
+ xorl %eax,%eax
+ shldl %cl,%ebx,%eax /* compute carry limb */
+ decl %edx
+ jz Lend
+ pushl %eax /* push carry limb onto stack */
+ testb $1,%edx
+ jnz L1 /* enter loop in the middle */
+ movl %ebx,%eax
+
+ ALIGN (3)
+Loop: movl (%esi,%edx,4),%ebx /* load next lower limb */
+ shldl %cl,%ebx,%eax /* compute result limb */
+ movl %eax,(%edi,%edx,4) /* store it */
+ decl %edx
+L1: movl (%esi,%edx,4),%eax
+ shldl %cl,%eax,%ebx
+ movl %ebx,(%edi,%edx,4)
+ decl %edx
+ jnz Loop
+
+ shll %cl,%eax /* compute least significant limb */
+ movl %eax,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+Lend: shll %cl,%ebx /* compute least significant limb */
+ movl %ebx,(%edi) /* store it */
+
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
diff --git a/mpi/i386/mpih-shift.S b/mpi/i386/mpih-rshift.S
index f10f268e..3dd9caf7 100644
--- a/mpi/i386/mpih-shift.S
+++ b/mpi/i386/mpih-rshift.S
@@ -1,4 +1,4 @@
-/* i80386 rshift, lshift
+/* i80386 rshift
* Copyright (C) 1998 Free Software Foundation, Inc.
* Copyright (C) 1992, 1994 Free Software Foundation, Inc.
*
@@ -34,68 +34,6 @@
/*******************
* mpi_limb_t
- * mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
- * mpi_ptr_t up, (sp + 8)
- * mpi_size_t usize, (sp + 12)
- * unsigned cnt) (sp + 16)
- */
-
-.text
- ALIGN (3)
- .globl C_SYMBOL_NAME(mpihelp_lshift)
-C_SYMBOL_NAME(mpihelp_lshift:)
- pushl %edi
- pushl %esi
- pushl %ebx
-
- movl 16(%esp),%edi /* res_ptr */
- movl 20(%esp),%esi /* s_ptr */
- movl 24(%esp),%edx /* size */
- movl 28(%esp),%ecx /* cnt */
-
- subl $4,%esi /* adjust s_ptr */
-
- movl (%esi,%edx,4),%ebx /* read most significant limb */
- xorl %eax,%eax
- shldl %cl,%ebx,%eax /* compute carry limb */
- decl %edx
- jz Lend
- pushl %eax /* push carry limb onto stack */
- testb $1,%edx
- jnz L1 /* enter loop in the middle */
- movl %ebx,%eax
-
- ALIGN (3)
-Loop: movl (%esi,%edx,4),%ebx /* load next lower limb */
- shldl %cl,%ebx,%eax /* compute result limb */
- movl %eax,(%edi,%edx,4) /* store it */
- decl %edx
-L1: movl (%esi,%edx,4),%eax
- shldl %cl,%eax,%ebx
- movl %ebx,(%edi,%edx,4)
- decl %edx
- jnz Loop
-
- shll %cl,%eax /* compute least significant limb */
- movl %eax,(%edi) /* store it */
-
- popl %eax /* pop carry limb */
-
- popl %ebx
- popl %esi
- popl %edi
- ret
-
-Lend: shll %cl,%ebx /* compute least significant limb */
- movl %ebx,(%edi) /* store it */
-
- popl %ebx
- popl %esi
- popl %edi
- ret
-
-/*******************
- * mpi_limb_t
* mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
* mpi_ptr_t up, (sp + 8)
* mpi_size_t usize, (sp + 12)
diff --git a/mpi/i586/distfiles b/mpi/i586/distfiles
index 951480fd..8f821fbf 100644
--- a/mpi/i586/distfiles
+++ b/mpi/i586/distfiles
@@ -2,7 +2,8 @@ mpih-add1.S
mpih-mul1.S
mpih-mul2.S
mpih-mul3.S
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
mpih-sub1.S
README
diff --git a/mpi/i586/mpih-lshift.S b/mpi/i586/mpih-lshift.S
new file mode 100644
index 00000000..9e0cb029
--- /dev/null
+++ b/mpi/i586/mpih-lshift.S
@@ -0,0 +1,230 @@
+/* i80586 lshift
+ * Copyright (C) 1998 Free Software Foundation, Inc.
+ * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
+ * mpi_ptr_t up, (sp + 8)
+ * mpi_size_t usize, (sp + 12)
+ * unsigned cnt) (sp + 16)
+ */
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift:)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s_ptr */
+ movl 28(%esp),%ebp /* size */
+ movl 32(%esp),%ecx /* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions. */
+ cmp $1,%ecx
+ jne Lnormal
+ leal 4(%esi),%eax
+ cmpl %edi,%eax
+ jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
+ leal (%esi,%ebp,4),%eax
+ cmpl %eax,%edi
+ jnc Lspecial /* jump if res_ptr >= s_ptr + size */
+
+Lnormal:
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+ xorl %eax,%eax
+ shldl %cl,%edx,%eax /* compute carry limb */
+ pushl %eax /* push carry limb onto stack */
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz Lend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+Loop: movl -28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ shldl %cl,%eax,%ebx
+ shldl %cl,%edx,%eax
+ movl %ebx,(%edi)
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ shldl %cl,%ebx,%edx
+ shldl %cl,%eax,%ebx
+ movl %edx,-8(%edi)
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ shldl %cl,%edx,%eax
+ shldl %cl,%ebx,%edx
+ movl %eax,-16(%edi)
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ shldl %cl,%eax,%ebx
+ shldl %cl,%edx,%eax
+ movl %ebx,-24(%edi)
+ movl %eax,-28(%edi)
+
+ subl $32,%esi
+ subl $32,%edi
+ decl %ebp
+ jnz Loop
+
+Lend: popl %ebp
+ andl $7,%ebp
+ jz Lend2
+Loop2: movl (%esi),%eax
+ shldl %cl,%eax,%edx
+ movl %edx,(%edi)
+ movl %eax,%edx
+ subl $4,%esi
+ subl $4,%edi
+ decl %ebp
+ jnz Loop2
+
+Lend2: shll %cl,%edx /* compute least significant limb */
+ movl %edx,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* We loop from least significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination.
+*/
+
+Lspecial:
+ movl (%esi),%edx
+ addl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ addl %edx,%edx
+ incl %ebp
+ decl %ebp
+ jz LLend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+LLoop: movl 28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,(%edi)
+ adcl %edx,%edx
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ adcl %ebx,%ebx
+ movl %edx,8(%edi)
+ adcl %eax,%eax
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ adcl %edx,%edx
+ movl %eax,16(%edi)
+ adcl %ebx,%ebx
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,24(%edi)
+ adcl %edx,%edx
+ movl %eax,28(%edi)
+
+ leal 32(%esi),%esi /* use leal not to clobber carry */
+ leal 32(%edi),%edi
+ decl %ebp
+ jnz LLoop
+
+LLend: popl %ebp
+ sbbl %eax,%eax /* save carry in %eax */
+ andl $7,%ebp
+ jz LLend2
+ addl %eax,%eax /* restore carry from eax */
+LLoop2: movl %edx,%ebx
+ movl (%esi),%edx
+ adcl %edx,%edx
+ movl %ebx,(%edi)
+
+ leal 4(%esi),%esi /* use leal not to clobber carry */
+ leal 4(%edi),%edi
+ decl %ebp
+ jnz LLoop2
+
+ jmp LL1
+LLend2: addl %eax,%eax /* restore carry from eax */
+LL1: movl %edx,(%edi) /* store last limb */
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
diff --git a/mpi/i586/mpih-shift.S b/mpi/i586/mpih-rshift.S
index 07d5fbc8..aec26e64 100644
--- a/mpi/i586/mpih-shift.S
+++ b/mpi/i586/mpih-rshift.S
@@ -1,4 +1,4 @@
-/* i80586 rshift, lshift
+/* i80586 rshift
* Copyright (C) 1998 Free Software Foundation, Inc.
* Copyright (C) 1992, 1994 Free Software Foundation, Inc.
*
@@ -32,203 +32,6 @@
#include "asm-syntax.h"
-/*******************
- * mpi_limb_t
- * mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
- * mpi_ptr_t up, (sp + 8)
- * mpi_size_t usize, (sp + 12)
- * unsigned cnt) (sp + 16)
- */
-
-.text
- ALIGN (3)
- .globl C_SYMBOL_NAME(mpihelp_lshift)
-C_SYMBOL_NAME(mpihelp_lshift:)
-
- pushl %edi
- pushl %esi
- pushl %ebx
- pushl %ebp
-
- movl 20(%esp),%edi /* res_ptr */
- movl 24(%esp),%esi /* s_ptr */
- movl 28(%esp),%ebp /* size */
- movl 32(%esp),%ecx /* cnt */
-
-/* We can use faster code for shift-by-1 under certain conditions. */
- cmp $1,%ecx
- jne Lnormal
- leal 4(%esi),%eax
- cmpl %edi,%eax
- jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
- leal (%esi,%ebp,4),%eax
- cmpl %eax,%edi
- jnc Lspecial /* jump if res_ptr >= s_ptr + size */
-
-Lnormal:
- leal -4(%edi,%ebp,4),%edi
- leal -4(%esi,%ebp,4),%esi
-
- movl (%esi),%edx
- subl $4,%esi
- xorl %eax,%eax
- shldl %cl,%edx,%eax /* compute carry limb */
- pushl %eax /* push carry limb onto stack */
-
- decl %ebp
- pushl %ebp
- shrl $3,%ebp
- jz Lend
-
- movl (%edi),%eax /* fetch destination cache line */
-
- ALIGN (2)
-Loop: movl -28(%edi),%eax /* fetch destination cache line */
- movl %edx,%ebx
-
- movl (%esi),%eax
- movl -4(%esi),%edx
- shldl %cl,%eax,%ebx
- shldl %cl,%edx,%eax
- movl %ebx,(%edi)
- movl %eax,-4(%edi)
-
- movl -8(%esi),%ebx
- movl -12(%esi),%eax
- shldl %cl,%ebx,%edx
- shldl %cl,%eax,%ebx
- movl %edx,-8(%edi)
- movl %ebx,-12(%edi)
-
- movl -16(%esi),%edx
- movl -20(%esi),%ebx
- shldl %cl,%edx,%eax
- shldl %cl,%ebx,%edx
- movl %eax,-16(%edi)
- movl %edx,-20(%edi)
-
- movl -24(%esi),%eax
- movl -28(%esi),%edx
- shldl %cl,%eax,%ebx
- shldl %cl,%edx,%eax
- movl %ebx,-24(%edi)
- movl %eax,-28(%edi)
-
- subl $32,%esi
- subl $32,%edi
- decl %ebp
- jnz Loop
-
-Lend: popl %ebp
- andl $7,%ebp
- jz Lend2
-Loop2: movl (%esi),%eax
- shldl %cl,%eax,%edx
- movl %edx,(%edi)
- movl %eax,%edx
- subl $4,%esi
- subl $4,%edi
- decl %ebp
- jnz Loop2
-
-Lend2: shll %cl,%edx /* compute least significant limb */
- movl %edx,(%edi) /* store it */
-
- popl %eax /* pop carry limb */
-
- popl %ebp
- popl %ebx
- popl %esi
- popl %edi
- ret
-
-/* We loop from least significant end of the arrays, which is only
- permissable if the source and destination don't overlap, since the
- function is documented to work for overlapping source and destination.
-*/
-
-Lspecial:
- movl (%esi),%edx
- addl $4,%esi
-
- decl %ebp
- pushl %ebp
- shrl $3,%ebp
-
- addl %edx,%edx
- incl %ebp
- decl %ebp
- jz LLend
-
- movl (%edi),%eax /* fetch destination cache line */
-
- ALIGN (2)
-LLoop: movl 28(%edi),%eax /* fetch destination cache line */
- movl %edx,%ebx
-
- movl (%esi),%eax
- movl 4(%esi),%edx
- adcl %eax,%eax
- movl %ebx,(%edi)
- adcl %edx,%edx
- movl %eax,4(%edi)
-
- movl 8(%esi),%ebx
- movl 12(%esi),%eax
- adcl %ebx,%ebx
- movl %edx,8(%edi)
- adcl %eax,%eax
- movl %ebx,12(%edi)
-
- movl 16(%esi),%edx
- movl 20(%esi),%ebx
- adcl %edx,%edx
- movl %eax,16(%edi)
- adcl %ebx,%ebx
- movl %edx,20(%edi)
-
- movl 24(%esi),%eax
- movl 28(%esi),%edx
- adcl %eax,%eax
- movl %ebx,24(%edi)
- adcl %edx,%edx
- movl %eax,28(%edi)
-
- leal 32(%esi),%esi /* use leal not to clobber carry */
- leal 32(%edi),%edi
- decl %ebp
- jnz LLoop
-
-LLend: popl %ebp
- sbbl %eax,%eax /* save carry in %eax */
- andl $7,%ebp
- jz LLend2
- addl %eax,%eax /* restore carry from eax */
-LLoop2: movl %edx,%ebx
- movl (%esi),%edx
- adcl %edx,%edx
- movl %ebx,(%edi)
-
- leal 4(%esi),%esi /* use leal not to clobber carry */
- leal 4(%edi),%edi
- decl %ebp
- jnz LLoop2
-
- jmp LL1
-LLend2: addl %eax,%eax /* restore carry from eax */
-LL1: movl %edx,(%edi) /* store last limb */
-
- sbbl %eax,%eax
- negl %eax
-
- popl %ebp
- popl %ebx
- popl %esi
- popl %edi
- ret
-
-
-
/*******************
* mpi_limb_t
diff --git a/mpi/m68k/distfiles b/mpi/m68k/distfiles
index c69629e7..4c0967b8 100644
--- a/mpi/m68k/distfiles
+++ b/mpi/m68k/distfiles
@@ -1,5 +1,6 @@
syntax.h
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
mpih-add1.S
mpih-sub1.S
diff --git a/mpi/m68k/mpih-shift.S b/mpi/m68k/mpih-lshift.S
index 09b1bcd5..af305c84 100644
--- a/mpi/m68k/mpih-shift.S
+++ b/mpi/m68k/mpih-lshift.S
@@ -1,15 +1,15 @@
-/* mc68020 __mpn_lshift -- Shift left a low-level natural-number integer.
+/* mc68020 lshift -- Shift left a low-level natural-number integer.
-Copyright (C) 1996 Free Software Foundation, Inc.
+Copyright (C) 1996, 1998 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of GNUPG.
-The GNU MP Library is free software; you can redistribute it and/or modify
+GNUPG is free software; you can redistribute it and/or modify
it under the terms of the GNU Library General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at your
option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
+GNUPG is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
License for more details.
@@ -17,7 +17,8 @@ License for more details.
You should have received a copy of the GNU Library General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-MA 02111-1307, USA. */
+MA 02111-1307, USA.
+*/
#include "sysdep.h"
#include "asm-syntax.h"
@@ -154,133 +155,4 @@ EPILOG(mpihelp_lshift)
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
- * mpi_ptr_t up, (sp + 8)
- * mpi_size_t usize, (sp + 12)
- * unsigned cnt) (sp + 16)
- */
-
-#define res_ptr a1
-#define s_ptr a0
-#define s_size d6
-#define cnt d4
-
- TEXT
- ALIGN
- GLOBL C_SYMBOL_NAME(mpihelp_rshift)
-
-C_SYMBOL_NAME(mpihelp_rshift:)
-PROLOG(mpihelp_rshift)
- /* Save used registers on the stack. */
- moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
-
- /* Copy the arguments to registers. */
- movel MEM_DISP(sp,28),R(res_ptr)
- movel MEM_DISP(sp,32),R(s_ptr)
- movel MEM_DISP(sp,36),R(s_size)
- movel MEM_DISP(sp,40),R(cnt)
-
- moveql #1,R(d5)
- cmpl R(d5),R(cnt)
- bne L(Rnormal)
- cmpl R(res_ptr),R(s_ptr)
- bls L(Rspecial) /* jump if res_ptr >= s_ptr */
-#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
- lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
-#else /* not mc68020 */
- movel R(s_size),R(d0)
- asll #2,R(d0)
- lea MEM_INDX(res_ptr,d0,l),R(a2)
-#endif
- cmpl R(s_ptr),R(a2)
- bls L(Rspecial) /* jump if s_ptr >= res_ptr + s_size */
-
-L(Rnormal:)
- moveql #32,R(d5)
- subl R(cnt),R(d5)
- movel MEM_POSTINC(s_ptr),R(d2)
- movel R(d2),R(d0)
- lsll R(d5),R(d0) /* compute carry limb */
-
- lsrl R(cnt),R(d2)
- movel R(d2),R(d1)
- subql #1,R(s_size)
- beq L(Rend)
- lsrl #1,R(s_size)
- bcs L(R1)
- subql #1,R(s_size)
-
-L(Roop:)
- movel MEM_POSTINC(s_ptr),R(d2)
- movel R(d2),R(d3)
- lsll R(d5),R(d3)
- orl R(d3),R(d1)
- movel R(d1),MEM_POSTINC(res_ptr)
- lsrl R(cnt),R(d2)
-L(R1:)
- movel MEM_POSTINC(s_ptr),R(d1)
- movel R(d1),R(d3)
- lsll R(d5),R(d3)
- orl R(d3),R(d2)
- movel R(d2),MEM_POSTINC(res_ptr)
- lsrl R(cnt),R(d1)
-
- dbf R(s_size),L(Roop)
- subl #0x10000,R(s_size)
- bcc L(Roop)
-
-L(Rend:)
- movel R(d1),MEM(res_ptr) /* store most significant limb */
-
-/* Restore used registers from stack frame. */
- moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
- rts
-
-/* We loop from most significant end of the arrays, which is only
- permissable if the source and destination don't overlap, since the
- function is documented to work for overlapping source and destination. */
-
-L(Rspecial:)
-#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
- lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
- lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
-#else /* not mc68000 */
- movel R(s_size),R(d0)
- asll #2,R(d0)
- addl R(s_size),R(s_ptr)
- addl R(s_size),R(res_ptr)
-#endif
-
- clrl R(d0) /* initialize carry */
- eorw #1,R(s_size)
- lsrl #1,R(s_size)
- bcc L(LR1)
- subql #1,R(s_size)
-
-L(LRoop:)
- movel MEM_PREDEC(s_ptr),R(d2)
- roxrl #1,R(d2)
- movel R(d2),MEM_PREDEC(res_ptr)
-L(LR1:)
- movel MEM_PREDEC(s_ptr),R(d2)
- roxrl #1,R(d2)
- movel R(d2),MEM_PREDEC(res_ptr)
-
- dbf R(s_size),L(LRoop)
- roxrl #1,R(d0) /* save cy in msb */
- subl #0x10000,R(s_size)
- bcs L(LRend)
- addl R(d0),R(d0) /* restore cy */
- bra L(LRoop)
-
-L(LRend:)
-/* Restore used registers from stack frame. */
- moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
- rts
-EPILOG(mpihelp_rshift)
-
-
-
diff --git a/mpi/m68k/mpih-rshift.S b/mpi/m68k/mpih-rshift.S
new file mode 100644
index 00000000..5670888c
--- /dev/null
+++ b/mpi/m68k/mpih-rshift.S
@@ -0,0 +1,156 @@
+/* mc68020 rshift -- Shift right a low-level natural-number integer.
+
+Copyright (C) 1996, 1998 Free Software Foundation, Inc.
+
+This file is part of GNUPG.
+
+GNUPG is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+GNUPG is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with the GNU MP Library; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA.
+*/
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
+ * mpi_ptr_t up, (sp + 8)
+ * mpi_size_t usize, (sp + 12)
+ * unsigned cnt) (sp + 16)
+ */
+
+#define res_ptr a1
+#define s_ptr a0
+#define s_size d6
+#define cnt d4
+
+ TEXT
+ ALIGN
+ GLOBL C_SYMBOL_NAME(mpihelp_rshift)
+
+C_SYMBOL_NAME(mpihelp_rshift:)
+PROLOG(mpihelp_rshift)
+ /* Save used registers on the stack. */
+ moveml R(d2)-R(d6)/R(a2),MEM_PREDEC(sp)
+
+ /* Copy the arguments to registers. */
+ movel MEM_DISP(sp,28),R(res_ptr)
+ movel MEM_DISP(sp,32),R(s_ptr)
+ movel MEM_DISP(sp,36),R(s_size)
+ movel MEM_DISP(sp,40),R(cnt)
+
+ moveql #1,R(d5)
+ cmpl R(d5),R(cnt)
+ bne L(Rnormal)
+ cmpl R(res_ptr),R(s_ptr)
+ bls L(Rspecial) /* jump if res_ptr >= s_ptr */
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(a2)
+#else /* not mc68020 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ lea MEM_INDX(res_ptr,d0,l),R(a2)
+#endif
+ cmpl R(s_ptr),R(a2)
+ bls L(Rspecial) /* jump if s_ptr >= res_ptr + s_size */
+
+L(Rnormal:)
+ moveql #32,R(d5)
+ subl R(cnt),R(d5)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d0)
+ lsll R(d5),R(d0) /* compute carry limb */
+
+ lsrl R(cnt),R(d2)
+ movel R(d2),R(d1)
+ subql #1,R(s_size)
+ beq L(Rend)
+ lsrl #1,R(s_size)
+ bcs L(R1)
+ subql #1,R(s_size)
+
+L(Roop:)
+ movel MEM_POSTINC(s_ptr),R(d2)
+ movel R(d2),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d1)
+ movel R(d1),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d2)
+L(R1:)
+ movel MEM_POSTINC(s_ptr),R(d1)
+ movel R(d1),R(d3)
+ lsll R(d5),R(d3)
+ orl R(d3),R(d2)
+ movel R(d2),MEM_POSTINC(res_ptr)
+ lsrl R(cnt),R(d1)
+
+ dbf R(s_size),L(Roop)
+ subl #0x10000,R(s_size)
+ bcc L(Roop)
+
+L(Rend:)
+ movel R(d1),MEM(res_ptr) /* store most significant limb */
+
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+
+/* We loop from most significant end of the arrays, which is only
+ permissable if the source and destination don't overlap, since the
+ function is documented to work for overlapping source and destination. */
+
+L(Rspecial:)
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+ lea MEM_INDX1(s_ptr,s_size,l,4),R(s_ptr)
+ lea MEM_INDX1(res_ptr,s_size,l,4),R(res_ptr)
+#else /* not mc68000 */
+ movel R(s_size),R(d0)
+ asll #2,R(d0)
+ addl R(s_size),R(s_ptr)
+ addl R(s_size),R(res_ptr)
+#endif
+
+ clrl R(d0) /* initialize carry */
+ eorw #1,R(s_size)
+ lsrl #1,R(s_size)
+ bcc L(LR1)
+ subql #1,R(s_size)
+
+L(LRoop:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+L(LR1:)
+ movel MEM_PREDEC(s_ptr),R(d2)
+ roxrl #1,R(d2)
+ movel R(d2),MEM_PREDEC(res_ptr)
+
+ dbf R(s_size),L(LRoop)
+ roxrl #1,R(d0) /* save cy in msb */
+ subl #0x10000,R(s_size)
+ bcs L(LRend)
+ addl R(d0),R(d0) /* restore cy */
+ bra L(LRoop)
+
+L(LRend:)
+/* Restore used registers from stack frame. */
+ moveml MEM_POSTINC(sp),R(d2)-R(d6)/R(a2)
+ rts
+EPILOG(mpihelp_rshift)
+
+
+
+
diff --git a/mpi/pa7100/distfiles b/mpi/pa7100/distfiles
index d468fa20..fece9431 100644
--- a/mpi/pa7100/distfiles
+++ b/mpi/pa7100/distfiles
@@ -1,2 +1,3 @@
-mpih-shift.S
+mpih-lshift.S
+mpih-rshift.S
diff --git a/mpi/pa7100/mpih-shift.S b/mpi/pa7100/mpih-lshift.S
index 1287f30f..e17e1b70 100644
--- a/mpi/pa7100/mpih-shift.S
+++ b/mpi/pa7100/mpih-lshift.S
@@ -1,4 +1,4 @@
-/* hppa rshift, lshift
+/* hppa lshift
* optimized for the PA7100, where is runs at 3.25 cycles/limb
* Copyright (C) 1992, 1994 Free Software Foundation, Inc.
* Copyright (C) 1998 Free Software Foundation, Inc.
@@ -18,14 +18,6 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- * Actually it's the same code with only minor changes in the
- * way the data is stored; this is to support the abstraction
- * of an optional secure memory allocation which may be used
- * to avoid revealing of sensitive data due to paging etc.
- * The GNU MP Library itself is published under the LGPL;
- * however I decided to publish this code under the plain GPL.
*/
@@ -95,64 +87,3 @@ L$0004 vshd %r22,%r0,%r20
-/*******************
- * mpi_limb_t
- * mpihelp_rshift( mpi_ptr_t wp, (gr26)
- * mpi_ptr_t up, (gr25)
- * mpi_size_t usize, (gr24)
- * unsigned cnt) (gr23)
- */
-
- .code
- .export mpihelp_rshift
-mpihelp_rshift
- .proc
- .callinfo frame=64,no_calls
- .entry
-
- ldws,ma 4(0,%r25),%r22
- mtsar %r23
- addib,= -1,%r24,L$r004
- vshd %r22,%r0,%r28 ; compute carry out limb
- ldws,ma 4(0,%r25),%r29
- addib,<= -5,%r24,L$rrest
- vshd %r29,%r22,%r20
-
-L$roop ldws,ma 4(0,%r25),%r22
- stws,ma %r20,4(0,%r26)
- vshd %r22,%r29,%r20
- ldws,ma 4(0,%r25),%r29
- stws,ma %r20,4(0,%r26)
- vshd %r29,%r22,%r20
- ldws,ma 4(0,%r25),%r22
- stws,ma %r20,4(0,%r26)
- vshd %r22,%r29,%r20
- ldws,ma 4(0,%r25),%r29
- stws,ma %r20,4(0,%r26)
- addib,> -4,%r24,L$roop
- vshd %r29,%r22,%r20
-
-L$rrest addib,= 4,%r24,L$rend1
- nop
-L$eroop ldws,ma 4(0,%r25),%r22
- stws,ma %r20,4(0,%r26)
- addib,<= -1,%r24,L$rend2
- vshd %r22,%r29,%r20
- ldws,ma 4(0,%r25),%r29
- stws,ma %r20,4(0,%r26)
- addib,> -1,%r24,L$eroop
- vshd %r29,%r22,%r20
-
-L$rend1 stws,ma %r20,4(0,%r26)
- vshd %r0,%r29,%r20
- bv 0(%r2)
- stw %r20,0(0,%r26)
-L$rend2 stws,ma %r20,4(0,%r26)
-L$r004 vshd %r0,%r22,%r20
- bv 0(%r2)
- stw %r20,0(0,%r26)
-
- .exit
- .procend
-
-
diff --git a/mpi/pa7100/mpih-rshift.S b/mpi/pa7100/mpih-rshift.S
new file mode 100644
index 00000000..b409de04
--- /dev/null
+++ b/mpi/pa7100/mpih-rshift.S
@@ -0,0 +1,85 @@
+/* hppa rshift
+ * optimized for the PA7100, where is runs at 3.25 cycles/limb
+ * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ * Copyright (C) 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp, (gr26)
+ * mpi_ptr_t up, (gr25)
+ * mpi_size_t usize, (gr24)
+ * unsigned cnt) (gr23)
+ */
+
+ .code
+ .export mpihelp_rshift
+mpihelp_rshift
+ .proc
+ .callinfo frame=64,no_calls
+ .entry
+
+ ldws,ma 4(0,%r25),%r22
+ mtsar %r23
+ addib,= -1,%r24,L$r004
+ vshd %r22,%r0,%r28 ; compute carry out limb
+ ldws,ma 4(0,%r25),%r29
+ addib,<= -5,%r24,L$rrest
+ vshd %r29,%r22,%r20
+
+L$roop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ vshd %r29,%r22,%r20
+ ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -4,%r24,L$roop
+ vshd %r29,%r22,%r20
+
+L$rrest addib,= 4,%r24,L$rend1
+ nop
+L$eroop ldws,ma 4(0,%r25),%r22
+ stws,ma %r20,4(0,%r26)
+ addib,<= -1,%r24,L$rend2
+ vshd %r22,%r29,%r20
+ ldws,ma 4(0,%r25),%r29
+ stws,ma %r20,4(0,%r26)
+ addib,> -1,%r24,L$eroop
+ vshd %r29,%r22,%r20
+
+L$rend1 stws,ma %r20,4(0,%r26)
+ vshd %r0,%r29,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+L$rend2 stws,ma %r20,4(0,%r26)
+L$r004 vshd %r0,%r22,%r20
+ bv 0(%r2)
+ stw %r20,0(0,%r26)
+
+ .exit
+ .procend
+
+
diff --git a/mpi/sparc32/distfiles b/mpi/sparc32/distfiles
index 7933edc8..95ff4288 100644
--- a/mpi/sparc32/distfiles
+++ b/mpi/sparc32/distfiles
@@ -1,4 +1,6 @@
+mpih-lshift.S
+mpih-rshift.S
mpih-add1.S
udiv.S
diff --git a/mpi/sparc32/mpih-add1.S b/mpi/sparc32/mpih-add1.S
index 04315d10..b90d9d60 100644
--- a/mpi/sparc32/mpih-add1.S
+++ b/mpi/sparc32/mpih-add1.S
@@ -1,24 +1,25 @@
-! SPARC __mpn_add_n -- Add two limb vectors of the same length > 0 and store
-! sum in a third limb vector.
-
-! Copyright (C) 1995, 1996 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Library General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
-! License for more details.
+/* SPARC _add_n -- Add two limb vectors of the same length > 0 and store
+ * sum in a third limb vector.
+ *
+ * Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
-! You should have received a copy of the GNU Library General Public License
-! along with the GNU MP Library; see the file COPYING.LIB. If not, write to
-! the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
-! MA 02111-1307, USA.
/*******************
diff --git a/mpi/sparc32/mpih-lshift.S b/mpi/sparc32/mpih-lshift.S
new file mode 100644
index 00000000..5348a157
--- /dev/null
+++ b/mpi/sparc32/mpih-lshift.S
@@ -0,0 +1,96 @@
+/* sparc lshift
+ *
+ * Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift):
+ sll %o2,2,%g1
+ add %o1,%g1,%o1 ! make %o1 point at end of src
+ ld [%o1-4],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o0,%g1,%o0 ! make %o0 point at end of res
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 ! number of limbs in first loop
+ srl %g2,%o5,%g1 ! compute function result
+ be L0 ! if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+Loop0: ld [%o1-8],%g3
+ add %o0,-4,%o0
+ add %o1,-4,%o1
+ addcc %g4,-1,%g4
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne Loop0
+ st %o4,[%o0+0]
+
+L0: tst %o2
+ be Lend
+ nop
+
+Loop: ld [%o1-8],%g3
+ add %o0,-16,%o0
+ addcc %o2,-4,%o2
+ sll %g2,%o3,%o4
+ srl %g3,%o5,%g1
+
+ ld [%o1-12],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+12]
+ srl %g2,%o5,%g1
+
+ ld [%o1-16],%g3
+ sll %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0+8]
+ srl %g3,%o5,%g1
+
+ ld [%o1-20],%g2
+ sll %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0+4]
+ srl %g2,%o5,%g1
+
+ add %o1,-16,%o1
+ or %g4,%g1,%g4
+ bne Loop
+ st %g4,[%o0+0]
+
+Lend: sll %g2,%o3,%g2
+ st %g2,[%o0-4]
+ retl
+ ld [%sp+80],%o0
+
diff --git a/mpi/sparc32/mpih-rshift.S b/mpi/sparc32/mpih-rshift.S
new file mode 100644
index 00000000..00996266
--- /dev/null
+++ b/mpi/sparc32/mpih-rshift.S
@@ -0,0 +1,92 @@
+/* sparc rshift
+ *
+ * Copyright (C) 1995, 1996, 1998 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG.
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+! INPUT PARAMETERS
+! res_ptr %o0
+! src_ptr %o1
+! size %o2
+! cnt %o3
+
+#include "sysdep.h"
+
+ .text
+ .align 4
+ .global C_SYMBOL_NAME(mpohelp_rshift)
+C_SYMBOL_NAME(mpihelp_rshift):
+ ld [%o1],%g2 ! load first limb
+ sub %g0,%o3,%o5 ! negate shift count
+ add %o2,-1,%o2
+ andcc %o2,4-1,%g4 ! number of limbs in first loop
+ sll %g2,%o5,%g1 ! compute function result
+ be L0 ! if multiple of 4 limbs, skip first loop
+ st %g1,[%sp+80]
+
+ sub %o2,%g4,%o2 ! adjust count for main loop
+
+Loop0: ld [%o1+4],%g3
+ add %o0,4,%o0
+ add %o1,4,%o1
+ addcc %g4,-1,%g4
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+ mov %g3,%g2
+ or %o4,%g1,%o4
+ bne Loop0
+ st %o4,[%o0-4]
+
+L0: tst %o2
+ be Lend
+ nop
+
+Loop: ld [%o1+4],%g3
+ add %o0,16,%o0
+ addcc %o2,-4,%o2
+ srl %g2,%o3,%o4
+ sll %g3,%o5,%g1
+
+ ld [%o1+8],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-16]
+ sll %g2,%o5,%g1
+
+ ld [%o1+12],%g3
+ srl %g2,%o3,%o4
+ or %g4,%g1,%g4
+ st %g4,[%o0-12]
+ sll %g3,%o5,%g1
+
+ ld [%o1+16],%g2
+ srl %g3,%o3,%g4
+ or %o4,%g1,%o4
+ st %o4,[%o0-8]
+ sll %g2,%o5,%g1
+
+ add %o1,16,%o1
+ or %g4,%g1,%g4
+ bne Loop
+ st %g4,[%o0-4]
+
+Lend: srl %g2,%o3,%g2
+ st %g2,[%o0-0]
+ retl
+ ld [%sp+80],%o0
+