summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWerner Koch <wk@gnupg.org>1998-02-11 03:25:43 +0000
committerWerner Koch <wk@gnupg.org>1998-02-11 03:25:43 +0000
commit05690451b89ca1df9e81b50f306f0f87ae3b80a6 (patch)
tree69ffa304be307409ece88034418ae3e8d4408a0b
parenta7f3283cc053fff72bb81c230504bbd9d675de1d (diff)
downloadlibgcrypt-05690451b89ca1df9e81b50f306f0f87ae3b80a6.tar.gz
a couple of changes; but some parts are now broken
-rw-r--r--ChangeLog5
-rw-r--r--THANKS26
-rw-r--r--cipher/Makefile.in2
-rw-r--r--cipher/blowfish.c2
-rw-r--r--cipher/elgamal.c28
-rw-r--r--cipher/md5.c13
-rw-r--r--cipher/rmd160.c17
-rw-r--r--cipher/sha1.c22
-rw-r--r--mpi/Makefile.am1
-rw-r--r--mpi/Makefile.in15
-rw-r--r--mpi/config.links6
-rw-r--r--mpi/i586/README26
-rw-r--r--mpi/i586/distfiles8
-rw-r--r--mpi/i586/mpih-add1.S134
-rw-r--r--mpi/i586/mpih-mul1.S89
-rw-r--r--mpi/i586/mpih-mul2.S94
-rw-r--r--mpi/i586/mpih-mul3.S94
-rw-r--r--mpi/i586/mpih-shift.S426
-rw-r--r--mpi/i586/mpih-sub1.S143
-rw-r--r--mpi/mpi-inv.c103
-rw-r--r--mpi/mpi-mpow.c119
21 files changed, 1345 insertions, 28 deletions
diff --git a/ChangeLog b/ChangeLog
index e69de29b..1d252659 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -0,0 +1,5 @@
+Tue Feb 10 11:57:23 1998 Werner Koch (wk@frodo)
+
+ * ddd/hhhh:
+
+
diff --git a/THANKS b/THANKS
new file mode 100644
index 00000000..4f53c92f
--- /dev/null
+++ b/THANKS
@@ -0,0 +1,26 @@
+G10 has originally been written by Werner Koch. Other people contributed
+by reporting problems, suggesting various improvements or submitting actual
+code. Here is a list of these people. Help me keep it complete and
+free of errors.
+
+
+Anand Kumria wildfire@progsoc.uts.edu.au
+Daniel Eisenbud eisenbud@cs.swarthmore.edu
+Detlef Lannert lannert@lannert.rz.uni-duesseldorf.de
+Ernst Molitor ernst.molitor@uni-bonn.de
+Hendrik Buschkamp buschkamp@rheumanet.org
+Jens Bachem bachem@rrz.uni-koeln.de
+Peter Gutmann pgut001@cs.auckland.ac.nz
+Ralph Gillen gillen@theochem.uni-duesseldorf.de
+Thomas Roessler roessler@guug.de
+Tomas Fasth tomas.fasth@twinspot.net
+Walter Koch walterk@ddorf.rhein-ruhr.de
+Werner Koch werner.koch@guug.de
+Wim Vandeputte bunbun@reptile.rug.ac.be
+
+
+Thanks to the German Unix User Group for providing FTP space and
+Martin Hamilton for hosting the mailing list.
+
+Many thanks to Gerlinde for having so much patience with me while
+hacking late in the evening.
diff --git a/cipher/Makefile.in b/cipher/Makefile.in
index 9e4860f6..e4ab5eda 100644
--- a/cipher/Makefile.in
+++ b/cipher/Makefile.in
@@ -130,7 +130,7 @@ AR = ar
CFLAGS = @CFLAGS@
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
LINK = $(CC) $(CFLAGS) $(LDFLAGS) -o $@
-DIST_COMMON = Makefile.am Makefile.in
+DIST_COMMON = ChangeLog Makefile.am Makefile.in
DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
diff --git a/cipher/blowfish.c b/cipher/blowfish.c
index 81e33d08..8e3a4930 100644
--- a/cipher/blowfish.c
+++ b/cipher/blowfish.c
@@ -228,7 +228,7 @@ static const u32 ps[BLOWFISH_ROUNDS+2] = {
-static u32
+static inline u32
function_F( BLOWFISH_context *bc, u32 x )
{
u16 a, b, c, d;
diff --git a/cipher/elgamal.c b/cipher/elgamal.c
index 5e6bd0c8..13b8579f 100644
--- a/cipher/elgamal.c
+++ b/cipher/elgamal.c
@@ -311,25 +311,37 @@ elg_verify(MPI a, MPI b, MPI input, ELG_public_key *pkey )
int rc;
MPI t1;
MPI t2;
+ MPI base[4];
+ MPI exp[4];
if( !(mpi_cmp_ui( a, 0 ) > 0 && mpi_cmp( a, pkey->p ) < 0) )
return 0; /* assertion 0 < a < p failed */
t1 = mpi_alloc( mpi_get_nlimbs(a) );
t2 = mpi_alloc( mpi_get_nlimbs(a) );
- /* t1 = (y^a mod p) * (a^b mod p) mod p
- * fixme: should be calculated by a call which evalutes
- * t1 = y^a * a^b mod p
- * direct.
- */
- mpi_powm( t1, pkey->y, a, pkey->p );
- mpi_powm( t2, a, b, pkey->p );
- mpi_mulm( t1, t1, t2, pkey->p );
+
+ #if 0
+ /* t1 = (y^a mod p) * (a^b mod p) mod p */
+ base[0] = pkey->y; exp[0] = a;
+ base[1] = a; exp[1] = b;
+ base[2] = NULL; exp[2] = NULL;
+ mpi_mulpowm( t1, base, exp, pkey->p );
/* t2 = g ^ input mod p */
mpi_powm( t2, pkey->g, input, pkey->p );
rc = !mpi_cmp( t1, t2 );
+ #else
+ /* t1 = g ^ - input * y ^ a * a ^ b mod p */
+ mpi_invm(t2, pkey->g, pkey->p );
+ base[0] = t2 ; exp[0] = input;
+ base[1] = pkey->y; exp[1] = a;
+ base[2] = a; exp[2] = b;
+ base[3] = NULL; exp[3] = NULL;
+ mpi_mulpowm( t1, base, exp, pkey->p );
+ rc = !mpi_cmp_ui( t1, 1 );
+
+ #endif
mpi_free(t1);
mpi_free(t2);
diff --git a/cipher/md5.c b/cipher/md5.c
index c9f9a86b..ef95c7ef 100644
--- a/cipher/md5.c
+++ b/cipher/md5.c
@@ -93,7 +93,18 @@ static byte PADDING[64] = {
#define I(x, y, z) ((y) ^ ((x) | (~z)))
/* ROTATE_LEFT rotates x left n bits */
-#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+#if defined(__GNUC__) && defined(__i386__)
+static inline u32
+ROTATE_LEFT(u32 x, int n)
+{
+ __asm__("roll %%cl,%0"
+ :"=r" (x)
+ :"0" (x),"c" (n));
+ return x;
+}
+#else
+ #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+#endif
/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */
/* Rotation is separate from addition to prevent recomputation */
diff --git a/cipher/rmd160.c b/cipher/rmd160.c
index 0b501d77..39f1c740 100644
--- a/cipher/rmd160.c
+++ b/cipher/rmd160.c
@@ -151,6 +151,20 @@ rmd160_init( RMD160_CONTEXT *hd )
}
+#if defined(__GNUC__) && defined(__i386__)
+static inline u32
+rol(int n, u32 x)
+{
+ __asm__("roll %%cl,%0"
+ :"=r" (x)
+ :"0" (x),"c" (n));
+ return x;
+}
+#else
+ #define rol(n,x) ( ((x) << (n)) | ((x) >> (32-(n))) )
+#endif
+
+
/****************
* Transform the message X which consists of 16 32-bit-words
*/
@@ -209,9 +223,6 @@ transform( RMD160_CONTEXT *hd, byte *data )
(a) < 64 ? F3((x),(y),(z)) : \
F4((x),(y),(z)) )
-#define rol(n,x) ( ((x) << (n)) | ((x) >> (32-(n))) )
-
-
#ifdef BIG_ENDIAN_HOST
{ int i;
byte *p2, *p1;
diff --git a/cipher/sha1.c b/cipher/sha1.c
index 51029c45..a54ec6a8 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -99,14 +99,30 @@
#define K3 0x8F1BBCDCL /* Rounds 40-59 */
#define K4 0xCA62C1D6L /* Rounds 60-79 */
-#define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) )
+
+#if defined(__GNUC__) && defined(__i386__)
+static inline u32
+rol(int n, u32 x)
+{
+ __asm__("roll %%cl,%0"
+ :"=r" (x)
+ :"0" (x),"c" (n));
+ return x;
+}
+#else
+ #define rol(n,x) ( ((x) << (n)) | ((x) >> (32-(n))) )
+#endif
+
+
+
+
#define expand(W,i) ( W[ i & 15 ] = \
- ROTL( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \
+ rol( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \
W[ (i - 8) & 15 ] ^ W[ (i - 3) & 15 ] ) ) )
#define subRound(a, b, c, d, e, f, k, data) \
- ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) )
+ ( e += rol( 5, a ) + f( b, c, d ) + k + data, b = rol( 30, b ) )
void
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
index 1c32e131..2801a751 100644
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -24,6 +24,7 @@ libmpi_a_SOURCES = longlong.h \
mpi-inv.c \
mpi-mul.c \
mpi-pow.c \
+ mpi-mpow.c \
mpi-scan.c \
mpicoder.c \
mpih-cmp.c \
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
index aae7160c..bcbbc641 100644
--- a/mpi/Makefile.in
+++ b/mpi/Makefile.in
@@ -106,6 +106,7 @@ libmpi_a_SOURCES = longlong.h \
mpi-inv.c \
mpi-mul.c \
mpi-pow.c \
+ mpi-mpow.c \
mpi-scan.c \
mpicoder.c \
mpih-cmp.c \
@@ -138,13 +139,13 @@ LIBS = @LIBS@
libmpi_a_DEPENDENCIES = mpih-mul1.o mpih-mul2.o mpih-mul3.o mpih-add1.o \
mpih-sub1.o mpih-shift.o
libmpi_a_OBJECTS = mpi-add.o mpi-bit.o mpi-cmp.o mpi-div.o mpi-gcd.o \
-mpi-inv.o mpi-mul.o mpi-pow.o mpi-scan.o mpicoder.o mpih-cmp.o \
-mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o
+mpi-inv.o mpi-mul.o mpi-pow.o mpi-mpow.o mpi-scan.o mpicoder.o \
+mpih-cmp.o mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o
AR = ar
CFLAGS = @CFLAGS@
COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
LINK = $(CC) $(CFLAGS) $(LDFLAGS) -o $@
-DIST_COMMON = Makefile.am Makefile.in
+DIST_COMMON = ChangeLog Makefile.am Makefile.in
DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
@@ -152,10 +153,10 @@ DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST)
TAR = tar
GZIP = --best
DEP_FILES = .deps/mpi-add.P .deps/mpi-bit.P .deps/mpi-cmp.P \
-.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mul.P \
-.deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P .deps/mpih-add.P \
-.deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P .deps/mpih-sub.P \
-.deps/mpiutil.P
+.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mpow.P \
+.deps/mpi-mul.P .deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P \
+.deps/mpih-add.P .deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P \
+.deps/mpih-sub.P .deps/mpiutil.P
SOURCES = $(libmpi_a_SOURCES)
OBJECTS = $(libmpi_a_OBJECTS)
diff --git a/mpi/config.links b/mpi/config.links
index 923e18b0..cf370e05 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -10,7 +10,7 @@ test -d ./mpi || mkdir ./mpi
echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
case "${target}" in
- i[345]86*-*-linuxaout* | i[345]86*-*-linuxoldld* | i[345]86*-*-*bsd*)
+ i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*)
echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
@@ -20,14 +20,14 @@ case "${target}" in
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
;;
- i[3456]86*-*-*)
+ i[34]86*-*-*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i386"
;;
i[56]86*-*-* | pentium-*-* | pentiumpro-*-*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
- cat $srcdir/mpi/i586/syntax.h >>./mpi/asm-syntax.h
+ cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h
path="i586 i386"
;;
alpha*-*-*)
diff --git a/mpi/i586/README b/mpi/i586/README
new file mode 100644
index 00000000..d73b0826
--- /dev/null
+++ b/mpi/i586/README
@@ -0,0 +1,26 @@
+This directory contains mpn functions optimized for Intel Pentium
+processors.
+
+RELEVANT OPTIMIZATION ISSUES
+
+1. Pentium doesn't allocate cache lines on writes, unlike most other modern
+processors. Since the functions in the mpn class do array writes, we have to
+handle allocating the destination cache lines by reading a word from it in the
+loops, to achieve the best performance.
+
+2. Pairing of memory operations requires that the two issued operations refer
+to different cache banks. The simplest way to ensure this is to read/write
+two words from the same object. If we make operations on different objects,
+they might or might not be to the same cache bank.
+
+STATUS
+
+1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium
+documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
+or 5 cycles/limb asymptotically.
+
+2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop
+overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb.
+
+3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
+should...
diff --git a/mpi/i586/distfiles b/mpi/i586/distfiles
new file mode 100644
index 00000000..951480fd
--- /dev/null
+++ b/mpi/i586/distfiles
@@ -0,0 +1,8 @@
+mpih-add1.S
+mpih-mul1.S
+mpih-mul2.S
+mpih-mul3.S
+mpih-shift.S
+mpih-sub1.S
+README
+
diff --git a/mpi/i586/mpih-add1.S b/mpi/i586/mpih-add1.S
new file mode 100644
index 00000000..e9883285
--- /dev/null
+++ b/mpi/i586/mpih-add1.S
@@ -0,0 +1,134 @@
+/* i80586 add_n -- Add two limb vectors of the same length > 0 and store
+ * sum in a third limb vector.
+ *
+ * Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_add_n( mpi_ptr_t res_ptr, (sp + 4)
+ * mpi_ptr_t s1_ptr, (sp + 8)
+ * mpi_ptr_t s2_ptr, (sp + 12)
+ * mpi_size_t size) (sp + 16)
+ */
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(mpihelp_add_n)
+C_SYMBOL_NAME(mpihelp_add_n:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
+ movl 32(%esp),%ecx /* size */
+
+ movl (%ebp),%ebx
+
+ decl %ecx
+ movl %ecx,%edx
+ shrl $3,%ecx
+ andl $7,%edx
+ testl %ecx,%ecx /* zero carry flag */
+ jz Lend
+ pushl %edx
+
+ ALIGN (3)
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi
+
+L1: movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 8(%ebp),%ebx
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax
+ movl 12(%esi),%edx
+ adcl %ebx,%eax
+ movl 12(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax
+ movl 20(%esi),%edx
+ adcl %ebx,%eax
+ movl 20(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %ebx,%eax
+ movl 28(%ebp),%ebx
+ adcl %ebx,%edx
+ movl 32(%ebp),%ebx
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi
+ leal 32(%ebp),%ebp
+ decl %ecx
+ jnz Loop
+
+ popl %edx
+Lend:
+ decl %edx /* test %edx w/o clobbering carry */
+ js Lend2
+ incl %edx
+Loop2:
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl 4(%ebp),%ebx
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
+ jnz Loop2
+Lend2:
+ movl (%esi),%eax
+ adcl %ebx,%eax
+ movl %eax,(%edi)
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
diff --git a/mpi/i586/mpih-mul1.S b/mpi/i586/mpih-mul1.S
new file mode 100644
index 00000000..c0bedec0
--- /dev/null
+++ b/mpi/i586/mpih-mul1.S
@@ -0,0 +1,89 @@
+/* i80586 mul_1 -- Multiply a limb vector with a limb and store
+ * the result in a second limb vector.
+ * Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_mul_1( mpi_ptr_t res_ptr, (sp + 4)
+ * mpi_ptr_t s1_ptr, (sp + 8)
+ * mpi_size_t s1_size, (sp + 12)
+ * mpi_limb_t s2_limb) (sp + 16)
+ */
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(mpihelp_mul_1)
+C_SYMBOL_NAME(mpihelp_mul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+ INSN1(mul,l ,R(s2_limb))
+
+ INSN2(add,l ,R(ebx),R(eax))
+
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
+
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
+
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
+
diff --git a/mpi/i586/mpih-mul2.S b/mpi/i586/mpih-mul2.S
new file mode 100644
index 00000000..6b564623
--- /dev/null
+++ b/mpi/i586/mpih-mul2.S
@@ -0,0 +1,94 @@
+/* i80586 addmul_1 -- Multiply a limb vector with a limb and add
+ * the result to a second limb vector.
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_addmul_1( mpi_ptr_t res_ptr, (sp + 4)
+ * mpi_ptr_t s1_ptr, (sp + 8)
+ * mpi_size_t s1_size, (sp + 12)
+ * mpi_limb_t s2_limb) (sp + 16)
+ */
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(mpihelp_addmul_1)
+C_SYMBOL_NAME(mpihelp_addmul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+ INSN1(mul,l ,R(s2_limb))
+
+ INSN2(add,l ,R(eax),R(ebx))
+ INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
+
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(add,l ,R(ebx),R(eax))
+
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
+
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
+
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
+
diff --git a/mpi/i586/mpih-mul3.S b/mpi/i586/mpih-mul3.S
new file mode 100644
index 00000000..69b7f467
--- /dev/null
+++ b/mpi/i586/mpih-mul3.S
@@ -0,0 +1,94 @@
+/* i80586 submul_1 -- Multiply a limb vector with a limb and subtract
+ * the result from a second limb vector.
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_submul_1( mpi_ptr_t res_ptr, (sp + 4)
+ * mpi_ptr_t s1_ptr, (sp + 8)
+ * mpi_size_t s1_size, (sp + 12)
+ * mpi_limb_t s2_limb) (sp + 16)
+ */
+
+#define res_ptr edi
+#define s1_ptr esi
+#define size ecx
+#define s2_limb ebp
+
+ TEXT
+ ALIGN (3)
+ GLOBL C_SYMBOL_NAME(mpihelp_submul_1)
+C_SYMBOL_NAME(mpihelp_submul_1:)
+
+ INSN1(push,l ,R(edi))
+ INSN1(push,l ,R(esi))
+ INSN1(push,l ,R(ebx))
+ INSN1(push,l ,R(ebp))
+
+ INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20))
+ INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24))
+ INSN2(mov,l ,R(size),MEM_DISP(esp,28))
+ INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32))
+
+ INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
+ INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
+ INSN1(neg,l ,R(size))
+ INSN2(xor,l ,R(ebx),R(ebx))
+ ALIGN (3)
+
+Loop: INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4))
+
+ INSN1(mul,l ,R(s2_limb))
+
+ INSN2(add,l ,R(eax),R(ebx))
+ INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4))
+
+ INSN2(adc,l ,R(edx),$0)
+ INSN2(sub,l ,R(ebx),R(eax))
+
+ INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx))
+ INSN1(inc,l ,R(size))
+
+ INSN2(mov,l ,R(ebx),R(edx))
+ INSN1(jnz, ,Loop)
+
+ INSN2(adc,l ,R(ebx),$0)
+ INSN2(mov,l ,R(eax),R(ebx))
+ INSN1(pop,l ,R(ebp))
+ INSN1(pop,l ,R(ebx))
+ INSN1(pop,l ,R(esi))
+ INSN1(pop,l ,R(edi))
+ ret
+
diff --git a/mpi/i586/mpih-shift.S b/mpi/i586/mpih-shift.S
new file mode 100644
index 00000000..9f156381
--- /dev/null
+++ b/mpi/i586/mpih-shift.S
@@ -0,0 +1,426 @@
+/* i80586 rshift, lshift
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_lshift( mpi_ptr_t wp, (sp + 4)
+ * mpi_ptr_t up, (sp + 8)
+ * mpi_size_t usize, (sp + 12)
+ * unsigned cnt) (sp + 16)
+ */
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(mpihelp_lshift)
+C_SYMBOL_NAME(mpihelp_lshift:)
+
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s_ptr */
+ movl 28(%esp),%ebp /* size */
+ movl 32(%esp),%ecx /* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions. */
+ cmp $1,%ecx
+ jne Lnormal
+ leal 4(%esi),%eax
+ cmpl %edi,%eax
+ jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */
+ leal (%esi,%ebp,4),%eax
+ cmpl %eax,%edi
+ jnc Lspecial /* jump if res_ptr >= s_ptr + size */
+
+Lnormal:
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+ xorl %eax,%eax
+ shldl %cl,%edx,%eax /* compute carry limb */
+ pushl %eax /* push carry limb onto stack */
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz Lend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+Loop: movl -28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ shldl %cl,%eax,%ebx
+ shldl %cl,%edx,%eax
+ movl %ebx,(%edi)
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ shldl %cl,%ebx,%edx
+ shldl %cl,%eax,%ebx
+ movl %edx,-8(%edi)
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ shldl %cl,%edx,%eax
+ shldl %cl,%ebx,%edx
+ movl %eax,-16(%edi)
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ shldl %cl,%eax,%ebx
+ shldl %cl,%edx,%eax
+ movl %ebx,-24(%edi)
+ movl %eax,-28(%edi)
+
+ subl $32,%esi
+ subl $32,%edi
+ decl %ebp
+ jnz Loop
+
+Lend: popl %ebp
+ andl $7,%ebp
+ jz Lend2
+Loop2: movl (%esi),%eax
+ shldl %cl,%eax,%edx
+ movl %edx,(%edi)
+ movl %eax,%edx
+ subl $4,%esi
+ subl $4,%edi
+ decl %ebp
+ jnz Loop2
+
+Lend2: shll %cl,%edx /* compute least significant limb */
+ movl %edx,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* We loop from the least significant end of the arrays, which is only
+ permissible when a store cannot clobber a source limb that is still to
+ be read (checked above), since the function is documented to work for
+ overlapping source and destination. */
+
+Lspecial:
+ movl (%esi),%edx
+ addl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ addl %edx,%edx
+ incl %ebp
+ decl %ebp
+ jz LLend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+LLoop: movl 28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,(%edi)
+ adcl %edx,%edx
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ adcl %ebx,%ebx
+ movl %edx,8(%edi)
+ adcl %eax,%eax
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ adcl %edx,%edx
+ movl %eax,16(%edi)
+ adcl %ebx,%ebx
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ adcl %eax,%eax
+ movl %ebx,24(%edi)
+ adcl %edx,%edx
+ movl %eax,28(%edi)
+
+ leal 32(%esi),%esi /* use leal not to clobber carry */
+ leal 32(%edi),%edi
+ decl %ebp
+ jnz LLoop
+
+LLend: popl %ebp
+ sbbl %eax,%eax /* save carry in %eax */
+ andl $7,%ebp
+ jz LLend2
+ addl %eax,%eax /* restore carry from eax */
+LLoop2: movl %edx,%ebx
+ movl (%esi),%edx
+ adcl %edx,%edx
+ movl %ebx,(%edi)
+
+ leal 4(%esi),%esi /* use leal not to clobber carry */
+ leal 4(%edi),%edi
+ decl %ebp
+ jnz LLoop2
+
+ jmp LL1
+LLend2: addl %eax,%eax /* restore carry from eax */
+LL1: movl %edx,(%edi) /* store last limb */
+
+ sbbl %eax,%eax
+ negl %eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_rshift( mpi_ptr_t wp, (sp + 4)
+ * mpi_ptr_t up, (sp + 8)
+ * mpi_size_t usize, (sp + 12)
+ * unsigned cnt) (sp + 16)
+ */
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(mpihelp_rshift)
+C_SYMBOL_NAME(mpihelp_rshift:)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr */
+ movl 24(%esp),%esi /* s_ptr */
+ movl 28(%esp),%ebp /* size */
+ movl 32(%esp),%ecx /* cnt */
+
+/* We can use faster code for shift-by-1 under certain conditions. */
+ cmp $1,%ecx
+ jne Rnormal
+ leal 4(%edi),%eax
+ cmpl %esi,%eax
+ jnc Rspecial /* jump if res_ptr + 1 >= s_ptr */
+ leal (%edi,%ebp,4),%eax
+ cmpl %eax,%esi
+ jnc Rspecial /* jump if s_ptr >= res_ptr + size */
+
+Rnormal:
+ movl (%esi),%edx
+ addl $4,%esi
+ xorl %eax,%eax
+ shrdl %cl,%edx,%eax /* compute carry limb */
+ pushl %eax /* push carry limb onto stack */
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+ jz Rend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+Roop: movl 28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl 4(%esi),%edx
+ shrdl %cl,%eax,%ebx
+ shrdl %cl,%edx,%eax
+ movl %ebx,(%edi)
+ movl %eax,4(%edi)
+
+ movl 8(%esi),%ebx
+ movl 12(%esi),%eax
+ shrdl %cl,%ebx,%edx
+ shrdl %cl,%eax,%ebx
+ movl %edx,8(%edi)
+ movl %ebx,12(%edi)
+
+ movl 16(%esi),%edx
+ movl 20(%esi),%ebx
+ shrdl %cl,%edx,%eax
+ shrdl %cl,%ebx,%edx
+ movl %eax,16(%edi)
+ movl %edx,20(%edi)
+
+ movl 24(%esi),%eax
+ movl 28(%esi),%edx
+ shrdl %cl,%eax,%ebx
+ shrdl %cl,%edx,%eax
+ movl %ebx,24(%edi)
+ movl %eax,28(%edi)
+
+ addl $32,%esi
+ addl $32,%edi
+ decl %ebp
+ jnz Roop
+
+Rend: popl %ebp
+ andl $7,%ebp
+ jz Rend2
+Roop2: movl (%esi),%eax
+ shrdl %cl,%eax,%edx /* compute result limb */
+ movl %edx,(%edi)
+ movl %eax,%edx
+ addl $4,%esi
+ addl $4,%edi
+ decl %ebp
+ jnz Roop2
+
+Rend2: shrl %cl,%edx /* compute most significant limb */
+ movl %edx,(%edi) /* store it */
+
+ popl %eax /* pop carry limb */
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
+/* We loop from the most significant end of the arrays, which is only
+ permissible when a store cannot clobber a source limb that is still to
+ be read (checked above), since the function is documented to work for
+ overlapping source and destination. */
+
+Rspecial:
+ leal -4(%edi,%ebp,4),%edi
+ leal -4(%esi,%ebp,4),%esi
+
+ movl (%esi),%edx
+ subl $4,%esi
+
+ decl %ebp
+ pushl %ebp
+ shrl $3,%ebp
+
+ shrl $1,%edx
+ incl %ebp
+ decl %ebp
+ jz RLend
+
+ movl (%edi),%eax /* fetch destination cache line */
+
+ ALIGN (2)
+RLoop: movl -28(%edi),%eax /* fetch destination cache line */
+ movl %edx,%ebx
+
+ movl (%esi),%eax
+ movl -4(%esi),%edx
+ rcrl $1,%eax
+ movl %ebx,(%edi)
+ rcrl $1,%edx
+ movl %eax,-4(%edi)
+
+ movl -8(%esi),%ebx
+ movl -12(%esi),%eax
+ rcrl $1,%ebx
+ movl %edx,-8(%edi)
+ rcrl $1,%eax
+ movl %ebx,-12(%edi)
+
+ movl -16(%esi),%edx
+ movl -20(%esi),%ebx
+ rcrl $1,%edx
+ movl %eax,-16(%edi)
+ rcrl $1,%ebx
+ movl %edx,-20(%edi)
+
+ movl -24(%esi),%eax
+ movl -28(%esi),%edx
+ rcrl $1,%eax
+ movl %ebx,-24(%edi)
+ rcrl $1,%edx
+ movl %eax,-28(%edi)
+
+ leal -32(%esi),%esi /* use leal not to clobber carry */
+ leal -32(%edi),%edi
+ decl %ebp
+ jnz RLoop
+
+RLend: popl %ebp
+ sbbl %eax,%eax /* save carry in %eax */
+ andl $7,%ebp
+ jz RLend2
+ addl %eax,%eax /* restore carry from eax */
+RLoop2: movl %edx,%ebx
+ movl (%esi),%edx
+ rcrl $1,%edx
+ movl %ebx,(%edi)
+
+ leal -4(%esi),%esi /* use leal not to clobber carry */
+ leal -4(%edi),%edi
+ decl %ebp
+ jnz RLoop2
+
+ jmp RL1
+RLend2: addl %eax,%eax /* restore carry from eax */
+RL1: movl %edx,(%edi) /* store last limb */
+
+ movl $0,%eax
+ rcrl $1,%eax
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
diff --git a/mpi/i586/mpih-sub1.S b/mpi/i586/mpih-sub1.S
new file mode 100644
index 00000000..1f5c0bfd
--- /dev/null
+++ b/mpi/i586/mpih-sub1.S
@@ -0,0 +1,143 @@
+/* i80586 sub_n -- Subtract two limb vectors of the same length > 0 and
+ * store the difference in a third limb vector.
+ * Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_sub_n( mpi_ptr_t res_ptr, (sp + 4)
+ * mpi_ptr_t s1_ptr, (sp + 8)
+ * mpi_ptr_t s2_ptr, (sp + 12)
+ * mpi_size_t size) (sp + 16)
+ */
+
+
+.text
+ ALIGN (3)
+ .globl C_SYMBOL_NAME(mpihelp_sub_n)
+C_SYMBOL_NAME(mpihelp_sub_n:) /* res[] = s1[] - s2[] over size limbs; %eax returns the final borrow (0 or 1) */
+
+ pushl %edi /* save the four registers used as pointers/scratch; popped again before ret */
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+ movl 20(%esp),%edi /* res_ptr (argument offsets are 16 higher here because of the four pushes) */
+ movl 24(%esp),%esi /* s1_ptr */
+ movl 28(%esp),%ebp /* s2_ptr */
+ movl 32(%esp),%ecx /* size */
+
+ movl (%ebp),%ebx /* preload the first s2 limb; %ebx always holds the next s2 limb to subtract */
+
+ decl %ecx /* size-1: the last limb is always handled separately at Lend2 */
+ movl %ecx,%edx
+ shrl $3,%ecx /* %ecx = (size-1)/8 = number of passes through the 8-limb unrolled Loop */
+ andl $7,%edx /* %edx = (size-1)%8 = number of limbs left for Loop2 */
+ testl %ecx,%ecx /* zero carry flag; also sets ZF when there is no full 8-limb pass */
+ jz Lend
+ pushl %edx /* %edx is used as scratch inside Loop, so park the remainder count on the stack */
+
+ ALIGN (3)
+Loop: movl 28(%edi),%eax /* fetch destination cache line */
+ leal 32(%edi),%edi /* advance res_ptr by 8 limbs up front; the stores below use negative offsets */
+
+L1: movl (%esi),%eax /* limbs 0 and 1 of this pass */
+ movl 4(%esi),%edx
+ sbbl %ebx,%eax /* s1 limb - s2 limb - incoming borrow */
+ movl 4(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 8(%ebp),%ebx /* preload the s2 limb for the next pair */
+ movl %eax,-32(%edi)
+ movl %edx,-28(%edi)
+
+L2: movl 8(%esi),%eax /* limbs 2 and 3 */
+ movl 12(%esi),%edx
+ sbbl %ebx,%eax
+ movl 12(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 16(%ebp),%ebx
+ movl %eax,-24(%edi)
+ movl %edx,-20(%edi)
+
+L3: movl 16(%esi),%eax /* limbs 4 and 5 */
+ movl 20(%esi),%edx
+ sbbl %ebx,%eax
+ movl 20(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 24(%ebp),%ebx
+ movl %eax,-16(%edi)
+ movl %edx,-12(%edi)
+
+L4: movl 24(%esi),%eax /* limbs 6 and 7 */
+ movl 28(%esi),%edx
+ sbbl %ebx,%eax
+ movl 28(%ebp),%ebx
+ sbbl %ebx,%edx
+ movl 32(%ebp),%ebx /* preload s2 limb 8; it exists because at least one limb always follows the unrolled part */
+ movl %eax,-8(%edi)
+ movl %edx,-4(%edi)
+
+ leal 32(%esi),%esi /* pointer updates via leal and the decl below leave the borrow in the carry flag intact */
+ leal 32(%ebp),%ebp
+ decl %ecx
+ jnz Loop
+
+ popl %edx /* get back the remainder count saved before Loop */
+Lend:
+ decl %edx /* test %edx w/o clobbering carry */
+ js Lend2 /* remainder was zero: only the final limb is left */
+ incl %edx /* undo the probing decrement */
+Loop2: /* one limb per pass, %edx passes */
+ leal 4(%edi),%edi
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl 4(%ebp),%ebx /* preload the next s2 limb */
+ movl %eax,-4(%edi)
+ leal 4(%esi),%esi
+ leal 4(%ebp),%ebp
+ decl %edx
+ jnz Loop2
+Lend2: /* final limb; its s2 operand is already in %ebx */
+ movl (%esi),%eax
+ sbbl %ebx,%eax
+ movl %eax,(%edi)
+
+ sbbl %eax,%eax /* %eax = 0 without borrow, -1 with borrow */
+ negl %eax /* turn that into the return value 0 or 1 */
+
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+
diff --git a/mpi/mpi-inv.c b/mpi/mpi-inv.c
index 28cde00b..53ef356b 100644
--- a/mpi/mpi-inv.c
+++ b/mpi/mpi-inv.c
@@ -76,7 +76,7 @@ mpi_invm( MPI x, MPI a, MPI n )
mpi_free(t3);
mpi_free(u);
mpi_free(v);
- #else
+ #elif 0
/* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X)
* modified according to Michael Penk's solution for Exercice 35 */
@@ -156,6 +156,107 @@ mpi_invm( MPI x, MPI a, MPI n )
mpi_free(t1);
mpi_free(t2);
mpi_free(t3);
+ #else
+ /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
+ * modified according to Michael Penk's solution for Exercise 35
+ * with further enhancement */
+ MPI u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
+ unsigned k;
+ int sign;
+ int odd ;
+
+ u = mpi_copy(a);
+ v = mpi_copy(n);
+ for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) {
+ mpi_rshift(u, u, 1);
+ mpi_rshift(v, v, 1);
+ }
+ odd = mpi_test_bit(v,0);
+
+ u1 = mpi_alloc_set_ui(1);
+ if( !odd )
+ u2 = mpi_alloc_set_ui(0);
+ u3 = mpi_copy(u);
+ v1 = mpi_copy(v);
+ if( !odd ) {
+ v2 = mpi_alloc( mpi_get_nlimbs(u) );
+ mpi_sub( v2, u1, u ); /* U is used as const 1 */
+ }
+ v3 = mpi_copy(v);
+ if( mpi_test_bit(u, 0) ) { /* u is odd */
+ t1 = mpi_alloc_set_ui(0);
+ if( !odd ) {
+ t2 = mpi_alloc_set_ui(1); t2->sign = 1;
+ }
+ t3 = mpi_copy(v); t3->sign = !t3->sign;
+ goto Y4;
+ }
+ else {
+ t1 = mpi_alloc_set_ui(1);
+ if( !odd )
+ t2 = mpi_alloc_set_ui(0);
+ t3 = mpi_copy(u);
+ }
+ do {
+ do {
+ if( !odd ) {
+ if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */
+ mpi_add(t1, t1, v);
+ mpi_sub(t2, t2, u);
+ }
+ mpi_rshift(t1, t1, 1);
+ mpi_rshift(t2, t2, 1);
+ mpi_rshift(t3, t3, 1);
+ }
+ else {
+ if( mpi_test_bit(t1, 0) )
+ mpi_add(t1, t1, v);
+ mpi_rshift(t1, t1, 1);
+ mpi_rshift(t3, t3, 1);
+ }
+ Y4:
+ } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */
+
+ if( !t3->sign ) {
+ mpi_set(u1, t1);
+ if( !odd )
+ mpi_set(u2, t2);
+ mpi_set(u3, t3);
+ }
+ else {
+ mpi_sub(v1, v, t1);
+ sign = u->sign; u->sign = !u->sign;
+ if( !odd )
+ mpi_sub(v2, u, t2);
+ u->sign = sign;
+ sign = t3->sign; t3->sign = !t3->sign;
+ mpi_set(v3, t3);
+ t3->sign = sign;
+ }
+ mpi_sub(t1, u1, v1);
+ if( !odd )
+ mpi_sub(t2, u2, v2);
+ mpi_sub(t3, u3, v3);
+ if( t1->sign ) {
+ mpi_add(t1, t1, v);
+ if( !odd )
+ mpi_sub(t2, t2, u);
+ }
+ } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
+ /* mpi_lshift( u3, k ); */
+ mpi_set(x, u1);
+
+ mpi_free(u1);
+ mpi_free(v1);
+ mpi_free(t1);
+ if( !odd ) {
+ mpi_free(u2);
+ mpi_free(v2);
+ mpi_free(t2);
+ }
+ mpi_free(u3);
+ mpi_free(v3);
+ mpi_free(t3);
#endif
}
diff --git a/mpi/mpi-mpow.c b/mpi/mpi-mpow.c
new file mode 100644
index 00000000..5ac3c639
--- /dev/null
+++ b/mpi/mpi-mpow.c
@@ -0,0 +1,119 @@
+/* mpi-mpow.c - MPI functions
+ * Copyright (c) 1998 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+#include <assert.h>
+
+static int
+build_index( MPI *exparray, int k, int i, int t )
+{
+    /* Collect bit number T-I of each of the K exponents into one value:
+     * exponent N contributes bit N of the result, so the result can be
+     * used directly as an index into a table of 2^K entries. */
+    const int pos = t - i;
+    int result = 0;
+    int n = k;
+
+    while( n-- > 0 ) {
+        result = (result << 1) | (mpi_test_bit( exparray[n], pos ) ? 1 : 0);
+    }
+    return result;
+}
+
+/****************
+ * RES = (BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ... * mod M
+ * BASEARRAY and EXPARRAY are NULL terminated and must have equal length. */
+void
+mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m)
+{
+    int k;      /* number of elements */
+    int t;      /* bit size of largest exponent */
+    int i, j, idx;
+    MPI *G;     /* table with precomputed values of size 2^k */
+    MPI tmp;
+
+    for(k=0; basearray[k]; k++ )    /* count the bases up to the NULL terminator */
+        ;
+    assert(k);
+    for(t=0, i=0; (tmp=exparray[i]); i++ ) {    /* count the exponents, track the longest */
+        /*log_mpidump("exp: ", tmp );*/
+        j = mpi_get_nbits(tmp);
+        if( j > t )
+            t = j;
+    }
+    /*log_mpidump("mod: ", m );*/
+    assert(i==k);       /* one exponent per base */
+    assert(t);          /* the largest exponent must have a non-zero bit length */
+    assert( k < 10 );   /* bounds the table at 2^9 entries */
+
+    G = m_alloc_clear( (1<<k) * sizeof *G );    /* all slots start out NULL */
+  #if 0
+    /* do the precomputation */
+    G[0] = mpi_alloc_set_ui( 1 );
+    for(i=1; i < (1<<k); i++ ) {
+        for(j=0; j < k; j++ ) {
+            if( (i & (1<<j) ) ) {
+                if( !G[i] )
+                    G[i] = mpi_copy( basearray[j] );
+                else
+                    mpi_mulm( G[i], G[i], basearray[j], m );
+            }
+        }
+        if( !G[i] )
+            G[i] = mpi_alloc(0);
+    }
+  #endif
+    /* and calculate */
+    tmp = mpi_alloc( mpi_get_nlimbs(m)+1 );
+    mpi_set_ui( res, 1 );
+    for(i = 1; i <= t; i++ ) {      /* one pass per exponent bit, bit t-1 down to bit 0 */
+        mpi_mulm(tmp, res, res, m );    /* tmp = res^2 mod m */
+        idx = build_index( exparray, k, i, t );
+        assert( idx >= 0 && idx < (1<<k) );
+        if( !G[idx] ) {     /* table entry not built yet: create it on first use */
+            if( !idx )
+                G[0] = mpi_alloc_set_ui( 1 );
+            else {
+                for(j=0; j < k; j++ ) {     /* product of the bases selected by the bits of idx */
+                    if( (idx & (1<<j) ) ) {
+                        if( !G[idx] )
+                            G[idx] = mpi_copy( basearray[j] );
+                        else
+                            mpi_mulm( G[idx], G[idx], basearray[j], m );
+                    }
+                }
+                if( !G[idx] )
+                    G[idx] = mpi_alloc(0);
+            }
+        }
+        mpi_mulm(res, tmp, G[idx], m );     /* res = tmp * G[idx] mod m */
+    }
+
+    /* cleanup */
+    mpi_free(tmp);      /* tmp is an MPI from mpi_alloc(); it was wrongly released with m_free() */
+    for(i=0; i < (1<<k); i++ )
+        mpi_free(G[i]);     /* slots never used are still NULL */
+    m_free(G);
+}
+