diff options
author | Werner Koch <wk@gnupg.org> | 1998-02-11 03:25:43 +0000 |
---|---|---|
committer | Werner Koch <wk@gnupg.org> | 1998-02-11 03:25:43 +0000 |
commit | 05690451b89ca1df9e81b50f306f0f87ae3b80a6 (patch) | |
tree | 69ffa304be307409ece88034418ae3e8d4408a0b | |
parent | a7f3283cc053fff72bb81c230504bbd9d675de1d (diff) | |
download | libgcrypt-05690451b89ca1df9e81b50f306f0f87ae3b80a6.tar.gz |
a couple of changes; but some parts are now broken
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | THANKS | 26 | ||||
-rw-r--r-- | cipher/Makefile.in | 2 | ||||
-rw-r--r-- | cipher/blowfish.c | 2 | ||||
-rw-r--r-- | cipher/elgamal.c | 28 | ||||
-rw-r--r-- | cipher/md5.c | 13 | ||||
-rw-r--r-- | cipher/rmd160.c | 17 | ||||
-rw-r--r-- | cipher/sha1.c | 22 | ||||
-rw-r--r-- | mpi/Makefile.am | 1 | ||||
-rw-r--r-- | mpi/Makefile.in | 15 | ||||
-rw-r--r-- | mpi/config.links | 6 | ||||
-rw-r--r-- | mpi/i586/README | 26 | ||||
-rw-r--r-- | mpi/i586/distfiles | 8 | ||||
-rw-r--r-- | mpi/i586/mpih-add1.S | 134 | ||||
-rw-r--r-- | mpi/i586/mpih-mul1.S | 89 | ||||
-rw-r--r-- | mpi/i586/mpih-mul2.S | 94 | ||||
-rw-r--r-- | mpi/i586/mpih-mul3.S | 94 | ||||
-rw-r--r-- | mpi/i586/mpih-shift.S | 426 | ||||
-rw-r--r-- | mpi/i586/mpih-sub1.S | 143 | ||||
-rw-r--r-- | mpi/mpi-inv.c | 103 | ||||
-rw-r--r-- | mpi/mpi-mpow.c | 119 |
21 files changed, 1345 insertions, 28 deletions
@@ -0,0 +1,5 @@ +Tue Feb 10 11:57:23 1998 Werner Koch (wk@frodo) + + * ddd/hhhh: + + @@ -0,0 +1,26 @@ +G10 has originally been written by Werner Koch. Other people contributed +by reporting problems, suggesting various improvements or submitting actual +code. Here is a list of these people. Help me keeping it complete and +exempt of errors. + + +Anand Kumria wildfire@progsoc.uts.edu.au +Daniel Eisenbud eisenbud@cs.swarthmore.edu +Detlef Lannert lannert@lannert.rz.uni-duesseldorf.de +Ernst Molitor ernst.molitor@uni-bonn.de +Hendrik Buschkamp buschkamp@rheumanet.org +Jens Bachem bachem@rrz.uni-koeln.de +Peter Gutmann pgut001@cs.auckland.ac.nz +Ralph Gillen gillen@theochem.uni-duesseldorf.de +Thomas Roessler roessler@guug.de +Tomas Fasth tomas.fasth@twinspot.net +Walter Koch walterk@ddorf.rhein-ruhr.de +Werner Koch werner.koch@guug.de +Wim Vandeputte bunbun@reptile.rug.ac.be + + +Thanks to the German Unix User Group for providing FTP space and +Martin Hamilton for hosting the mailing list. + +Many thanks to Gerlinde for having so much patience with me while +hacking late in the evening. diff --git a/cipher/Makefile.in b/cipher/Makefile.in index 9e4860f6..e4ab5eda 100644 --- a/cipher/Makefile.in +++ b/cipher/Makefile.in @@ -130,7 +130,7 @@ AR = ar CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) LINK = $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -DIST_COMMON = Makefile.am Makefile.in +DIST_COMMON = ChangeLog Makefile.am Makefile.in DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) diff --git a/cipher/blowfish.c b/cipher/blowfish.c index 81e33d08..8e3a4930 100644 --- a/cipher/blowfish.c +++ b/cipher/blowfish.c @@ -228,7 +228,7 @@ static const u32 ps[BLOWFISH_ROUNDS+2] = { -static u32 +static inline u32 function_F( BLOWFISH_context *bc, u32 x ) { u16 a, b, c, d; diff --git a/cipher/elgamal.c b/cipher/elgamal.c index 5e6bd0c8..13b8579f 100644 --- a/cipher/elgamal.c +++ b/cipher/elgamal.c @@ -311,25 +311,37 @@ elg_verify(MPI a, MPI b, MPI input, ELG_public_key *pkey ) int rc; MPI t1; MPI t2; + MPI base[4]; + MPI exp[4]; if( !(mpi_cmp_ui( a, 0 ) > 0 && mpi_cmp( a, pkey->p ) < 0) ) return 0; /* assertion 0 < a < p failed */ t1 = mpi_alloc( mpi_get_nlimbs(a) ); t2 = mpi_alloc( mpi_get_nlimbs(a) ); - /* t1 = (y^a mod p) * (a^b mod p) mod p - * fixme: should be calculated by a call which evalutes - * t1 = y^a * a^b mod p - * direct. - */ - mpi_powm( t1, pkey->y, a, pkey->p ); - mpi_powm( t2, a, b, pkey->p ); - mpi_mulm( t1, t1, t2, pkey->p ); + + #if 0 + /* t1 = (y^a mod p) * (a^b mod p) mod p */ + base[0] = pkey->y; exp[0] = a; + base[1] = a; exp[1] = b; + base[2] = NULL; exp[2] = NULL; + mpi_mulpowm( t1, base, exp, pkey->p ); /* t2 = g ^ input mod p */ mpi_powm( t2, pkey->g, input, pkey->p ); rc = !mpi_cmp( t1, t2 ); + #else + /* t1 = g ^ - input * y ^ a * a ^ b mod p */ + mpi_invm(t2, pkey->g, pkey->p ); + base[0] = t2 ; exp[0] = input; + base[1] = pkey->y; exp[1] = a; + base[2] = a; exp[2] = b; + base[3] = NULL; exp[3] = NULL; + mpi_mulpowm( t1, base, exp, pkey->p ); + rc = !mpi_cmp_ui( t1, 1 ); + + #endif mpi_free(t1); mpi_free(t2); diff --git a/cipher/md5.c b/cipher/md5.c index c9f9a86b..ef95c7ef 100644 --- a/cipher/md5.c +++ b/cipher/md5.c @@ -93,7 +93,18 @@ static byte PADDING[64] = { #define I(x, y, z) ((y) ^ ((x) | (~z))) /* ROTATE_LEFT rotates x left n bits */ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) +#if defined(__GNUC__) && defined(__i386__) +static inline u32 +ROTATE_LEFT(u32 x, int n) +{ + __asm__("roll %%cl,%0" + :"=r" (x) + :"0" (x),"c" (n)); + return x; +} +#else + #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) +#endif /* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */ /* Rotation is separate from addition to prevent recomputation */ diff --git a/cipher/rmd160.c b/cipher/rmd160.c index 0b501d77..39f1c740 100644 --- a/cipher/rmd160.c +++ b/cipher/rmd160.c @@ -151,6 +151,20 @@ rmd160_init( RMD160_CONTEXT *hd ) } +#if defined(__GNUC__) && defined(__i386__) +static inline u32 +rol(int n, u32 x) +{ + __asm__("roll %%cl,%0" + :"=r" (x) + :"0" (x),"c" (n)); + return x; +} +#else + #define rol(n,x) ( ((x) << (n)) | ((x) >> (32-(n))) ) +#endif + + /**************** * Transform the message X which consists of 16 32-bit-words */ @@ -209,9 +223,6 @@ transform( RMD160_CONTEXT *hd, byte *data ) (a) < 64 ? F3((x),(y),(z)) : \ F4((x),(y),(z)) ) -#define rol(n,x) ( ((x) << (n)) | ((x) >> (32-(n))) ) - - #ifdef BIG_ENDIAN_HOST { int i; byte *p2, *p1; diff --git a/cipher/sha1.c b/cipher/sha1.c index 51029c45..a54ec6a8 100644 --- a/cipher/sha1.c +++ b/cipher/sha1.c @@ -99,14 +99,30 @@ #define K3 0x8F1BBCDCL /* Rounds 40-59 */ #define K4 0xCA62C1D6L /* Rounds 60-79 */ -#define ROTL(n,X) ( ( ( X ) << n ) | ( ( X ) >> ( 32 - n ) ) ) + +#if defined(__GNUC__) && defined(__i386__) +static inline u32 +rol(int n, u32 x) +{ + __asm__("roll %%cl,%0" + :"=r" (x) + :"0" (x),"c" (n)); + return x; +} +#else + #define rol(n,x) ( ((x) << (n)) | ((x) >> (32-(n))) ) +#endif + + + + #define expand(W,i) ( W[ i & 15 ] = \ - ROTL( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \ + rol( 1, ( W[ i & 15 ] ^ W[ (i - 14) & 15 ] ^ \ W[ (i - 8) & 15 ] ^ W[ (i - 3) & 15 ] ) ) ) #define subRound(a, b, c, d, e, f, k, data) \ - ( e += ROTL( 5, a ) + f( b, c, d ) + k + data, b = ROTL( 30, b ) ) + ( e += rol( 5, a ) + f( b, c, d ) + k + data, b = rol( 30, b ) ) void diff --git a/mpi/Makefile.am b/mpi/Makefile.am index 1c32e131..2801a751 100644 --- a/mpi/Makefile.am +++ b/mpi/Makefile.am @@ -24,6 +24,7 @@ libmpi_a_SOURCES = longlong.h \ mpi-inv.c \ mpi-mul.c \ mpi-pow.c \ + mpi-mpow.c \ mpi-scan.c \ mpicoder.c \ mpih-cmp.c \ diff --git a/mpi/Makefile.in b/mpi/Makefile.in index aae7160c..bcbbc641 100644 --- a/mpi/Makefile.in +++ b/mpi/Makefile.in @@ -106,6 +106,7 @@ libmpi_a_SOURCES = longlong.h \ mpi-inv.c \ mpi-mul.c \ mpi-pow.c \ + mpi-mpow.c \ mpi-scan.c \ mpicoder.c \ mpih-cmp.c \ @@ -138,13 +139,13 @@ LIBS = @LIBS@ libmpi_a_DEPENDENCIES = mpih-mul1.o mpih-mul2.o mpih-mul3.o mpih-add1.o \ mpih-sub1.o mpih-shift.o libmpi_a_OBJECTS = mpi-add.o mpi-bit.o mpi-cmp.o mpi-div.o mpi-gcd.o \ -mpi-inv.o mpi-mul.o mpi-pow.o mpi-scan.o mpicoder.o mpih-cmp.o \ -mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o +mpi-inv.o mpi-mul.o mpi-pow.o mpi-mpow.o mpi-scan.o mpicoder.o \ +mpih-cmp.o mpih-add.o mpih-sub.o mpih-div.o mpih-mul.o mpiutil.o AR = ar CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) LINK = $(CC) $(CFLAGS) $(LDFLAGS) -o $@ -DIST_COMMON = Makefile.am Makefile.in +DIST_COMMON = ChangeLog Makefile.am Makefile.in DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) @@ -152,10 +153,10 @@ DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) TAR = tar GZIP = --best DEP_FILES = .deps/mpi-add.P .deps/mpi-bit.P .deps/mpi-cmp.P \ -.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mul.P \ -.deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P .deps/mpih-add.P \ -.deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P .deps/mpih-sub.P \ -.deps/mpiutil.P +.deps/mpi-div.P .deps/mpi-gcd.P .deps/mpi-inv.P .deps/mpi-mpow.P \ +.deps/mpi-mul.P .deps/mpi-pow.P .deps/mpi-scan.P .deps/mpicoder.P \ +.deps/mpih-add.P .deps/mpih-cmp.P .deps/mpih-div.P .deps/mpih-mul.P \ +.deps/mpih-sub.P .deps/mpiutil.P SOURCES = $(libmpi_a_SOURCES) OBJECTS = $(libmpi_a_OBJECTS) diff --git a/mpi/config.links b/mpi/config.links index 923e18b0..cf370e05 100644 --- a/mpi/config.links +++ b/mpi/config.links @@ -10,7 +10,7 @@ test -d ./mpi || mkdir ./mpi echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h case "${target}" in - i[345]86*-*-linuxaout* | i[345]86*-*-linuxoldld* | i[345]86*-*-*bsd*) + i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*) echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" @@ -20,14 +20,14 @@ case "${target}" in cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i586 i386" ;; - i[3456]86*-*-*) + i[34]86*-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i386" ;; i[56]86*-*-* | pentium-*-* | pentiumpro-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h - cat $srcdir/mpi/i586/syntax.h >>./mpi/asm-syntax.h + cat $srcdir/mpi/i386/syntax.h >>./mpi/asm-syntax.h path="i586 i386" ;; alpha*-*-*) diff --git a/mpi/i586/README b/mpi/i586/README new file mode 100644 index 00000000..d73b0826 --- /dev/null +++ b/mpi/i586/README @@ -0,0 +1,26 @@ +This directory contains mpn functions optimized for Intel Pentium +processors. + +RELEVANT OPTIMIZATION ISSUES + +1. Pentium doesn't allocate cache lines on writes, unlike most other modern +processors. Since the functions in the mpn class do array writes, we have to +handle allocating the destination cache lines by reading a word from it in the +loops, to achieve the best performance. + +2. Pairing of memory operations requires that the two issued operations refer +to different cache banks. The simplest way to insure this is to read/write +two words from the same object. If we make operations on different objects, +they might or might not be to the same cache bank. + +STATUS + +1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium +documentation indicates that they should take only 43/8 = 5.375 cycles/limb, +or 5 cycles/limb asymptotically. + +2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb. Due to loop +overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb. + +3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they +should... diff --git a/mpi/i586/distfiles b/mpi/i586/distfiles new file mode 100644 index 00000000..951480fd --- /dev/null +++ b/mpi/i586/distfiles @@ -0,0 +1,8 @@ +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-shift.S +mpih-sub1.S +README + diff --git a/mpi/i586/mpih-add1.S b/mpi/i586/mpih-add1.S new file mode 100644 index 00000000..e9883285 --- /dev/null +++ b/mpi/i586/mpih-add1.S @@ -0,0 +1,134 @@ +/* i80586 add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1992, 1994, 1995, 1996 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_add_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_add_n) +C_SYMBOL_NAME(mpihelp_add_n:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + adcl %ebx,%eax + movl 4(%ebp),%ebx + adcl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + adcl %ebx,%eax + movl 12(%ebp),%ebx + adcl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + adcl %ebx,%eax + movl 20(%ebp),%ebx + adcl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %ebx,%eax + movl 28(%ebp),%ebx + adcl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + adcl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + adcl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + diff --git a/mpi/i586/mpih-mul1.S b/mpi/i586/mpih-mul1.S new file mode 100644 index 00000000..c0bedec0 --- /dev/null +++ b/mpi/i586/mpih-mul1.S @@ -0,0 +1,89 @@ +/* i80586 mul_1 -- Multiply a limb vector with a limb and store + * the result in a second limb vector. + * Copyright (C) 1992, 1994, 1996 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_mul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(mpihelp_mul_1) +C_SYMBOL_NAME(mpihelp_mul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/mpi/i586/mpih-mul2.S b/mpi/i586/mpih-mul2.S new file mode 100644 index 00000000..6b564623 --- /dev/null +++ b/mpi/i586/mpih-mul2.S @@ -0,0 +1,94 @@ +/* i80586 addmul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * Copyright (C) 1992, 1994 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_addmul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(mpihelp_addmul_1) +C_SYMBOL_NAME(mpihelp_addmul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) + INSN2(add,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/mpi/i586/mpih-mul3.S b/mpi/i586/mpih-mul3.S new file mode 100644 index 00000000..69b7f467 --- /dev/null +++ b/mpi/i586/mpih-mul3.S @@ -0,0 +1,94 @@ +/* i80586 submul_1 -- Multiply a limb vector with a limb and add + * the result to a second limb vector. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * Copyright (C) 1992, 1994 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_submul_1( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_size_t s1_size, (sp + 12) + * mpi_limb_t s2_limb) (sp + 16) + */ + +#define res_ptr edi +#define s1_ptr esi +#define size ecx +#define s2_limb ebp + + TEXT + ALIGN (3) + GLOBL C_SYMBOL_NAME(mpihelp_submul_1) +C_SYMBOL_NAME(mpihelp_submul_1:) + + INSN1(push,l ,R(edi)) + INSN1(push,l ,R(esi)) + INSN1(push,l ,R(ebx)) + INSN1(push,l ,R(ebp)) + + INSN2(mov,l ,R(res_ptr),MEM_DISP(esp,20)) + INSN2(mov,l ,R(s1_ptr),MEM_DISP(esp,24)) + INSN2(mov,l ,R(size),MEM_DISP(esp,28)) + INSN2(mov,l ,R(s2_limb),MEM_DISP(esp,32)) + + INSN2(lea,l ,R(res_ptr),MEM_INDEX(res_ptr,size,4)) + INSN2(lea,l ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4)) + INSN1(neg,l ,R(size)) + INSN2(xor,l ,R(ebx),R(ebx)) + ALIGN (3) + +Loop: INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),MEM_INDEX(s1_ptr,size,4)) + + INSN1(mul,l ,R(s2_limb)) + + INSN2(add,l ,R(eax),R(ebx)) + INSN2(mov,l ,R(ebx),MEM_INDEX(res_ptr,size,4)) + + INSN2(adc,l ,R(edx),$0) + INSN2(sub,l ,R(ebx),R(eax)) + + INSN2(mov,l ,MEM_INDEX(res_ptr,size,4),R(ebx)) + INSN1(inc,l ,R(size)) + + INSN2(mov,l ,R(ebx),R(edx)) + INSN1(jnz, ,Loop) + + INSN2(adc,l ,R(ebx),$0) + INSN2(mov,l ,R(eax),R(ebx)) + INSN1(pop,l ,R(ebp)) + INSN1(pop,l ,R(ebx)) + INSN1(pop,l ,R(esi)) + INSN1(pop,l ,R(edi)) + ret + diff --git a/mpi/i586/mpih-shift.S b/mpi/i586/mpih-shift.S new file mode 100644 index 00000000..9f156381 --- /dev/null +++ b/mpi/i586/mpih-shift.S @@ -0,0 +1,426 @@ +/* i80586 rshift, lshift + * Copyright (c) 1997 by Werner Koch (dd9jn) + * Copyright (C) 1992, 1994 Free Software Foundation, Inc. + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_lshift) +C_SYMBOL_NAME(mpihelp_lshift:) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Lnormal + leal 4(%esi),%eax + cmpl %edi,%eax + jnc Lspecial /* jump if s_ptr + 1 >= res_ptr */ + leal (%esi,%ebp,4),%eax + cmpl %eax,%edi + jnc Lspecial /* jump if res_ptr >= s_ptr + size */ + +Lnormal: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + xorl %eax,%eax + shldl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz Lend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Loop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + shldl %cl,%ebx,%edx + shldl %cl,%eax,%ebx + movl %edx,-8(%edi) + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + shldl %cl,%edx,%eax + shldl %cl,%ebx,%edx + movl %eax,-16(%edi) + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + shldl %cl,%eax,%ebx + shldl %cl,%edx,%eax + movl %ebx,-24(%edi) + movl %eax,-28(%edi) + + subl $32,%esi + subl $32,%edi + decl %ebp + jnz Loop + +Lend: popl %ebp + andl $7,%ebp + jz Lend2 +Loop2: movl (%esi),%eax + shldl %cl,%eax,%edx + movl %edx,(%edi) + movl %eax,%edx + subl $4,%esi + subl $4,%edi + decl %ebp + jnz Loop2 + +Lend2: shll %cl,%edx /* compute least significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Lspecial: + movl (%esi),%edx + addl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + addl %edx,%edx + incl %ebp + decl %ebp + jz LLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +LLoop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + adcl %eax,%eax + movl %ebx,(%edi) + adcl %edx,%edx + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + adcl %ebx,%ebx + movl %edx,8(%edi) + adcl %eax,%eax + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + adcl %edx,%edx + movl %eax,16(%edi) + adcl %ebx,%ebx + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + adcl %eax,%eax + movl %ebx,24(%edi) + adcl %edx,%edx + movl %eax,28(%edi) + + leal 32(%esi),%esi /* use leal not to clobber carry */ + leal 32(%edi),%edi + decl %ebp + jnz LLoop + +LLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz LLend2 + addl %eax,%eax /* restore carry from eax */ +LLoop2: movl %edx,%ebx + movl (%esi),%edx + adcl %edx,%edx + movl %ebx,(%edi) + + leal 4(%esi),%esi /* use leal not to clobber carry */ + leal 4(%edi),%edi + decl %ebp + jnz LLoop2 + + jmp LL1 +LLend2: addl %eax,%eax /* restore carry from eax */ +LL1: movl %edx,(%edi) /* store last limb */ + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + + + + +/******************* + * mpi_limb_t + * mpihelp_rshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_rshift) +C_SYMBOL_NAME(mpihelp_rshift:) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s_ptr */ + movl 28(%esp),%ebp /* size */ + movl 32(%esp),%ecx /* cnt */ + +/* We can use faster code for shift-by-1 under certain conditions. */ + cmp $1,%ecx + jne Rnormal + leal 4(%edi),%eax + cmpl %esi,%eax + jnc Rspecial /* jump if res_ptr + 1 >= s_ptr */ + leal (%edi,%ebp,4),%eax + cmpl %eax,%esi + jnc Rspecial /* jump if s_ptr >= res_ptr + size */ + +Rnormal: + movl (%esi),%edx + addl $4,%esi + xorl %eax,%eax + shrdl %cl,%edx,%eax /* compute carry limb */ + pushl %eax /* push carry limb onto stack */ + + decl %ebp + pushl %ebp + shrl $3,%ebp + jz Rend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +Roop: movl 28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl 4(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,(%edi) + movl %eax,4(%edi) + + movl 8(%esi),%ebx + movl 12(%esi),%eax + shrdl %cl,%ebx,%edx + shrdl %cl,%eax,%ebx + movl %edx,8(%edi) + movl %ebx,12(%edi) + + movl 16(%esi),%edx + movl 20(%esi),%ebx + shrdl %cl,%edx,%eax + shrdl %cl,%ebx,%edx + movl %eax,16(%edi) + movl %edx,20(%edi) + + movl 24(%esi),%eax + movl 28(%esi),%edx + shrdl %cl,%eax,%ebx + shrdl %cl,%edx,%eax + movl %ebx,24(%edi) + movl %eax,28(%edi) + + addl $32,%esi + addl $32,%edi + decl %ebp + jnz Roop + +Rend: popl %ebp + andl $7,%ebp + jz Rend2 +Roop2: movl (%esi),%eax + shrdl %cl,%eax,%edx /* compute result limb */ + movl %edx,(%edi) + movl %eax,%edx + addl $4,%esi + addl $4,%edi + decl %ebp + jnz Roop2 + +Rend2: shrl %cl,%edx /* compute most significant limb */ + movl %edx,(%edi) /* store it */ + + popl %eax /* pop carry limb */ + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + +/* We loop from least significant end of the arrays, which is only + permissable if the source and destination don't overlap, since the + function is documented to work for overlapping source and destination. +*/ + +Rspecial: + leal -4(%edi,%ebp,4),%edi + leal -4(%esi,%ebp,4),%esi + + movl (%esi),%edx + subl $4,%esi + + decl %ebp + pushl %ebp + shrl $3,%ebp + + shrl $1,%edx + incl %ebp + decl %ebp + jz RLend + + movl (%edi),%eax /* fetch destination cache line */ + + ALIGN (2) +RLoop: movl -28(%edi),%eax /* fetch destination cache line */ + movl %edx,%ebx + + movl (%esi),%eax + movl -4(%esi),%edx + rcrl $1,%eax + movl %ebx,(%edi) + rcrl $1,%edx + movl %eax,-4(%edi) + + movl -8(%esi),%ebx + movl -12(%esi),%eax + rcrl $1,%ebx + movl %edx,-8(%edi) + rcrl $1,%eax + movl %ebx,-12(%edi) + + movl -16(%esi),%edx + movl -20(%esi),%ebx + rcrl $1,%edx + movl %eax,-16(%edi) + rcrl $1,%ebx + movl %edx,-20(%edi) + + movl -24(%esi),%eax + movl -28(%esi),%edx + rcrl $1,%eax + movl %ebx,-24(%edi) + rcrl $1,%edx + movl %eax,-28(%edi) + + leal -32(%esi),%esi /* use leal not to clobber carry */ + leal -32(%edi),%edi + decl %ebp + jnz RLoop + +RLend: popl %ebp + sbbl %eax,%eax /* save carry in %eax */ + andl $7,%ebp + jz RLend2 + addl %eax,%eax /* restore carry from eax */ +RLoop2: movl %edx,%ebx + movl (%esi),%edx + rcrl $1,%edx + movl %ebx,(%edi) + + leal -4(%esi),%esi /* use leal not to clobber carry */ + leal -4(%edi),%edi + decl %ebp + jnz RLoop2 + + jmp RL1 +RLend2: addl %eax,%eax /* restore carry from eax */ +RL1: movl %edx,(%edi) /* store last limb */ + + movl $0,%eax + rcrl $1,%eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + diff --git a/mpi/i586/mpih-sub1.S b/mpi/i586/mpih-sub1.S new file mode 100644 index 00000000..1f5c0bfd --- /dev/null +++ b/mpi/i586/mpih-sub1.S @@ -0,0 +1,143 @@ +/* i80586 sub_n -- Sub two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * mpihelp_sub_n( mpi_ptr_t res_ptr, (sp + 4) + * mpi_ptr_t s1_ptr, (sp + 8) + * mpi_ptr_t s2_ptr, (sp + 12) + * mpi_size_t size) (sp + 16) + */ + + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(mpihelp_sub_n) +C_SYMBOL_NAME(mpihelp_sub_n:) + + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%edi /* res_ptr */ + movl 24(%esp),%esi /* s1_ptr */ + movl 28(%esp),%ebp /* s2_ptr */ + movl 32(%esp),%ecx /* size */ + + movl (%ebp),%ebx + + decl %ecx + movl %ecx,%edx + shrl $3,%ecx + andl $7,%edx + testl %ecx,%ecx /* zero carry flag */ + jz Lend + pushl %edx + + ALIGN (3) +Loop: movl 28(%edi),%eax /* fetch destination cache line */ + leal 32(%edi),%edi + +L1: movl (%esi),%eax + movl 4(%esi),%edx + sbbl %ebx,%eax + movl 4(%ebp),%ebx + sbbl %ebx,%edx + movl 8(%ebp),%ebx + movl %eax,-32(%edi) + movl %edx,-28(%edi) + +L2: movl 8(%esi),%eax + movl 12(%esi),%edx + sbbl %ebx,%eax + movl 12(%ebp),%ebx + sbbl %ebx,%edx + movl 16(%ebp),%ebx + movl %eax,-24(%edi) + movl %edx,-20(%edi) + +L3: movl 16(%esi),%eax + movl 20(%esi),%edx + sbbl %ebx,%eax + movl 20(%ebp),%ebx + sbbl %ebx,%edx + movl 24(%ebp),%ebx + movl %eax,-16(%edi) + movl %edx,-12(%edi) + +L4: movl 24(%esi),%eax + movl 28(%esi),%edx + sbbl %ebx,%eax + movl 28(%ebp),%ebx + sbbl %ebx,%edx + movl 32(%ebp),%ebx + movl %eax,-8(%edi) + movl %edx,-4(%edi) + + leal 32(%esi),%esi + leal 32(%ebp),%ebp + decl %ecx + jnz Loop + + popl %edx +Lend: + decl %edx /* test %edx w/o clobbering carry */ + js Lend2 + incl %edx +Loop2: + leal 4(%edi),%edi + movl (%esi),%eax + sbbl %ebx,%eax + movl 4(%ebp),%ebx + movl %eax,-4(%edi) + leal 4(%esi),%esi + leal 4(%ebp),%ebp + decl %edx + jnz Loop2 +Lend2: + movl (%esi),%eax + sbbl %ebx,%eax + movl %eax,(%edi) + + sbbl %eax,%eax + negl %eax + + popl %ebp + popl %ebx + popl %esi + popl %edi + ret + diff --git a/mpi/mpi-inv.c b/mpi/mpi-inv.c index 28cde00b..53ef356b 100644 --- a/mpi/mpi-inv.c +++ b/mpi/mpi-inv.c @@ -76,7 +76,7 @@ mpi_invm( MPI x, MPI a, MPI n ) mpi_free(t3); mpi_free(u); mpi_free(v); - #else + #elif 0 /* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X) * modified according to Michael Penk's solution for Exercice 35 */ @@ -156,6 +156,107 @@ mpi_invm( MPI x, MPI a, MPI n ) mpi_free(t1); mpi_free(t2); mpi_free(t3); + #else + /* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X) + * modified according to Michael Penk's solution for Exercice 35 + * with further enhancement */ + MPI u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3; + unsigned k; + int sign; + int odd ; + + u = mpi_copy(a); + v = mpi_copy(n); + for(k=0; !mpi_test_bit(u,0) && !mpi_test_bit(v,0); k++ ) { + mpi_rshift(u, u, 1); + mpi_rshift(v, v, 1); + } + odd = mpi_test_bit(v,0); + + u1 = mpi_alloc_set_ui(1); + if( !odd ) + u2 = mpi_alloc_set_ui(0); + u3 = mpi_copy(u); + v1 = mpi_copy(v); + if( !odd ) { + v2 = mpi_alloc( mpi_get_nlimbs(u) ); + mpi_sub( v2, u1, u ); /* U is used as const 1 */ + } + v3 = mpi_copy(v); + if( mpi_test_bit(u, 0) ) { /* u is odd */ + t1 = mpi_alloc_set_ui(0); + if( !odd ) { + t2 = mpi_alloc_set_ui(1); t2->sign = 1; + } + t3 = mpi_copy(v); t3->sign = !t3->sign; + goto Y4; + } + else { + t1 = mpi_alloc_set_ui(1); + if( !odd ) + t2 = mpi_alloc_set_ui(0); + t3 = mpi_copy(u); + } + do { + do { + if( !odd ) { + if( mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0) ) { /* one is odd */ + mpi_add(t1, t1, v); + mpi_sub(t2, t2, u); + } + mpi_rshift(t1, t1, 1); + mpi_rshift(t2, t2, 1); + mpi_rshift(t3, t3, 1); + } + else { + if( mpi_test_bit(t1, 0) ) + mpi_add(t1, t1, v); + mpi_rshift(t1, t1, 1); + mpi_rshift(t3, t3, 1); + } + Y4: + } while( !mpi_test_bit( t3, 0 ) ); /* while t3 is even */ + + if( !t3->sign ) { + mpi_set(u1, t1); + if( !odd ) + mpi_set(u2, t2); + mpi_set(u3, t3); + } + else { + mpi_sub(v1, v, t1); + sign = u->sign; u->sign = !u->sign; + if( !odd ) + mpi_sub(v2, u, t2); + u->sign = sign; + sign = t3->sign; t3->sign = !t3->sign; + mpi_set(v3, t3); + t3->sign = sign; + } + mpi_sub(t1, u1, v1); + if( !odd ) + mpi_sub(t2, u2, v2); + mpi_sub(t3, u3, v3); + if( t1->sign ) { + mpi_add(t1, t1, v); + if( !odd ) + mpi_sub(t2, t2, u); + } + } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */ + /* mpi_lshift( u3, k ); */ + mpi_set(x, u1); + + mpi_free(u1); + mpi_free(v1); + mpi_free(t1); + if( !odd ) { + mpi_free(u2); + mpi_free(v2); + mpi_free(t2); + } + mpi_free(u3); + mpi_free(v3); + mpi_free(t3); #endif } diff --git a/mpi/mpi-mpow.c b/mpi/mpi-mpow.c new file mode 100644 index 00000000..5ac3c639 --- /dev/null +++ b/mpi/mpi-mpow.c @@ -0,0 +1,119 @@ +/* mpi-mpow.c - MPI functions + * Copyright (c) 1998 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" +#include <assert.h> + +static int +build_index( MPI *exparray, int k, int i, int t ) +{ + int j, bitno; + int index = 0; + + bitno = t-i; + for(j=k-1; j >= 0; j-- ) { + index <<= 1; + if( mpi_test_bit( exparray[j], bitno ) ) + index |= 1; + } + /*log_debug("t=%d i=%d index=%d\n", t, i, index );*/ + return index; +} + +/**************** + * RES = (BASE[0] ^ EXP[0]) * (BASE[1] ^ EXP[1]) * ... * mod M + */ +void +mpi_mulpowm( MPI res, MPI *basearray, MPI *exparray, MPI m) +{ + int k; /* number of elements */ + int t; /* bit size of largest exponent */ + int i, j, idx; + MPI *G; /* table with precomputed values of size 2^k */ + MPI tmp; + + for(k=0; basearray[k]; k++ ) + ; + assert(k); + for(t=0, i=0; (tmp=exparray[i]); i++ ) { + /*log_mpidump("exp: ", tmp );*/ + j = mpi_get_nbits(tmp); + if( j > t ) + t = j; + } + /*log_mpidump("mod: ", m );*/ + assert(i==k); + assert(t); + assert( k < 10 ); + + G = m_alloc_clear( (1<<k) * sizeof *G ); + #if 0 + /* do the precomputation */ + G[0] = mpi_alloc_set_ui( 1 ); + for(i=1; i < (1<<k); i++ ) { + for(j=0; j < k; j++ ) { + if( (i & (1<<j) ) ) { + if( !G[i] ) + G[i] = mpi_copy( basearray[j] ); + else + mpi_mulm( G[i], G[i], basearray[j], m ); + } + } + if( !G[i] ) + G[i] = mpi_alloc(0); + } + #endif + /* and calculate */ + tmp = mpi_alloc( mpi_get_nlimbs(m)+1 ); + mpi_set_ui( res, 1 ); + for(i = 1; i <= t; i++ ) { + mpi_mulm(tmp, res, res, m ); + idx = build_index( exparray, k, i, t ); + assert( idx >= 0 && idx < (1<<k) ); + if( !G[idx] ) { + if( !idx ) + G[0] = mpi_alloc_set_ui( 1 ); + else { + for(j=0; j < k; j++ ) { + if( (idx & (1<<j) ) ) { + if( !G[idx] ) + G[idx] = mpi_copy( basearray[j] ); + else + mpi_mulm( G[idx], G[idx], basearray[j], m ); + } + } + if( !G[idx] ) + G[idx] = mpi_alloc(0); + } + } + mpi_mulm(res, tmp, G[idx], m ); + } + + /* cleanup */ + m_free(tmp); + for(i=0; i < (1<<k); i++ ) + mpi_free(G[i]); + m_free(G); +} + |