diff options
author | Werner Koch <wk@gnupg.org> | 1997-11-18 14:05:56 +0000 |
---|---|---|
committer | Werner Koch <wk@gnupg.org> | 1997-11-18 14:05:56 +0000 |
commit | 4b5e71ca4e84e61e595dec19e1c7cab0c0a73f24 (patch) | |
tree | cfa374507b08344f49f28814c67f058723a485d4 /mpi | |
parent | 539284b719a82a79e3d3bba68de85581ea8f77f1 (diff) | |
download | libgcrypt-4b5e71ca4e84e61e595dec19e1c7cab0c0a73f24.tar.gz |
initially checkin
Diffstat (limited to 'mpi')
-rw-r--r-- | mpi/Makefile.am | 27 | ||||
-rw-r--r-- | mpi/Makefile.in | 271 | ||||
-rw-r--r-- | mpi/longlong.h | 1398 | ||||
-rw-r--r-- | mpi/mpi-add.c | 221 | ||||
-rw-r--r-- | mpi/mpi-bit.c | 133 | ||||
-rw-r--r-- | mpi/mpi-cmp.c | 72 | ||||
-rw-r--r-- | mpi/mpi-div.c | 282 | ||||
-rw-r--r-- | mpi/mpi-gcd.c | 54 | ||||
-rw-r--r-- | mpi/mpi-internal.h | 198 | ||||
-rw-r--r-- | mpi/mpi-inv.c | 127 | ||||
-rw-r--r-- | mpi/mpi-mul.c | 178 | ||||
-rw-r--r-- | mpi/mpi-pow.c | 247 | ||||
-rw-r--r-- | mpi/mpi-scan.c | 88 | ||||
-rw-r--r-- | mpi/mpicoder.c | 392 | ||||
-rw-r--r-- | mpi/mpih-add.c | 109 | ||||
-rw-r--r-- | mpi/mpih-cmp.c | 53 | ||||
-rw-r--r-- | mpi/mpih-div.c | 528 | ||||
-rw-r--r-- | mpi/mpih-mul.c | 557 | ||||
-rw-r--r-- | mpi/mpih-shift.c | 94 | ||||
-rw-r--r-- | mpi/mpih-sub.c | 106 | ||||
-rw-r--r-- | mpi/mpiutil.c | 326 |
21 files changed, 5461 insertions, 0 deletions
diff --git a/mpi/Makefile.am b/mpi/Makefile.am new file mode 100644 index 00000000..5edd90c2 --- /dev/null +++ b/mpi/Makefile.am @@ -0,0 +1,27 @@ +## Process this file with automake to produce Makefile.in + +INCLUDES = -I$(top_srcdir)/include + +noinst_LIBRARIES = mpi + + +mpi_SOURCES = longlong.h \ + mpi-add.c \ + mpi-bit.c \ + mpi-cmp.c \ + mpi-div.c \ + mpi-gcd.c \ + mpi-internal.h \ + mpi-inv.c \ + mpi-mul.c \ + mpi-pow.c \ + mpi-scan.c \ + mpicoder.c \ + mpihelp-add.c \ + mpihelp-cmp.c \ + mpihelp-div.c \ + mpihelp-mul.c \ + mpihelp-shift.c \ + mpihelp-sub.c \ + mpiutil.c + diff --git a/mpi/Makefile.in b/mpi/Makefile.in new file mode 100644 index 00000000..4f493a88 --- /dev/null +++ b/mpi/Makefile.in @@ -0,0 +1,271 @@ +# Makefile.in generated automatically by automake 1.0 from Makefile.am + +# Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc. +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. + + +SHELL = /bin/sh + +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ +VPATH = @srcdir@ +prefix = @prefix@ +exec_prefix = @exec_prefix@ + +bindir = @bindir@ +sbindir = @sbindir@ +libexecdir = @libexecdir@ +datadir = @datadir@ +sysconfdir = @sysconfdir@ +sharedstatedir = @sharedstatedir@ +localstatedir = @localstatedir@ +libdir = @libdir@ +infodir = @infodir@ +mandir = @mandir@ +includedir = @includedir@ +oldincludedir = /usr/include + +pkgdatadir = $(datadir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ + +top_builddir = .. + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +transform = @program_transform_name@ + +INCLUDES = -I$(top_srcdir)/include + +noinst_LIBRARIES = mpi + +mpi_SOURCES = longlong.h \ + mpi-add.c \ + mpi-bit.c \ + mpi-cmp.c \ + mpi-div.c \ + mpi-gcd.c \ + mpi-internal.h \ + mpi-inv.c \ + mpi-mul.c \ + mpi-pow.c \ + mpi-scan.c \ + mpicoder.c \ + mpihelp-add.c \ + mpihelp-cmp.c \ + mpihelp-div.c \ + mpihelp-mul.c \ + mpihelp-shift.c \ + mpihelp-sub.c \ + mpiutil.c +mkinstalldirs = $(top_srcdir)/scripts/mkinstalldirs +CONFIG_HEADER = ../config.h +LIBRARIES = $(noinst_LIBRARIES) + +noinst_LIBFILES = libmpi.a + +CC = @CC@ +LEX = @LEX@ +YACC = @YACC@ + +DEFS = @DEFS@ -I. -I$(srcdir) -I.. +CPPFLAGS = @CPPFLAGS@ +CFLAGS = @CFLAGS@ +LDFLAGS = @LDFLAGS@ +LIBS = @LIBS@ + +COMPILE = $(CC) -c $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) +LINK = $(CC) $(LDFLAGS) -o $@ +mpi_LIBADD = +mpi_OBJECTS = mpi-add.o mpi-bit.o mpi-cmp.o mpi-div.o mpi-gcd.o \ +mpi-inv.o mpi-mul.o mpi-pow.o mpi-scan.o mpicoder.o mpihelp-add.o \ +mpihelp-cmp.o mpihelp-div.o mpihelp-mul.o mpihelp-shift.o mpihelp-sub.o \ +mpiutil.o +EXTRA_mpi_SOURCES = +LIBFILES = libmpi.a +AR = ar +RANLIB = @RANLIB@ +DIST_COMMON = Makefile.am Makefile.in + + +PACKAGE = @PACKAGE@ +VERSION = @VERSION@ + +DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \ + $(TEXINFOS) $(INFOS) $(MANS) $(EXTRA_DIST) $(DATA) +DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \ + $(TEXINFOS) $(INFO_DEPS) $(MANS) $(EXTRA_DIST) $(DATA) + +TAR = tar +DEP_FILES = $(srcdir)/.deps/mpi-add.P $(srcdir)/.deps/mpi-bit.P \ +$(srcdir)/.deps/mpi-cmp.P $(srcdir)/.deps/mpi-div.P \ +$(srcdir)/.deps/mpi-gcd.P $(srcdir)/.deps/mpi-inv.P \ +$(srcdir)/.deps/mpi-mul.P $(srcdir)/.deps/mpi-pow.P \ +$(srcdir)/.deps/mpi-scan.P $(srcdir)/.deps/mpicoder.P \ +$(srcdir)/.deps/mpihelp-add.P $(srcdir)/.deps/mpihelp-cmp.P \ +$(srcdir)/.deps/mpihelp-div.P $(srcdir)/.deps/mpihelp-mul.P \ +$(srcdir)/.deps/mpihelp-shift.P $(srcdir)/.deps/mpihelp-sub.P \ +$(srcdir)/.deps/mpiutil.P +SOURCES = $(mpi_SOURCES) +OBJECTS = $(mpi_OBJECTS) + +default: all + + +$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in + cd $(top_srcdir) && automake $(subdir)/Makefile + +Makefile: $(top_builddir)/config.status Makefile.in + cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= ./config.status + +mostlyclean-noinstLIBRARIES: + +clean-noinstLIBRARIES: + rm -f $(noinst_LIBFILES) + +distclean-noinstLIBRARIES: + +maintainer-clean-noinstLIBRARIES: + +.c.o: + $(COMPILE) $< + +mostlyclean-compile: + rm -f *.o core + +clean-compile: + +distclean-compile: + rm -f *.tab.c + +maintainer-clean-compile: +$(mpi_OBJECTS): ../config.h + +libmpi.a: $(mpi_OBJECTS) $(mpi_LIBADD) + rm -f libmpi.a + $(AR) cru libmpi.a $(mpi_OBJECTS) $(mpi_LIBADD) + $(RANLIB) libmpi.a + +ID: $(HEADERS) $(SOURCES) + here=`pwd` && cd $(srcdir) && mkid -f$$here/ID $(SOURCES) $(HEADERS) + +tags: TAGS + +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) + here=`pwd` && cd $(srcdir) && etags $(ETAGS_ARGS) $(SOURCES) $(HEADERS) -o $$here/TAGS + +mostlyclean-tags: + +clean-tags: + +distclean-tags: + rm -f TAGS ID + +maintainer-clean-tags: + +subdir = mpi +distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) +distdir: $(DEP_DISTFILES) + @for file in `cd $(srcdir) && echo $(DISTFILES)`; do \ + test -f $(distdir)/$$file \ + || ln $(srcdir)/$$file $(distdir)/$$file 2> /dev/null \ + || cp -p $(srcdir)/$$file $(distdir)/$$file; \ + done + +# This fragment is probably only useful for maintainers. It relies on +# GNU make and gcc. It is only included in the generated Makefile.in +# if `automake' is not passed the `--include-deps' flag. + +MKDEP = gcc -MM $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS) + +-include $(srcdir)/.deps/.P +$(srcdir)/.deps/.P: $(BUILT_SOURCES) + cd $(srcdir) && test -d .deps || mkdir .deps + echo > $@ + +-include $(DEP_FILES) +$(DEP_FILES): $(srcdir)/.deps/.P + +$(srcdir)/.deps/%.P: $(srcdir)/%.c + @echo "mkdeps $< > $@" + @re=`echo 's,^$(srcdir)//*,,g;s, $(srcdir)//*, ,g' | sed 's,\.,\\\\.,g'`; \ + $(MKDEP) $< | sed "$$re" > $@-tmp + @if test -n "$o"; then \ + sed 's/\.o:/$$o:/' $@-tmp > $@; \ + rm $@-tmp; \ + else \ + mv $@-tmp $@; \ + fi + +# End of maintainer-only section +info: + +dvi: + +check: all + +installcheck: + +install-exec: + +install-data: + +install: install-exec install-data all + @: + +uninstall: + +all: $(LIBFILES) Makefile + +install-strip: + $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install +installdirs: + + +mostlyclean-generic: + test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES) + +clean-generic: + test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + rm -f Makefile $(DISTCLEANFILES) + rm -f config.cache config.log $(CONFIG_HEADER) stamp-h + +maintainer-clean-generic: + test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) + test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) +mostlyclean: mostlyclean-noinstLIBRARIES mostlyclean-compile \ + mostlyclean-tags mostlyclean-generic + +clean: clean-noinstLIBRARIES clean-compile clean-tags clean-generic \ + mostlyclean + +distclean: distclean-noinstLIBRARIES distclean-compile distclean-tags \ + distclean-generic clean + rm -f config.status + +maintainer-clean: maintainer-clean-noinstLIBRARIES \ + maintainer-clean-compile maintainer-clean-tags \ + maintainer-clean-generic distclean + @echo "This command is intended for maintainers to use;" + @echo "it deletes files that may require special tools to rebuild." + +.PHONY: default mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \ +clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \ +mostlyclean-compile distclean-compile clean-compile \ +maintainer-clean-compile tags mostlyclean-tags distclean-tags \ +clean-tags maintainer-clean-tags distdir info dvi check installcheck \ +install-exec install-data install uninstall all installdirs \ +mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-generic clean mostlyclean distclean maintainer-clean + +.SUFFIXES: +.SUFFIXES: .c .o + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/mpi/longlong.h b/mpi/longlong.h new file mode 100644 index 00000000..006f69ed --- /dev/null +++ b/mpi/longlong.h @@ -0,0 +1,1398 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + +Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify +it under the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public +License for more details. + +You should have received a copy of the GNU Library General Public License +along with this file; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, +MA 02111-1307, USA. */ + +/* You have to define the following before including this file: + + UWtype -- An unsigned type, default type for operations (typically a "word") + UHWtype -- An unsigned type, at least half the size of UWtype. + UDWtype -- An unsigned type, at least twice as large a UWtype + W_TYPE_SIZE -- size in bits of UWtype + + SItype, USItype -- Signed and unsigned 32 bit types. + DItype, UDItype -- Signed and unsigned 64 bit types. + + On a 32 bit machine UWtype should typically be USItype; + on a 64 bit machine, UWtype should typically be UDItype. +*/ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* This is used to make sure no undesirable sharing between different libraries + that use this file takes place. */ +#ifndef __MPN +#define __MPN(x) __##x +#endif + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two + UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype + word product in HIGH_PROD and LOW_PROD. + + 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a + UDWtype product. This is just a variant of umul_ppmm. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a UDWtype, composed by the UWtype integers + HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + than DENOMINATOR for correct operation. If, in addition, the most + significant bit of DENOMINATOR must be 1, then the pre-processor symbol + UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). Like udiv_qrnnd but the numbers are signed. The quotient + is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from the + msb to the first non-zero bit in the UWtype X. This is the number of + steps X needs to be shifted left to set the msb. Undefined for X == 0, + unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + + 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + from the least significant end. + + 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two UWtype integers, composed by + HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + (i.e. carry out) is not stored anywhere, and is lost. + + 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and + LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE + and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. */ + +/* The CPUs come in alphabetical order below. + + Please add support for more CPUs here, or improve the current support + for the CPUs below! */ + +#if defined (__GNUC__) && !defined (NO_ASM) + +/* We sometimes need to clobber "cc" with gcc2, but that would not be + understood by gcc1. Use cpp to avoid major code duplication. */ +#if __GNUC__ < 2 +#define __CLOBBER_CC +#define __AND_CLOBBER_CC +#else /* __GNUC__ >= 2 */ +#define __CLOBBER_CC : "cc" +#define __AND_CLOBBER_CC , "cc" +#endif /* __GNUC__ < 2 */ + +#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %1,%4,%5 + addc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %1,%4,%5 + subc %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("multiplu %0,%1,%2" \ + : "=r" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + __asm__ ("multmu %0,%1,%2" \ + : "=r" ((USItype)(xh)) \ + : "r" (__m0), \ + "r" (__m1)); \ + } while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("dividu %0,%3,%4" \ + : "=r" ((USItype)(q)), \ + "=q" ((USItype)(r)) \ + : "1" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* __a29k__ */ + +#if defined (__alpha) && W_TYPE_SIZE == 64 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("umulh %r1,%2,%0" \ + : "=r" ((UDItype) ph) \ + : "%rJ" (__m0), \ + "rI" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 46 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UDItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UDItype __udiv_qrnnd (); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#endif /* __alpha */ + +#if defined (__arm__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("adds %1, %4, %5 + adc %0, %2, %3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subs %1, %4, %5 + sbc %0, %2, %3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "r" ((USItype)(al)), \ + "rI" ((USItype)(bl))) +#define umul_ppmm(xh, xl, a, b) \ + __asm__ ("%@ Inlined umul_ppmm + mov %|r0, %2, lsr #16 + mov %|r2, %3, lsr #16 + bic %|r1, %2, %|r0, lsl #16 + bic %|r2, %3, %|r2, lsl #16 + mul %1, %|r1, %|r2 + mul %|r2, %|r0, %|r2 + mul %|r1, %0, %|r1 + mul %0, %|r0, %0 + adds %|r1, %|r2, %|r1 + addcs %0, %0, #65536 + adds %1, %1, %|r1, lsl #16 + adc %0, %0, %|r1, lsr #16" \ + : "=&r" ((USItype)(xh)), \ + "=r" ((USItype)(xl)) \ + : "r" ((USItype)(a)), \ + "r" ((USItype)(b)) \ + : "r0", "r1", "r2") +#define UMUL_TIME 20 +#define UDIV_TIME 100 +#endif /* __arm__ */ + +#if defined (__clipper__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +#define smul_ppmm(w1, w0, u, v) \ + ({union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("mulwx %2,%0" \ + : "=r" (__xx.__ll) \ + : "%0" ((SItype)(u)), \ + "r" ((SItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("mulwux %2,%0" \ + : "=r" (__w) \ + : "%0" ((USItype)(u)), \ + "r" ((USItype)(v))); \ + __w; }) +#endif /* __clipper__ */ + +#if defined (__gmicro__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add.w %5,%1 + addx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub.w %5,%1 + subx %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + __asm__ ("mulx %3,%0,%1" \ + : "=g" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%0" ((USItype)(m0)), \ + "g" ((USItype)(m1))) +#define udiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("divx %4,%0,%1" \ + : "=g" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "1" ((USItype)(nh)), \ + "0" ((USItype)(nl)), \ + "g" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + __asm__ ("bsch/1 %1,%0" \ + : "=g" (count) \ + : "g" ((USItype)(x)), \ + "0" ((USItype)0)) +#endif + +#if defined (__hppa) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %4,%5,%1 + addc %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "%rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %4,%5,%1 + subb %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "rM" ((USItype)(al)), \ + "rM" ((USItype)(bl))) +#if defined (_PA_RISC1_1) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __asm__ ("xmpyu %1,%2,%0" \ + : "=*f" (__xx.__ll) \ + : "*f" ((USItype)(u)), \ + "*f" ((USItype)(v))); \ + (wh) = __xx.__i.__h; \ + (wl) = __xx.__i.__l; \ + } while (0) +#define UMUL_TIME 8 +#define UDIV_TIME 60 +#else +#define UMUL_TIME 40 +#define UDIV_TIME 80 +#endif +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { USItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern USItype __udiv_qrnnd (); +#endif /* LONGLONG_STANDALONE */ +#define count_leading_zeros(count, x) \ + do { \ + USItype __tmp; \ + __asm__ ( \ + "ldi 1,%0 + extru,= %1,15,16,%%r0 ; Bits 31..16 zero? + extru,tr %1,15,16,%1 ; No. Shift down, skip add. + ldo 16(%0),%0 ; Yes. Perform add. + extru,= %1,23,8,%%r0 ; Bits 15..8 zero? + extru,tr %1,23,8,%1 ; No. Shift down, skip add. + ldo 8(%0),%0 ; Yes. Perform add. + extru,= %1,27,4,%%r0 ; Bits 7..4 zero? + extru,tr %1,27,4,%1 ; No. Shift down, skip add. + ldo 4(%0),%0 ; Yes. Perform add. + extru,= %1,29,2,%%r0 ; Bits 3..2 zero? + extru,tr %1,29,2,%1 ; No. Shift down, skip add. + ldo 2(%0),%0 ; Yes. Perform add. + extru %1,30,1,%1 ; Extract bit 1. + sub %0,%1,%0 ; Subtract it. + " : "=r" (count), "=r" (__tmp) : "1" (x)); \ + } while (0) +#endif /* hppa */ + +#if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32 +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "r" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +#define smul_ppmm(xh, xl, m0, m1) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __asm__ ("mr %0,%3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (m0), \ + "r" (m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + } while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("dr %0,%2" \ + : "=r" (__xx.__ll) \ + : "0" (__xx.__ll), "r" (d)); \ + (q) = __xx.__i.__l; (r) = __xx.__i.__h; \ + } while (0) +#endif + +#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl %5,%1 + adcl %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl %5,%1 + sbbl %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mull %3" \ + : "=a" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "rm" ((USItype)(v))) +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divl %4" \ + : "=a" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "rm" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("bsrl %1,%0" \ + : "=r" (__cbtmp) : "rm" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define count_trailing_zeros(count, x) \ + __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) +#ifndef UMUL_TIME +#define UMUL_TIME 40 +#endif +#ifndef UDIV_TIME +#define UDIV_TIME 40 +#endif +#endif /* 80x86 */ + +#if defined (__i860__) && W_TYPE_SIZE == 32 +#define rshift_rhlc(r,h,l,c) \ + __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \ + "=r" (r) : "r" (h), "r" (l), "rn" (c)) +#endif /* i860 */ + +#if defined (__i960__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "%dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "dI" ((USItype)(ah)), \ + "dI" ((USItype)(bh)), \ + "dI" ((USItype)(al)), \ + "dI" ((USItype)(bl))) +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__xx.__ll) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("emul %2,%1,%0" \ + : "=d" (__w) \ + : "%dI" ((USItype)(u)), \ + "dI" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn; \ + __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ + __asm__ ("ediv %d,%n,%0" \ + : "=d" (__rq.__ll) \ + : "dI" (__nn.__ll), \ + "dI" ((USItype)(d))); \ + (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ + } while (0) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("scanbit %1,%0" \ + : "=r" (__cbtmp) \ + : "r" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define COUNT_LEADING_ZEROS_0 (-32) /* sic */ +#if defined (__i960mx) /* what is the proper symbol to test??? */ +#define rshift_rhlc(r,h,l,c) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __nn; \ + __nn.__i.__h = (h); __nn.__i.__l = (l); \ + __asm__ ("shre %2,%1,%0" \ + : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \ + } +#endif /* i960mx */ +#endif /* i960 */ + +#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add%.l %5,%1 + addx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub%.l %5,%1 + subx%.l %3,%0" \ + : "=d" ((USItype)(sh)), \ + "=&d" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "d" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "%0" ((USItype)(u)), \ + "dmi" ((USItype)(v))) +#define UMUL_TIME 45 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divu%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#define UDIV_TIME 90 +#define sdiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("divs%.l %4,%1:%0" \ + : "=d" ((USItype)(q)), \ + "=d" ((USItype)(r)) \ + : "0" ((USItype)(n0)), \ + "1" ((USItype)(n1)), \ + "dmi" ((USItype)(d))) +#define count_leading_zeros(count, x) \ + __asm__ ("bfffo %1{%b2:%b2},%0" \ + : "=d" ((USItype)(count)) \ + : "od" ((USItype)(x)), "n" (0)) +#define COUNT_LEADING_ZEROS_0 32 +#else /* not mc68020 */ +#define umul_ppmm(xh, xl, a, b) \ + do { USItype __umul_tmp1, __umul_tmp2; \ + __asm__ ("| Inlined umul_ppmm + move%.l %5,%3 + move%.l %2,%0 + move%.w %3,%1 + swap %3 + swap %0 + mulu %2,%1 + mulu %3,%0 + mulu %2,%3 + swap %2 + mulu %5,%2 + add%.l %3,%2 + jcc 1f + add%.l %#0x10000,%0 +1: move%.l %2,%3 + clr%.w %2 + swap %2 + swap %3 + clr%.w %3 + add%.l %3,%1 + addx%.l %2,%0 + | End inlined umul_ppmm" \ + : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \ + "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \ + : "%2" ((USItype)(a)), "d" ((USItype)(b))); \ + } while (0) +#define UMUL_TIME 100 +#define UDIV_TIME 400 +#endif /* not mc68020 */ +#endif /* mc68000 */ + +#if defined (__m88000__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addu.co %1,%r4,%r5 + addu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subu.co %1,%r4,%r5 + subu.ci %0,%r2,%r3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rJ" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rJ" ((USItype)(bl))) +#define count_leading_zeros(count, x) \ + do { \ + USItype __cbtmp; \ + __asm__ ("ff1 %0,%1" \ + : "=r" (__cbtmp) \ + : "r" ((USItype)(x))); \ + (count) = __cbtmp ^ 31; \ + } while (0) +#define COUNT_LEADING_ZEROS_0 63 /* sic */ +#if defined (__m88110__) +#define umul_ppmm(wh, wl, u, v) \ + do { \ + union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x; \ + __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \ + (wh) = __x.__i.__h; \ + (wl) = __x.__i.__l; \ + } while (0) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __x, __q; \ + __x.__i.__h = (n1); __x.__i.__l = (n0); \ + __asm__ ("divu.d %0,%1,%2" \ + : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ + (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) +#define UMUL_TIME 5 +#define UDIV_TIME 25 +#else +#define UMUL_TIME 17 +#define UDIV_TIME 150 +#endif /* __m88110__ */ +#endif /* __m88000__ */ + +#if defined (__mips__) && W_TYPE_SIZE == 32 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3" \ + : "=l" ((USItype)(w0)), \ + "=h" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("multu %2,%3 + mflo %0 + mfhi %1" \ + : "=d" ((USItype)(w0)), \ + "=d" ((USItype)(w1)) \ + : "d" ((USItype)(u)), \ + "d" ((USItype)(v))) +#endif +#define UMUL_TIME 10 +#define UDIV_TIME 100 +#endif /* __mips__ */ + +#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 +#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3" \ + : "=l" ((UDItype)(w0)), \ + "=h" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#else +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("dmultu %2,%3 + mflo %0 + mfhi %1" \ + : "=d" ((UDItype)(w0)), \ + "=d" ((UDItype)(w1)) \ + : "d" ((UDItype)(u)), \ + "d" ((UDItype)(v))) +#endif +#define UMUL_TIME 20 +#define UDIV_TIME 140 +#endif /* __mips__ */ + +#if defined (__ns32000__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __asm__ ("meid %2,%0" \ + : "=g" (__xx.__ll) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +#define __umulsidi3(u, v) \ + ({UDItype __w; \ + __asm__ ("meid %2,%0" \ + : "=g" (__w) \ + : "%0" ((USItype)(u)), \ + "g" ((USItype)(v))); \ + __w; }) +#define udiv_qrnnd(q, r, n1, n0, d) \ + ({union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ + __asm__ ("deid %2,%0" \ + : "=g" (__xx.__ll) \ + : "0" (__xx.__ll), \ + "g" ((USItype)(d))); \ + (r) = __xx.__i.__l; (q) = __xx.__i.__h; }) +#define count_trailing_zeros(count,x) \ + do { + __asm__ ("ffsd %2,%0" \ + : "=r" ((USItype) (count)) \ + : "0" ((USItype) 0), \ + "r" ((USItype) (x))); \ + } while (0) +#endif /* __ns32000__ */ + +#if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ + __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + else \ + __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%r" ((USItype)(al)), \ + "rI" ((USItype)(bl))); \ + } while (0) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + if (__builtin_constant_p (ah) && (ah) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) == 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ + __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + else \ + __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "r" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "rI" ((USItype)(al)), \ + "r" ((USItype)(bl))); \ + } while (0) +#define count_leading_zeros(count, x) \ + __asm__ ("{cntlz|cntlzw} %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))) +#define COUNT_LEADING_ZEROS_0 32 +#if defined (_ARCH_PPC) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhwu %0,%1,%2" \ + : "=r" ((USItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 15 +#define smul_ppmm(ph, pl, m0, m1) \ + do { \ + SItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mulhw %0,%1,%2" \ + : "=r" ((SItype) ph) \ + : "%r" (__m0), \ + "r" (__m1)); \ + (pl) = __m0 * __m1; \ + } while (0) +#define SMUL_TIME 14 +#define UDIV_TIME 120 +#else +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((USItype)(xh)), \ + "=q" ((USItype)(xl)) \ + : "r" (__m0), \ + "r" (__m1)); \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +#define UMUL_TIME 8 +#define smul_ppmm(xh, xl, m0, m1) \ + __asm__ ("mul %0,%2,%3" \ + : "=r" ((SItype)(xh)), \ + "=q" ((SItype)(xl)) \ + : "r" (m0), \ + "r" (m1)) +#define SMUL_TIME 4 +#define sdiv_qrnnd(q, r, nh, nl, d) \ + __asm__ ("div %0,%2,%4" \ + : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \ + : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d))) +#define UDIV_TIME 100 +#endif +#endif /* Power architecture variants. */ + +#if defined (__pyr__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addw %5,%1 + addwc %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subw %5,%1 + subwb %3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */ +#define umul_ppmm(w1, w0, u, v) \ + ({union {UDItype __ll; \ + struct {USItype __h, __l;} __i; \ + } __xx; \ + __asm__ ("movw %1,%R0 + uemul %2,%0" \ + : "=&r" (__xx.__ll) \ + : "g" ((USItype) (u)), \ + "g" ((USItype)(v))); \ + (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) +#endif /* __pyr__ */ + +#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("a %1,%5 + ae %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("s %1,%5 + se %0,%3" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "r" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "r" ((USItype)(bl))) +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ( \ + "s r2,r2 + mts r10,%2 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + m r2,%3 + cas %0,r2,r0 + mfs r10,%1" \ + : "=r" ((USItype)(ph)), \ + "=r" ((USItype)(pl)) \ + : "%r" (__m0), \ + "r" (__m1) \ + : "r2"); \ + (ph) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +#define UMUL_TIME 20 +#define UDIV_TIME 200 +#define count_leading_zeros(count, x) \ + do { \ + if ((x) >= 0x10000) \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x) >> 16)); \ + else \ + { \ + __asm__ ("clz %0,%1" \ + : "=r" ((USItype)(count)) \ + : "r" ((USItype)(x))); \ + (count) += 16; \ + } \ + } while (0) +#endif /* RT/ROMP */ + +#if defined (__sh2__) && W_TYPE_SIZE == 32 +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ( \ + "dmulu.l %2,%3 + sts macl,%1 + sts mach,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "macl", "mach") +#define UMUL_TIME 5 +#endif + +#if defined (__sparc__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addcc %r4,%5,%1 + addx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "%rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subcc %r4,%5,%1 + subx %r2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rJ" ((USItype)(ah)), \ + "rI" ((USItype)(bh)), \ + "rJ" ((USItype)(al)), \ + "rI" ((USItype)(bl)) \ + __CLOBBER_CC) +#if defined (__sparc_v8__) +/* Don't match immediate range because, 1) it is not often useful, + 2) the 'I' flag thinks of the range as a 13 bit signed interval, + while we want to match a 13 bit interval, sign extended to 32 bits, + but INTERPRETED AS UNSIGNED. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */ +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { \ + USItype __q; \ + __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + : "=r" ((USItype)(__q)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "r" ((USItype)(d))); \ + (r) = (n0) - __q * (d); \ + (q) = __q; \ + } while (0) +#define UDIV_TIME 25 +#endif /* SUPERSPARC */ +#else /* ! __sparc_v8__ */ +#if defined (__sparclite__) +/* This has hardware multiply but not divide. It also has two additional + instructions scan (ffs from high bit) and divscc. */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("umul %2,%3,%1;rd %%y,%0" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "r" ((USItype)(u)), \ + "r" ((USItype)(v))) +#define UMUL_TIME 5 +#define udiv_qrnnd(q, r, n1, n0, d) \ + __asm__ ("! Inlined udiv_qrnnd + wr %%g0,%2,%%y ! Not a delayed write for sparclite + tst %%g0 + divscc %3,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%%g1 + divscc %%g1,%4,%0 + rd %%y,%1 + bl,a 1f + add %1,%4,%1 +1: ! End of inline udiv_qrnnd" \ + : "=r" ((USItype)(q)), \ + "=r" ((USItype)(r)) \ + : "r" ((USItype)(n1)), \ + "r" ((USItype)(n0)), \ + "rI" ((USItype)(d)) \ + : "%g1" __AND_CLOBBER_CC) +#define UDIV_TIME 37 +#define count_leading_zeros(count, x) \ + __asm__ ("scan %1,0,%0" \ + : "=r" ((USItype)(x)) \ + : "r" ((USItype)(count))) +/* Early sparclites return 63 for an argument of 0, but they warn that future + implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 + undefined. */ +#endif /* __sparclite__ */ +#endif /* __sparc_v8__ */ +/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */ +#ifndef umul_ppmm +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("! Inlined umul_ppmm + wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr + sra %3,31,%%g2 ! Don't move this insn + and %2,%%g2,%%g2 ! Don't move this insn + andcc %%g0,0,%%g1 ! Don't move this insn + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,%3,%%g1 + mulscc %%g1,0,%%g1 + add %%g1,%%g2,%0 + rd %%y,%1" \ + : "=r" ((USItype)(w1)), \ + "=r" ((USItype)(w0)) \ + : "%rI" ((USItype)(u)), \ + "r" ((USItype)(v)) \ + : "%g1", "%g2" __AND_CLOBBER_CC) +#define UMUL_TIME 39 /* 39 instructions */ +#endif +#ifndef udiv_qrnnd +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { USItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern USItype __udiv_qrnnd (); +#define UDIV_TIME 140 +#endif /* LONGLONG_STANDALONE */ +#endif /* udiv_qrnnd */ +#endif /* __sparc__ */ + +#if defined (__vax__) && W_TYPE_SIZE == 32 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1 + adwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "%0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "%1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subl2 %5,%1 + sbwc %3,%0" \ + : "=g" ((USItype)(sh)), \ + "=&g" ((USItype)(sl)) \ + : "0" ((USItype)(ah)), \ + "g" ((USItype)(bh)), \ + "1" ((USItype)(al)), \ + "g" ((USItype)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {UDItype __ll; \ + struct {USItype __l, __h;} __i; \ + } __xx; \ + USItype __m0 = (m0), __m1 = (m1); \ + __asm__ ("emul %1,%2,$0,%0" \ + : "=g" (__xx.__ll) \ + : "g" (__m0), \ + "g" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((SItype) __m0 >> 31) & __m1) \ + + (((SItype) __m1 >> 31) & __m0)); \ + } while (0) +#define sdiv_qrnnd(q, r, n1, n0, d) \ + do { \ + union {DItype __ll; \ + struct {SItype __l, __h;} __i; \ + } __xx; \ + __xx.__i.__h = n1; __xx.__i.__l = n0; \ + __asm__ ("ediv %3,%2,%0,%1" \ + : "=g" (q), "=g" (r) \ + : "g" (__xx.__ll), "g" (d)); \ + } while (0) +#endif /* __vax__ */ + +#if defined (__z8000__) && W_TYPE_SIZE == 16 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "%0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "%1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ + : "=r" ((unsigned int)(sh)), \ + "=&r" ((unsigned int)(sl)) \ + : "0" ((unsigned int)(ah)), \ + "r" ((unsigned int)(bh)), \ + "1" ((unsigned int)(al)), \ + "rQR" ((unsigned int)(bl))) +#define umul_ppmm(xh, xl, m0, m1) \ + do { \ + union {long int __ll; \ + struct {unsigned int __h, __l;} __i; \ + } __xx; \ + unsigned int __m0 = (m0), __m1 = (m1); \ + __asm__ ("mult %S0,%H3" \ + : "=r" (__xx.__i.__h), \ + "=r" (__xx.__i.__l) \ + : "%1" (__m0), \ + "rQR" (__m1)); \ + (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ + (xh) += ((((signed int) __m0 >> 15) & __m1) \ + + (((signed int) __m1 >> 15) & __m0)); \ + } while (0) +#endif /* __z8000__ */ + +#endif /* __GNUC__ */ + + +#if !defined (umul_ppmm) && defined (__umulsidi3) +#define umul_ppmm(ph, pl, m0, m1) \ + { \ + UDWtype __ll = __umulsidi3 (m0, m1); \ + ph = (UWtype) (__ll >> W_TYPE_SIZE); \ + pl = (UWtype) __ll; \ + } +#endif + +#if !defined (__umulsidi3) +#define __umulsidi3(u, v) \ + ({UWtype __hi, __lo; \ + umul_ppmm (__hi, __lo, u, v); \ + ((UDWtype) __hi << W_TYPE_SIZE) | __lo; }) +#endif + +/* If this machine has no inline assembler, use C macros. */ + +#if !defined (add_ssaaaa) +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) + (bl); \ + (sh) = (ah) + (bh) + (__x < (al)); \ + (sl) = __x; \ + } while (0) +#endif + +#if !defined (sub_ddmmss) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + do { \ + UWtype __x; \ + __x = (al) - (bl); \ + (sh) = (ah) - (bh) - (__x > (al)); \ + (sl) = __x; \ + } while (0) +#endif + +#if !defined (umul_ppmm) +#define umul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __x0, __x1, __x2, __x3; \ + UHWtype __ul, __vl, __uh, __vh; \ + UWtype __u = (u), __v = (v); \ + \ + __ul = __ll_lowpart (__u); \ + __uh = __ll_highpart (__u); \ + __vl = __ll_lowpart (__v); \ + __vh = __ll_highpart (__v); \ + \ + __x0 = (UWtype) __ul * __vl; \ + __x1 = (UWtype) __ul * __vh; \ + __x2 = (UWtype) __uh * __vl; \ + __x3 = (UWtype) __uh * __vh; \ + \ + __x1 += __ll_highpart (__x0);/* this can't give carry */ \ + __x1 += __x2; /* but this indeed can */ \ + if (__x1 < __x2) /* did we get it? */ \ + __x3 += __ll_B; /* yes, add it in the proper pos. */ \ + \ + (w1) = __x3 + __ll_highpart (__x1); \ + (w0) = (__ll_lowpart (__x1) << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\ + } while (0) +#endif + +#if !defined (umul_ppmm) +#define smul_ppmm(w1, w0, u, v) \ + do { \ + UWtype __w1; \ + UWtype __m0 = (u), __m1 = (v); \ + umul_ppmm (__w1, w0, __m0, __m1); \ + (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \ + - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \ + } while (0) +#endif + +/* Define this unconditionally, so it can be used for debugging. */ +#define __udiv_qrnnd_c(q, r, n1, n0, d) \ + do { \ + UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + __d1 = __ll_highpart (d); \ + __d0 = __ll_lowpart (d); \ + \ + __r1 = (n1) % __d1; \ + __q1 = (n1) / __d1; \ + __m = (UWtype) __q1 * __d0; \ + __r1 = __r1 * __ll_B | __ll_highpart (n0); \ + if (__r1 < __m) \ + { \ + __q1--, __r1 += (d); \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ + if (__r1 < __m) \ + __q1--, __r1 += (d); \ + } \ + __r1 -= __m; \ + \ + __r0 = __r1 % __d1; \ + __q0 = __r1 / __d1; \ + __m = (UWtype) __q0 * __d0; \ + __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ + if (__r0 < __m) \ + { \ + __q0--, __r0 += (d); \ + if (__r0 >= (d)) \ + if (__r0 < __m) \ + __q0--, __r0 += (d); \ + } \ + __r0 -= __m; \ + \ + (q) = (UWtype) __q1 * __ll_B | __q0; \ + (r) = __r0; \ + } while (0) + +/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through + __udiv_w_sdiv (defined in libgcc or elsewhere). */ +#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) +#define udiv_qrnnd(q, r, nh, nl, d) \ + do { \ + UWtype __r; \ + (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \ + (r) = __r; \ + } while (0) +#endif + +/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */ +#if !defined (udiv_qrnnd) +#define UDIV_NEEDS_NORMALIZATION 1 +#define udiv_qrnnd __udiv_qrnnd_c +#endif + +#if !defined (count_leading_zeros) +extern +#ifdef __STDC__ +const +#endif +unsigned char __clz_tab[]; +#define count_leading_zeros(count, x) \ + do { \ + UWtype __xr = (x); \ + UWtype __a; \ + \ + if (W_TYPE_SIZE <= 32) \ + { \ + __a = __xr < ((UWtype) 1 << 2*__BITS4) \ + ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \ + : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\ + } \ + else \ + { \ + for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \ + if (((__xr >> __a) & 0xff) != 0) \ + break; \ + } \ + \ + (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \ + } while (0) +/* This version gives a well-defined value for zero. */ +#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE +#endif + +#if !defined (count_trailing_zeros) +/* Define count_trailing_zeros using count_leading_zeros. The latter might be + defined in asm, but if it is not, the C version above is good enough. */ +#define count_trailing_zeros(count, x) \ + do { \ + UWtype __ctz_x = (x); \ + UWtype __ctz_c; \ + count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \ + (count) = W_TYPE_SIZE - 1 - __ctz_c; \ + } while (0) +#endif + +#ifndef UDIV_NEEDS_NORMALIZATION +#define UDIV_NEEDS_NORMALIZATION 0 +#endif diff --git a/mpi/mpi-add.c b/mpi/mpi-add.c new file mode 100644 index 00000000..047a2fa3 --- /dev/null +++ b/mpi/mpi-add.c @@ -0,0 +1,221 @@ +/* mpi-add.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mpi-internal.h" + + +/**************** + * Add the unsigned integer V to the mpi-integer U and store the + * result in W. U and V may be the same. + */ +void +mpi_add_ui(MPI w, MPI u, unsigned long v ) +{ + mpi_ptr_t wp, up; + mpi_size_t usize, wsize; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + wsign = 0; + + /* If not space for W (and possible carry), increase space. */ + wsize = usize + 1; + if( w->alloced < wsize ) + mpi_resize(w, wsize); + + /* These must be after realloc (U may be the same as W). */ + up = u->d; + wp = w->d; + + if( !usize ) { /* simple */ + wp[0] = v; + wsize = v? 1:0; + } + else if( !usign ) { /* mpi is not negative */ + mpi_limb_t cy; + cy = mpihelp_add_1(wp, up, usize, v); + wp[usize] = cy; + wsize = usize + cy; + } + else { /* The signs are different. Need exact comparison to determine + * which operand to subtract from which. */ + if( usize == 1 && up[0] < v ) { + wp[0] = v - up[0]; + wsize = 1; + } + else { + mpihelp_sub_1(wp, up, usize, v); + /* Size can decrease with at most one limb. */ + wsize = (usize - (wp[usize-1]? 0:1)); + wsign = 1; + } + } + + w->nlimbs = wsize; + w->sign = wsign; +} + + +void +mpi_add(MPI w, MPI u, MPI v) +{ + mpi_ptr_t wp, up, vp; + mpi_size_t usize, vsize, wsize; + int usign, vsign, wsign; + + usize = u->nlimbs; + vsize = v->nlimbs; + usign = u->sign; + vsign = v->sign; + + if( usize < vsize ) { /* Swap U and V. */ + { MPI t; t = u; u = v; v = t; } + { mpi_size_t t = usize; usize = vsize; vsize = t; } + { int t = usign; usign = vsign; vsign = t; } + } + + /* If not space for w (and possible carry), increase space. */ + wsize = usize + 1; + if( w->alloced < wsize ) + mpi_resize(w, wsize); + wsign = 0; + + /* These must be after realloc (u or v may be the same as w). */ + up = u->d; + vp = v->d; + wp = w->d; + + if( !vsize ) { /* simple */ + MPN_COPY(wp, up, usize ); + wsize = usize; + wsign = usign; + } + else if( usign != vsign ) { /* different sign */ + /* This test is right since USIZE >= VSIZE */ + if( usize != vsize ) { + mpihelp_sub(wp, up, usize, vp, vsize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + wsign = usign; + } + else if( mpihelp_cmp(up, vp, usize) < 0 ) { + mpihelp_sub_n(wp, vp, up, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if( !usign ) + wsign = 1; + } + else { + mpihelp_sub_n(wp, up, vp, usize); + wsize = usize; + MPN_NORMALIZE(wp, wsize); + if( usign ) + wsign = 1; + } + } + else { /* U and V have same sign. Add them. */ + mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize); + wp[usize] = cy; + wsize = usize + cy; + if( usign ) + wsize = 1; + } + + w->nlimbs = wsize; + w->sign = wsign; +} + + +/**************** + * Subtract the unsigned integer V from the mpi-integer U and store the + * result in W. + */ +void +mpi_sub_ui(MPI w, MPI u, unsigned long v ) +{ + mpi_ptr_t wp, up; + mpi_size_t usize, wsize; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + wsign = 0; + + /* If not space for W (and possible carry), increase space. */ + wsize = usize + 1; + if( w->alloced < wsize ) + mpi_resize(w, wsize); + + /* These must be after realloc (U may be the same as W). */ + up = u->d; + wp = w->d; + + if( !usize ) { /* simple */ + wp[0] = v; + wsize = v? 1:0; + wsign = 1; + } + else if( usign ) { /* mpi and v are negative */ + mpi_limb_t cy; + cy = mpihelp_add_1(wp, up, usize, v); + wp[usize] = cy; + wsize = usize + cy; + } + else { /* The signs are different. Need exact comparison to determine + * which operand to subtract from which. */ + if( usize == 1 && up[0] < v ) { + wp[0] = v - up[0]; + wsize = 1; + wsign = 1; + } + else { + mpihelp_sub_1(wp, up, usize, v); + /* Size can decrease with at most one limb. */ + wsize = (usize - (wp[usize-1]? 1:0)); + } + } + + w->nlimbs = wsize; + w->sign = wsign; +} + +void +mpi_sub(MPI w, MPI u, MPI v) +{ + if( w == v ) { + MPI vv = mpi_copy(v); + vv->sign = !vv->sign; + mpi_add( w, u, vv ); + m_free(vv); + } + else { + /* fixme: this is not thread-save (we temp. modify v) */ + v->sign = !v->sign; + mpi_add( w, u, v ); + v->sign = !v->sign; + } +} + + diff --git a/mpi/mpi-bit.c b/mpi/mpi-bit.c new file mode 100644 index 00000000..9cb346aa --- /dev/null +++ b/mpi/mpi-bit.c @@ -0,0 +1,133 @@ +/* mpi-bit.c - MPI bit level fucntions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "mpi-internal.h" + + +/**************** + * Return the number of bits in A. + * fixme: we should not count leading zero bits + */ +unsigned +mpi_get_nbits( MPI a ) +{ + return a->nlimbs * BITS_PER_MPI_LIMB; +} + + +/**************** + * Test wether bit N is set. + */ +int +mpi_test_bit( MPI a, unsigned n ) +{ + unsigned limbno, bitno; + mpi_limb_t limb; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if( limbno >= a->nlimbs ) + return 0; /* too far left: this is a 0 */ + limb = a->d[limbno]; + return (limb & (1 << bitno))? 1: 0; +} + + +/**************** + * Set bit N of A. + */ +void +mpi_set_bit( MPI a, unsigned n ) +{ + unsigned limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if( limbno >= a->nlimbs ) { /* resize */ + if( a->alloced >= limbno ) + mpi_resize(a, limbno+1 ); + a->nlimbs = limbno+1; + } + a->d[limbno] |= (1<<bitno); +} + +/**************** + * Clear bit N of A. + */ +void +mpi_clear_bit( MPI a, unsigned n ) +{ + unsigned limbno, bitno; + + limbno = n / BITS_PER_MPI_LIMB; + bitno = n % BITS_PER_MPI_LIMB; + + if( limbno >= a->nlimbs ) + return; /* don't need to clear this bit, it's to far to left */ + a->d[limbno] &= ~(1 << bitno); +} + + +void +mpi_set_bytes( MPI a, unsigned nbits, byte (*fnc)(int), int opaque ) +{ + byte *p; + unsigned nlimbs, nlimbs2, xbits, xbytes; + unsigned n; + int i; + + nlimbs = nbits / BITS_PER_MPI_LIMB; + xbits = nbits % BITS_PER_MPI_LIMB; + nlimbs2 = xbits? (nlimbs+1):nlimbs; + xbytes = xbits / 8; + xbits = xbits % 8; + if( a->alloced < nlimbs2 ) + mpi_resize(a, nlimbs2 ); + a->nlimbs = nlimbs2; + for(n=0; n < nlimbs; n++ ) { + p = (byte*)(a->d+n); + #ifdef HAVE_LITTLE_ENDIAN + for(i=0; i < BYTES_PER_MPI_LIMB; i++ ) + p[i] = fnc(opaque); + #else + for(i=BYTES_PER_MPI_LIMB-1; i>=0; i-- ) + p[i] = fnc(opaque); + #endif + } + if( xbytes ) { + p = (byte*)(a->d+n); + #ifdef HAVE_LITTLE_ENDIAN + for(i=0; i < xbytes; i++ ) + p[i] = fnc(opaque); + #else + for(i=xbytes-1; i>=0; i-- ) + p[i] = fnc(opaque); + #endif + } + assert(!xbits); +} + + diff --git a/mpi/mpi-cmp.c b/mpi/mpi-cmp.c new file mode 100644 index 00000000..83e85ceb --- /dev/null +++ b/mpi/mpi-cmp.c @@ -0,0 +1,72 @@ +/* mpi-cmp.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + +int +mpi_cmp_ui( MPI u, unsigned long v ) +{ + mpi_limb_t limb = v; + + if( !u->nlimbs && !limb ) + return 0; + if( u->sign ) + return -1; + if( u->nlimbs > 1 ) + return 1; + + if( u->d[0] == limb ) + return 0; + else if( u->d[0] > limb ) + return 1; + else + return -1; +} + +int +mpi_cmp( MPI u, MPI v ) +{ + mpi_size_t usize = u->nlimbs; + mpi_size_t vsize = v->nlimbs; + int cmp; + + /* FIXME: are the numbers always normalized? */ + if( !u->sign && v->sign ) + return 1; + else if( u->sign && !v->sign ) + return -1; + else if( usize != vsize && !u->sign && !v->sign ) + return usize - vsize; + else if( usize != vsize && u->sign && v->sign ) + return vsize + usize; + else if( !usize ) + return 0; + else if( !(cmp=mpihelp_cmp( u->d, v->d, usize )) ) + return 0; + else if( (cmp < 0?1:0) == (u->sign?1:0)) + return 1; + else + return -1; +} + + diff --git a/mpi/mpi-div.c b/mpi/mpi-div.c new file mode 100644 index 00000000..2955575a --- /dev/null +++ b/mpi/mpi-div.c @@ -0,0 +1,282 @@ +/* mpi-div.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + + + +void +mpi_fdiv_r( MPI rem, MPI dividend, MPI divisor ) +{ + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + /* We need the original value of the divisor after the remainder has been + * preliminary calculated. We have to copy it to temporary space if it's + * the same variable as REM. */ + if( rem == divisor ) { + temp_divisor = mpi_copy( divisor ); + divisor = temp_divisor; + } + + mpi_tdiv_r( rem, dividend, divisor ); + + if( ((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs ) + mpi_add( rem, rem, divisor); + + if( temp_divisor ) + mpi_free(temp_divisor); +} + + + +/**************** + * Division rounding the quotient towards -infinity. + * The remainder gets the same sign as the denominator. + * rem is optional + */ + +ulong +mpi_fdiv_r_ui( MPI rem, MPI dividend, ulong divisor ) +{ + mpi_limb_t rlimb; + + rlimb = mpihelp_mod_1( dividend->d, dividend->nlimbs, divisor ); + if( rlimb && dividend->sign ) + rlimb = divisor - rlimb; + + if( rem ) { + rem->d[0] = rlimb; + rem->nlimbs = rlimb? 1:0; + } + return rlimb; +} + + +void +mpi_fdiv_q( MPI quot, MPI dividend, MPI divisor ) +{ + MPI tmp = mpi_alloc( mpi_get_nlimbs(quot) ); + mpi_fdiv_qr( quot, tmp, dividend, divisor); + mpi_free(tmp); +} + +void +mpi_fdiv_qr( MPI quot, MPI rem, MPI dividend, MPI divisor ) +{ + int divisor_sign = divisor->sign; + MPI temp_divisor = NULL; + + if( quot == divisor || rem == divisor ) { + temp_divisor = mpi_copy( divisor ); + divisor = temp_divisor; + } + + mpi_tdiv_qr( quot, rem, dividend, divisor ); + + if( (divisor_sign ^ dividend->sign) && rem->nlimbs ) { + mpi_sub_ui( quot, quot, 1 ); + mpi_add( rem, rem, divisor); + } + + if( temp_divisor ) + mpi_free(temp_divisor); +} + + +/* If den == quot, den needs temporary storage. + * If den == rem, den needs temporary storage. + * If num == quot, num needs temporary storage. + * If den has temporary storage, it can be normalized while being copied, + * i.e no extra storage should be allocated. + */ + +void +mpi_tdiv_r( MPI rem, MPI num, MPI den) +{ + mpi_tdiv_qr(NULL, rem, num, den ); +} + +void +mpi_tdiv_qr( MPI quot, MPI rem, MPI num, MPI den) +{ + mpi_ptr_t np, dp; + mpi_ptr_t qp, rp; + mpi_size_t nsize = num->nlimbs; + mpi_size_t dsize = den->nlimbs; + mpi_size_t qsize, rsize; + mpi_size_t sign_remainder = num->sign; + mpi_size_t sign_quotient = num->sign ^ den->sign; + unsigned normalization_steps; + mpi_limb_t q_limb; + mpi_ptr_t marker[5]; + int markidx=0; + + /* Ensure space is enough for quotient and remainder. + * We need space for an extra limb in the remainder, because it's + * up-shifted (normalized) below. */ + rsize = nsize + 1; + if( rem->alloced < rsize ) + mpi_resize( rem, rsize); + + qsize = rsize - dsize; /* qsize cannot be bigger than this. */ + if( qsize <= 0 ) { + if( num != rem ) { + rem->nlimbs = num->nlimbs; + rem->sign = num->sign; + MPN_COPY(rem->d, num->d, nsize); + } + if( quot ) { + /* This needs to follow the assignment to rem, in case the + * numerator and quotient are the same. */ + quot->nlimbs = 0; + quot->sign = 0; + } + return; + } + + if( quot && quot->alloced < qsize ) + mpi_resize( quot, qsize); + + /* Read pointers here, when reallocation is finished. */ + np = num->d; + dp = den->d; + rp = rem->d; + + /* Optimize division by a single-limb divisor. */ + if( dsize == 1 ) { + mpi_limb_t rlimb; + if( quot ) { + qp = quot->d; + rlimb = mpihelp_divmod_1( qp, np, nsize, dp[0] ); + qsize -= qp[qsize - 1] == 0; + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } + else + rlimb = mpihelp_mod_1( np, nsize, dp[0] ); + rp[0] = rlimb; + rsize = rlimb != 0?1:0; + rem->nlimbs = rsize; + rem->sign = sign_remainder; + return; + } + + + if( quot ) { + qp = quot->d; + /* Make sure QP and NP point to different objects. Otherwise the + * numerator would be gradually overwritten by the quotient limbs. */ + if(qp == np) { /* Copy NP object to temporary space. */ + np = marker[markidx++] = mpi_alloc_limb_space(nsize); + MPN_COPY(np, qp, nsize); + } + } + else /* Put quotient at top of remainder. */ + qp = rp + dsize; + + count_leading_zeros( normalization_steps, dp[dsize - 1] ); + + /* Normalize the denominator, i.e. make its most significant bit set by + * shifting it NORMALIZATION_STEPS bits to the left. Also shift the + * numerator the same number of steps (to keep the quotient the same!). + */ + if( normalization_steps ) { + mpi_ptr_t tp; + mpi_limb_t nlimb; + + /* Shift up the denominator setting the most significant bit of + * the most significant word. Use temporary storage not to clobber + * the original contents of the denominator. */ + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + mpihelp_lshift( tp, dp, dsize, normalization_steps ); + dp = tp; + + /* Shift up the numerator, possibly introducing a new most + * significant word. Move the shifted numerator in the remainder + * meanwhile. */ + nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps); + if( nlimb ) { + rp[nsize] = nlimb; + rsize = nsize + 1; + } + else + rsize = nsize; + } + else { + /* The denominator is already normalized, as required. Copy it to + * temporary space if it overlaps with the quotient or remainder. */ + if( dp == rp || (quot && (dp == qp))) { + mpi_ptr_t tp; + + tp = marker[markidx++] = mpi_alloc_limb_space(dsize); + MPN_COPY( tp, dp, dsize ); + dp = tp; + } + + /* Move the numerator to the remainder. */ + if( rp != np ) + MPN_COPY(rp, np, nsize); + + rsize = nsize; + } + + q_limb = mpihelp_divrem( qp, 0, rp, rsize, dp, dsize ); + + if( quot ) { + qsize = rsize - dsize; + if(q_limb) { + qp[qsize] = q_limb; + qsize += 1; + } + + quot->nlimbs = qsize; + quot->sign = sign_quotient; + } + + rsize = dsize; + MPN_NORMALIZE (rp, rsize); + + if( normalization_steps && rsize ) { + mpihelp_rshift(rp, rp, rsize, normalization_steps); + rsize -= rp[rsize - 1] == 0?1:0; + } + + rem->nlimbs = rsize; + rem->sign = sign_remainder; + while( markidx ) + mpi_free_limb_space(marker[--markidx]); +} + + +/**************** + * Check wether dividend is divisible by divisor + * (note: divisor must fit into a limb) + */ +int +mpi_divisible_ui(MPI dividend, ulong divisor ) +{ + return !mpihelp_mod_1( dividend->d, dividend->nlimbs, divisor ); +} + diff --git a/mpi/mpi-gcd.c b/mpi/mpi-gcd.c new file mode 100644 index 00000000..f31e917f --- /dev/null +++ b/mpi/mpi-gcd.c @@ -0,0 +1,54 @@ +/* mpi-gcd.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + +/**************** + * Find the greatest common divisor G of A and B. + * Return: true if this 1, false in all other cases + */ +int +mpi_gcd( MPI g, MPI xa, MPI xb ) +{ + MPI a, b; + + a = mpi_copy(xa); + b = mpi_copy(xb); + + /* TAOCP Vol II, 4.5.2, Algorithm A */ + a->sign = 0; + b->sign = 0; + while( mpi_cmp_ui( b, 0 ) ) { + mpi_fdiv_r( g, a, b ); /* g used as temorary variable */ + mpi_set(a,b); + mpi_set(b,g); + } + mpi_set(g, a); + + mpi_free(a); + mpi_free(b); + return !mpi_cmp_ui( g, 1); +} + + + diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h new file mode 100644 index 00000000..b5c00b6c --- /dev/null +++ b/mpi/mpi-internal.h @@ -0,0 +1,198 @@ +/* mpi-internal.h - Internal to the Multi Precision Integers + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#ifndef G10_MPI_INTERNAL_H +#define G10_MPI_INTERNAL_H + +#include "mpi.h" + + + +typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ +typedef int mpi_size_t; /* (must be a signed type) */ + +#define ABS(x) (x >= 0 ? x : -x) +#define MIN(l,o) ((l) < (o) ? (l) : (o)) +#define MAX(h,i) ((h) > (i) ? (h) : (i)) +#define RESIZE_IF_NEEDED(a,b) \ + do { \ + if( (a)->alloced < (b) ) \ + mpi_resize((a), (b)); \ + } while(0) + +/* Copy N limbs from S to D. */ +#define MPN_COPY( d, s, n) \ + do { \ + mpi_size_t _i; \ + for( _i = 0; _i < (n); _i++ ) \ + (d)[_i] = (s)[_i]; \ + } while(0) + +#define MPN_COPY_DECR( d, s, n ) \ + do { \ + mpi_size_t _i; \ + for( _i = (n)-1; _i >= 0; _i--) \ + (d)[_i] = (s)[_i]; \ + } while(0) + +/* Zero N limbs at D */ +#define MPN_ZERO(d, n) \ + do { \ + int _i; \ + for( _i = 0; _i < (n); _i++ ) \ + (d)[_i] = 0; \ + } while (0) + +#define MPN_NORMALIZE(d, n) \ + do { \ + while( (n) > 0 ) { \ + if( (d)[(n)-1] ) \ + break; \ + (n)--; \ + } \ + } while(0) + +#define MPN_NORMALIZE_NOT_ZERO(d, n) \ + do { \ + for(;;) { \ + if( (d)[(n)-1] ) \ + break; \ + (n)--; \ + } \ + } while(0) + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ + do { \ + if( (size) < KARATSUBA_THRESHOLD ) \ + mul_n_basecase (prodp, up, vp, size); \ + else \ + mul_n (prodp, up, vp, size, tspace); \ + } while (0); + + +/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest + * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB). + * If this would yield overflow, DI should be the largest possible number + * (i.e., only ones). For correct operation, the most significant bit of D + * has to be set. Put the quotient in Q and the remainder in R. + */ +#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \ + do { \ + mpi_limb_t _q, _ql, _r; \ + mpi_limb_t _xh, _xl; \ + umul_ppmm (_q, _ql, (nh), (di)); \ + _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \ + umul_ppmm (_xh, _xl, _q, (d)); \ + sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \ + if( _xh ) { \ + sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + if( _xh) { \ + sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \ + _q++; \ + } \ + } \ + if( _r >= (d) ) { \ + _r -= (d); \ + _q++; \ + } \ + (r) = _r; \ + (q) = _q; \ + } while (0) + + +/*-- mpiutil.c --*/ +#ifdef M_DEBUG + #define mpi_alloc_limb_space(n) mpi_debug_alloc_limb_space((n), M_DBGINFO( __LINE__ ) ) + #define mpi_free_limb_space(n) mpi_debug_free_limb_space((n), M_DBGINFO( __LINE__ ) ) + mpi_ptr_t mpi_debug_alloc_limb_space( unsigned nlimbs, const char *info ); + void mpi_debug_free_limb_space( mpi_ptr_t a, const char *info ); +#else + mpi_ptr_t mpi_alloc_limb_space( unsigned nlimbs ); + void mpi_free_limb_space( mpi_ptr_t a ); +#endif +void mpi_assign_limb_space( MPI a, mpi_ptr_t ap, unsigned nlimbs ); + +/*-- mpihelp-add.c --*/ +mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb ); +mpi_limb_t mpihelp_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-sub.c --*/ +mpi_limb_t mpihelp_sub_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb ); +mpi_limb_t mpihelp_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size); +mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size); + +/*-- mpihelp-cmp.c --*/ +int mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size ); + +/*-- mpihelp-mul.c --*/ +mpi_limb_t mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb); +void mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t size); +mpi_limb_t mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize); + +/*-- mpihelp-div.c --*/ +mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); +mpi_limb_t mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs, + mpi_ptr_t np, mpi_size_t nsize, + mpi_ptr_t dp, mpi_size_t dsize); +mpi_limb_t mpihelp_divmod_1( mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb); + +/*-- mpihelp-shift.c --*/ +mpi_limb_t mpihelp_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); +mpi_limb_t mpihelp_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned cnt); + + +/* Define stuff for longlong.h. */ +#define W_TYPE_SIZE BITS_PER_MPI_LIMB + typedef mpi_limb_t UWtype; + typedef unsigned int UHWtype; +#if defined (__GNUC__) + typedef unsigned int UQItype __attribute__ ((mode (QI))); + typedef int SItype __attribute__ ((mode (SI))); + typedef unsigned int USItype __attribute__ ((mode (SI))); + typedef int DItype __attribute__ ((mode (DI))); + typedef unsigned int UDItype __attribute__ ((mode (DI))); +#else + typedef unsigned char UQItype; + typedef long SItype; + typedef unsigned long USItype; +#endif + + +#endif /*G10_MPI_INTERNAL_H*/ diff --git a/mpi/mpi-inv.c b/mpi/mpi-inv.c new file mode 100644 index 00000000..acde6055 --- /dev/null +++ b/mpi/mpi-inv.c @@ -0,0 +1,127 @@ +/* mpi-inv.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + +/**************** + * Calculate the multiplicative inverse X of U mod V + * That is: Find the solution for + * 1 = (u*x) mod v + * This has only a unique solution if U and V are relatively prime. + * Returns 0 if a solution was found. + */ +int +mpi_inv_mod( MPI x, MPI u, MPI v ) +{ + #if 0 + /* Extended Euclid's algorithm (See TAOPC Vol II, 4.52. Alg X) */ + MPI u1, u2, u3, v1, v2, v3, q, t1, t2, t3; + + u1 = mpi_alloc_set_ui(1); + u2 = mpi_alloc_set_ui(0); + u3 = mpi_copy(u); + v1 = mpi_alloc_set_ui(0); + v2 = mpi_alloc_set_ui(1); + v3 = mpi_copy(v); + q = mpi_alloc( mpi_get_nlimbs(u) ); + t1 = mpi_alloc( mpi_get_nlimbs(u) ); + t2 = mpi_alloc( mpi_get_nlimbs(u) ); + t3 = mpi_alloc( mpi_get_nlimbs(u) ); + while( mpi_cmp_ui( v3, 0 ) ) { + /*log_debug("----------------------\n"); + log_mpidump("q =", u1); + log_mpidump("u1=", u1); + log_mpidump("u2=", u2); + log_mpidump("u3=", u3); + log_mpidump("v1=", v1); + log_mpidump("v2=", v2); + log_mpidump("v3=", v3); */ + mpi_fdiv_q( q, u3, v3 ); + mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q); + mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3); + + mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3); + mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3); + } + mpi_set(x, u3); + + mpi_free(u1); + mpi_free(u2); + mpi_free(u3); + mpi_free(v1); + mpi_free(v2); + mpi_free(v3); + mpi_free(q); + mpi_free(t1); + mpi_free(t2); + mpi_free(t3); + #endif + + /***************************** + * 1. Init: g0 = u g1 = v v0 = 0 v1 = 1 + * 2. Test: if g1 is 0 terminate. Result = v0 < 0: v0 + n + * else: v0 + * 3. Divide: div,rem = g0 / g1 + * t1 = v0 - div * v1 + * v0 = v1 + * v1 = t1 + * g0 = g1 + * g1 = rem + * continue with step 2. + */ + MPI g0, g1, v0, v1, div, rem, t1; + + g0 = mpi_copy(v); + g1 = mpi_copy(u); + v0 = mpi_alloc_set_ui( 0 ); + v1 = mpi_alloc_set_ui( 1 ); + div = mpi_alloc(mpi_get_nlimbs(v)); + rem = mpi_alloc(mpi_get_nlimbs(v)); + t1 = mpi_alloc(mpi_get_nlimbs(v)); + while( mpi_cmp_ui( g1, 0) ) { + mpi_fdiv_qr(div, rem, g0, g1); + mpi_mul(t1, div, v1); + mpi_sub(t1, v0, t1); + mpi_set(v0, v1); + mpi_set(v1, t1); + mpi_set(g0, g1); + mpi_set(g1, rem); + + } + if( mpi_cmp_ui( v0, 0) < 0 ) + mpi_add( x, v0, v); + else + mpi_set( x, v0); + + mpi_free(g0); + mpi_free(g1); + mpi_free(v0); + mpi_free(v1); + mpi_free(div); + mpi_free(rem); + mpi_free(t1); + return 0; +} + + + diff --git a/mpi/mpi-mul.c b/mpi/mpi-mul.c new file mode 100644 index 00000000..03f2b4b4 --- /dev/null +++ b/mpi/mpi-mul.c @@ -0,0 +1,178 @@ +/* mpi-mul.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + + +void +mpi_mul_ui( MPI prod, MPI mult, unsigned long small_mult ) +{ + mpi_size_t size, prod_size; + mpi_ptr_t prod_ptr; + mpi_limb_t cy; + int sign; + + size = mult->nlimbs; + sign = mult->sign; + + if( !size || !small_mult ) { + prod->nlimbs = 0; + prod->sign = 0; + return; + } + + prod_size = size + 1; + if( prod->alloced < prod_size ) + mpi_resize( prod, prod_size ); + prod_ptr = prod->d; + + cy = mpihelp_mul_1( prod_ptr, mult->d, size, (mpi_limb_t)small_mult ); + if( cy ) + prod_ptr[size++] = cy; + prod->nlimbs = size; + prod->sign = sign; +} + + +void +mpi_mul_2exp( MPI w, MPI u, unsigned long cnt) +{ + mpi_size_t usize, wsize, limb_cnt; + mpi_ptr_t wp; + mpi_limb_t wlimb; + int usign, wsign; + + usize = u->nlimbs; + usign = u->sign; + + if( !usize ) { + w->nlimbs = 0; + w->sign = 0; + return; + } + + limb_cnt = cnt / BITS_PER_MPI_LIMB; + wsize = usize + limb_cnt + 1; + if( w->alloced < wsize ) + mpi_resize(w, wsize ); + wp = w->d; + wsize = usize + limb_cnt; + wsign = usign; + + cnt %= BITS_PER_MPI_LIMB; + if( cnt ) { + wlimb = mpihelp_lshift( wp + limb_cnt, u->d, usize, cnt ); + if( wlimb ) { + wp[wsize] = wlimb; + wsize++; + } + } + else { + MPN_COPY_DECR( wp + limb_cnt, u->d, usize ); + } + + /* Zero all whole limbs at low end. Do it here and not before calling + * mpn_lshift, not to lose for U == W. */ + MPN_ZERO( wp, limb_cnt ); + + w->nlimbs = wsize; + w->sign = wsign; +} + + + +void +mpi_mul( MPI w, MPI u, MPI v) +{ + mpi_size_t usize, vsize, wsize; + mpi_ptr_t up, vp, wp; + mpi_limb_t cy; + int usign, vsign, sign_product; + int assign_wp=0; + mpi_ptr_t tmp_limb=NULL; + + if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */ + usize = v->nlimbs; + usign = v->sign; + up = v->d; + vsize = u->nlimbs; + vsign = u->sign; + vp = u->d; + } + else { + usize = u->nlimbs; + usign = u->sign; + up = u->d; + vsize = v->nlimbs; + vsign = v->sign; + vp = v->d; + } + sign_product = usign ^ vsign; + wp = w->d; + + /* Ensure W has space enough to store the result. */ + wsize = usize + vsize; + if( w->alloced < wsize ) { + if( wp == up || wp == vp ) { + wp = mpi_alloc_limb_space( wsize ); + assign_wp = 1; + } + else { + mpi_resize(w, wsize ); + wp = w->d; + } + } + else { /* Make U and V not overlap with W. */ + if( wp == up ) { + /* W and U are identical. Allocate temporary space for U. */ + up = tmp_limb = mpi_alloc_limb_space( usize ); + /* Is V identical too? Keep it identical with U. */ + if( wp == vp ) + vp = up; + /* Copy to the temporary space. */ + MPN_COPY( up, wp, usize ); + } + else if( wp == vp ) { + /* W and V are identical. Allocate temporary space for V. */ + vp = tmp_limb = mpi_alloc_limb_space( vsize ); + /* Copy to the temporary space. */ + MPN_COPY( vp, wp, vsize ); + } + } + + if( !vsize ) + wsize = 0; + else { + cy = mpihelp_mul( wp, up, usize, vp, vsize ); + wsize -= cy? 0:1; + } + + if( assign_wp ) + mpi_assign_limb_space( w, wp, wsize ); + w->nlimbs = wsize; + w->sign = sign_product; + if( tmp_limb ) + mpi_free_limb_space( tmp_limb ); +} + + diff --git a/mpi/mpi-pow.c b/mpi/mpi-pow.c new file mode 100644 index 00000000..14fe4de4 --- /dev/null +++ b/mpi/mpi-pow.c @@ -0,0 +1,247 @@ +/* mpi-pow.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" +#include <assert.h> + + +/**************** + * RES = BASE ^ EXP mod MOD + */ +void +mpi_powm( MPI res, MPI base, MPI exp, MPI mod) +{ + mpi_ptr_t rp, ep, mp, bp; + mpi_size_t esize, msize, bsize, rsize; + int esign, msign, bsign, rsign; + mpi_size_t size; + int mod_shift_cnt; + int negative_result; + mpi_ptr_t mp_marker=NULL, bp_marker=NULL, ep_marker=NULL; + mpi_ptr_t xp_marker=NULL; + int assign_rp=0; + + esize = exp->nlimbs; + msize = mod->nlimbs; + size = 2 * msize; + esign = exp->sign; + msign = mod->sign; + + rp = res->d; + ep = exp->d; + + if( !msize ) + msize = 1 / msize; /* provoke a signal */ + + if( !esize ) { + /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 + * depending on if MOD equals 1. */ + rp[0] = 1; + res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; + res->sign = 0; + goto leave; + } + + /* Normalize MOD (i.e. make its most significant bit set) as required by + * mpn_divrem. This will make the intermediate values in the calculation + * slightly larger, but the correct result is obtained after a final + * reduction using the original MOD value. */ + mp = mp_marker = mpi_alloc_limb_space(msize); + count_leading_zeros( mod_shift_cnt, mod->d[msize-1] ); + if( mod_shift_cnt ) + mpihelp_lshift( mp, mod->d, msize, mod_shift_cnt ); + else + MPN_COPY( mp, mod->d, msize ); + + bsize = base->nlimbs; + bsign = base->sign; + if( bsize > msize ) { /* The base is larger than the module. Reduce it. */ + /* Allocate (BSIZE + 1) with space for remainder and quotient. + * (The quotient is (bsize - msize + 1) limbs.) */ + bp = bp_marker = mpi_alloc_limb_space( bsize + 1); + MPN_COPY( bp, base->d, bsize ); + /* We don't care about the quotient, store it above the remainder, + * at BP + MSIZE. */ + mpihelp_divrem( bp + msize, 0, bp, bsize, mp, msize ); + bsize = msize; + /* Canonicalize the base, since we are going to multiply with it + * quite a few times. */ + MPN_NORMALIZE( bp, bsize ); + } + else + bp = base->d; + + if( !bsize ) { + res->nlimbs = 0; + res->sign = 0; + goto leave; + } + + if( res->alloced < size ) { + /* We have to allocate more space for RES. If any of the input + * parameters are identical to RES, defer deallocation of the old + * space. */ + if( rp == ep || rp == mp || rp == bp ) { + rp = mpi_alloc_limb_space( size ); + assign_rp = 1; + } + else { + mpi_resize( res, size ); + rp = res->d; + } + } + else { /* Make BASE, EXP and MOD not overlap with RES. */ + if( rp == bp ) { + /* RES and BASE are identical. Allocate temp. space for BASE. */ + assert( !bp_marker ); + bp = bp_marker = mpi_alloc_limb_space( bsize ); + MPN_COPY(bp, rp, bsize); + } + if( rp == ep ) { + /* RES and EXP are identical. Allocate temp. space for EXP. */ + ep = ep_marker = mpi_alloc_limb_space( esize ); + MPN_COPY(ep, rp, esize); + } + if( rp == mp ) { + /* RES and MOD are identical. Allocate temporary space for MOD.*/ + assert( !mp_marker ); + mp = mp_marker = mpi_alloc_limb_space( msize ); + MPN_COPY(mp, rp, msize); + } + } + + MPN_COPY( rp, bp, bsize ); + rsize = bsize; + rsign = bsign; + + { + mpi_size_t i; + mpi_ptr_t xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1) ); + int c; + mpi_limb_t e; + mpi_limb_t carry_limb; + + negative_result = (ep[0] & 1) && base->sign; + + i = esize - 1; + e = ep[i]; + count_leading_zeros (c, e); + e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ + c = BITS_PER_MPI_LIMB - 1 - c; + + /* Main loop. + * + * Make the result be pointed to alternately by XP and RP. This + * helps us avoid block copying, which would otherwise be necessary + * with the overlap restrictions of mpihelp_divmod. With 50% probability + * the result after this loop will be in the area originally pointed + * by RP (==RES->d), and with 50% probability in the area originally + * pointed to by XP. + */ + for(;;) { + while( c ) { + mpi_ptr_t tp; + mpi_size_t xsize; + + mpihelp_mul_n(xp, rp, rp, rsize); + xsize = 2 * rsize; + if( xsize > msize ) { + mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize); + xsize = msize; + } + + tp = rp; rp = xp; xp = tp; + rsize = xsize; + + if( (mpi_limb_signed_t)e < 0 ) { + mpihelp_mul( xp, rp, rsize, bp, bsize ); + xsize = rsize + bsize; + if( xsize > msize ) { + mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize); + xsize = msize; + } + + tp = rp; rp = xp; xp = tp; + rsize = xsize; + } + e <<= 1; + c--; + } + + i--; + if( i < 0 ) + break; + e = ep[i]; + c = BITS_PER_MPI_LIMB; + } + + /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT + * steps. Adjust the result by reducing it with the original MOD. + * + * Also make sure the result is put in RES->d (where it already + * might be, see above). + */ + if( mod_shift_cnt ) { + carry_limb = mpihelp_lshift( res->d, rp, rsize, mod_shift_cnt); + rp = res->d; + if( carry_limb ) { + rp[rsize] = carry_limb; + rsize++; + } + } + else { + MPN_COPY( res->d, rp, rsize); + rp = res->d; + } + + if( rsize >= msize ) { + mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize); + rsize = msize; + } + + /* Remove any leading zero words from the result. */ + if( mod_shift_cnt ) + mpihelp_rshift( rp, rp, rsize, mod_shift_cnt); + MPN_NORMALIZE (rp, rsize); + } + + if( negative_result && rsize ) { + if( mod_shift_cnt ) + mpihelp_rshift( mp, mp, msize, mod_shift_cnt); + mpihelp_sub( rp, mp, msize, rp, rsize); + rsize = msize; + rsign = msign; + MPN_NORMALIZE(rp, rsize); + } + res->nlimbs = rsize; + res->sign = rsign; + + leave: + if( assign_rp ) mpi_assign_limb_space( res, rp, size ); + if( mp_marker ) mpi_free_limb_space( mp_marker ); + if( bp_marker ) mpi_free_limb_space( bp_marker ); + if( ep_marker ) mpi_free_limb_space( ep_marker ); + if( xp_marker ) mpi_free_limb_space( xp_marker ); +} + diff --git a/mpi/mpi-scan.c b/mpi/mpi-scan.c new file mode 100644 index 00000000..8626032a --- /dev/null +++ b/mpi/mpi-scan.c @@ -0,0 +1,88 @@ +/* mpi-scan.c - MPI functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + +/**************** + * Scan through an mpi and return byte for byte. a -1 is returned to indicate + * the end of the mpi. Scanning is done from the lsb to the msb, returned + * values are in the range of 0 .. 255. + * + * FIXME: This code is VERY ugly! + */ +int +mpi_getbyte( MPI a, unsigned index ) +{ + int i, j; + unsigned n; + mpi_ptr_t ap; + mpi_limb_t limb; + + ap = a->d; + for(n=0,i=0; i < a->nlimbs; i++ ) { + limb = ap[i]; + for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) + if( n == index ) + return (limb >> j*8) & 0xff; + } + return -1; +} + + +/**************** + * Put a value at position INDEX into A. index counts from lsb to msb + */ +void +mpi_putbyte( MPI a, unsigned index, int c ) +{ + int i, j; + unsigned n; + mpi_ptr_t ap; + mpi_limb_t limb; + +#if BYTES_PER_MPI_LIMB != 4 + #error please enhance this function, its ugly - i know. +#endif + c &= 0xff; + ap = a->d; + for(n=0,i=0; i < a->alloced; i++ ) { + limb = ap[i]; + for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) + if( n == index ) { + if( j == 0 ) + limb = (limb & 0xffffff00) | c; + else if( j == 1 ) + limb = (limb & 0xffff00ff) | (c<<8); + else if( j == 2 ) + limb = (limb & 0xff00ffff) | (c<<16); + else + limb = (limb & 0x00ffffff) | (c<<24); + if( a->nlimbs <= i ) + a->nlimbs = i+1; + ap[i] = limb; + return; + } + } + abort(); /* index out of range */ +} + diff --git a/mpi/mpicoder.c b/mpi/mpicoder.c new file mode 100644 index 00000000..23454c0f --- /dev/null +++ b/mpi/mpicoder.c @@ -0,0 +1,392 @@ +/* mpicoder.c - Coder for the external representation of MPIs + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "mpi.h" +#include "iobuf.h" +#include "memory.h" +#include "util.h" + +#ifdef M_DEBUG + #undef mpi_decode + #undef mpi_decode_buffer +#endif + +#define MAX_EXTERN_MPI_BITS 16384 + +/**************** + * write an mpi to out. + */ +int +mpi_encode( IOBUF out, MPI a ) +{ + u16 dummy; + return mpi_encode_csum( out, a, &dummy ); +} + +int +mpi_encode_csum( IOBUF out, MPI a, u16 *csum ) +{ + int i; + byte c; + unsigned nbits = a->nlimbs * BITS_PER_MPI_LIMB; + mpi_limb_t limb; + +#if BYTES_PER_MPI_LIMB != 4 + #error Make this function work with other LIMB sizes +#endif + if( nbits > MAX_EXTERN_MPI_BITS ) + log_bug("mpi_encode: mpi too large (%u bits)\n", nbits); + iobuf_put(out, (c=nbits >>8) ); *csum += c; + iobuf_put(out, (c=nbits) ); *csum += c; + for(i=a->nlimbs-1; i >= 0; i-- ) { + limb = a->d[i]; + iobuf_put(out, (c=limb >> 24) ); *csum += c; + iobuf_put(out, (c=limb >> 16) ); *csum += c; + iobuf_put(out, (c=limb >> 8) ); *csum += c; + iobuf_put(out, (c=limb ) ); *csum += c; + } + return 0; +} + +/**************** + * encode the MPI into a newly allocated buffer, the buffer is + * so constructed, that it can be used for mpi_write. The caller + * must free the returned buffer. The buffer is allocated in the same + * type of memory space as A is. + */ +byte * +mpi_encode_buffer( MPI a ) +{ + abort(); + return NULL; +} + +/**************** + * write an mpi to out. This is a special function to handle + * encrypted values. It simply writes the buffer a to OUT. + * A is a special buffer, starting with 2 bytes giving it's length + * (in big endian order) and 2 bytes giving it's length in bits (also + * big endian) + */ +int +mpi_write( IOBUF out, byte *a) +{ + u16 dummy; + return mpi_write_csum( out, a, &dummy ); +} + +int +mpi_write_csum( IOBUF out, byte *a, u16 *csum) +{ + int rc; + unsigned n; + + n = *a++ << 8; + n |= *a++; + rc = iobuf_write(out, a, n ); + for( ; n; n--, a++ ) + *csum += *a; + return rc; +} + +/**************** + * Decode an external representation and return an MPI + * The external format is a 16 bit unsigned value stored in network byte order, + * giving the number of bits for the following integer. The integer is stored + * with MSB first (left padded with zeroes to align on a byte boundary). + */ +MPI +#ifdef M_DEBUG +mpi_debug_decode(IOBUF inp, unsigned *ret_nread, const char *info) +#else +mpi_decode(IOBUF inp, unsigned *ret_nread) +#endif +{ + int c, i, j; + unsigned nbits, nbytes, nlimbs, nread=0; + mpi_limb_t a; + MPI val = MPI_NULL; + + if( (c = iobuf_get(inp)) == -1 ) + goto leave; + nbits = c << 8; + if( (c = iobuf_get(inp)) == -1 ) + goto leave; + nbits |= c; + if( nbits > MAX_EXTERN_MPI_BITS ) { + log_error("mpi too large (%u bits)\n", nbits); + goto leave; + } + nread = 2; + + nbytes = (nbits+7) / 8; + nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; + #ifdef M_DEBUG + val = mpi_debug_alloc( nlimbs, info ); + #else + val = mpi_alloc( nlimbs ); + #endif + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + j= val->nlimbs = nlimbs; + val->sign = 0; + for( ; j > 0; j-- ) { + a = 0; + for(; i < BYTES_PER_MPI_LIMB; i++ ) { + a <<= 8; + a |= iobuf_get(inp) & 0xff; nread++; + } + i = 0; + val->d[j-1] = a; + } + + leave: + if( nread > *ret_nread ) + log_error("Ooops: mpi crosses packet border"); + else + *ret_nread = nread; + return val; +} + + +/**************** + * Decode an MPI from the buffer, the buffer starts with two bytes giving + * the length of the data to follow, the original data follows. + * The MPI is alloced from secure MPI space + */ +MPI +#ifdef M_DEBUG +mpi_debug_decode_buffer(byte *buffer, const char *info ) +#else +mpi_decode_buffer(byte *buffer ) +#endif +{ + int i, j; + u16 buflen; + unsigned nbits, nbytes, nlimbs; + mpi_limb_t a; + byte *p = buffer; + MPI val; + + if( !buffer ) + log_bug("mpi_decode_buffer: no buffer\n"); + buflen = *p++ << 8; + buflen |= *p++; + nbits = *p++ << 8; + nbits |= *p++; + nbytes = (nbits+7) / 8; + if( nbytes+2 != buflen ) + log_bug("mpi_decode_buffer: length conflict\n"); + nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; + #ifdef M_DEBUG + val = mpi_debug_alloc_secure( nlimbs, info ); + #else + val = mpi_alloc_secure( nlimbs ); + #endif + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + j= val->nlimbs = nlimbs; + val->sign = 0; + for( ; j > 0; j-- ) { + a = 0; + for(; i < BYTES_PER_MPI_LIMB; i++ ) { + a <<= 8; + a |= *p++; + } + i = 0; + val->d[j-1] = a; + } + return val; +} + + +/**************** + * Read a MPI from the external medium and return it in a newly allocated + * buffer (This buffer is allocated in the secure memory space, because + * we properly need this to decipher this string). + * Return: the allocated string and in RET_NREAD the number of bytes + * read (including the 2 length bytes), the returned buffer will + * be prefixed with two bytes describing the length of the following + * data. + */ +byte * +mpi_read(IOBUF inp, unsigned *ret_nread) +{ + int c; + u16 buflen; + unsigned nbits, nbytes, nread; + byte *p, *buf; + + if( (c = iobuf_get(inp)) == -1 ) + return NULL; + nbits = c << 8; + if( (c = iobuf_get(inp)) == -1 ) + return NULL; + nbits |= c; + if( nbits > MAX_EXTERN_MPI_BITS ) { + log_error("mpi too large (%u bits)\n", nbits); + return NULL; + } + nread = 2; + + nbytes = (nbits+7) / 8; + buflen = nbytes + 2; + p = buf = m_alloc_secure( buflen+2 ); + *p++ = buflen >> 8; + *p++ = buflen & 0xff; + *p++ = nbits >> 8; + *p++ = nbits & 0xff; + for( ; nbytes ; nbytes--, nread++ ) + *p++ = iobuf_get(inp) & 0xff; + + if( nread > *ret_nread ) + log_error("Ooops: mpi crosses packet border"); + else + *ret_nread = nread; + return buf; +} + + +/**************** + * Make a mpi from a character string. + */ +int +mpi_fromstr(MPI val, const char *str) +{ + int hexmode=0, sign=0, prepend_zero=0, i, j, c, c1, c2; + unsigned nbits, nbytes, nlimbs; + mpi_limb_t a; + + if( *str == '-' ) { + sign = 1; + str++; + } + if( *str == '0' && str[1] == 'x' ) + hexmode = 1; + else + return 1; /* other bases are not yet supported */ + str += 2; + + nbits = strlen(str)*4; + if( nbits % 8 ) + prepend_zero = 1; + nbytes = (nbits+7) / 8; + nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB; + if( val->alloced < nlimbs ) + mpi_resize(val, nlimbs ); + i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; + i %= BYTES_PER_MPI_LIMB; + j= val->nlimbs = nlimbs; + val->sign = sign; + for( ; j > 0; j-- ) { + a = 0; + for(; i < BYTES_PER_MPI_LIMB; i++ ) { + if( prepend_zero ) { + c1 = '0'; + prepend_zero = 0; + } + else + c1 = *str++; + assert(c1); + c2 = *str++; + assert(c2); + if( c1 >= '0' && c1 <= '9' ) + c = c1 - '0'; + else if( c1 >= 'a' && c1 <= 'f' ) + c = c1 - 'a' + 10; + else if( c1 >= 'A' && c1 <= 'F' ) + c = c1 - 'A' + 10; + else { + mpi_clear(val); + return 1; + } + c <<= 4; + if( c2 >= '0' && c2 <= '9' ) + c |= c2 - '0'; + else if( c2 >= 'a' && c2 <= 'f' ) + c |= c2 - 'a' + 10; + else if( c2 >= 'A' && c2 <= 'F' ) + c |= c2 - 'A' + 10; + else { + mpi_clear(val); + return 1; + } + a <<= 8; + a |= c; + } + i = 0; + val->d[j-1] = a; + } + + return 0; +} + + +/**************** + * print an MPI to the give stream and return the number of characters + * printed. + */ +int +mpi_print( FILE *fp, MPI a, int mode ) +{ + int i, n=0; + + if( a == MPI_NULL ) + return fprintf(fp, "[MPI_NULL]"); + if( !mode ) + n += fprintf(fp, "[%d bits]", a->nlimbs * BITS_PER_MPI_LIMB ); + else { + if( a->sign ) + putc('-', fp); + for(i=a->nlimbs; i > 0 ; i-- ) { + n += fprintf(fp, i!=a->nlimbs? "%0" STR2(BYTES_PER_MPI_LIMB2) + "lX":"%lX", (unsigned long)a->d[i-1] ); + } + if( !a->nlimbs ) + putc('0', fp ); + } + return n; +} + + +/**************** + * Special function to get the low 8 bytes from a mpi, + * this can be used as a keyid, KEYID is an 2 element array. + * Does return the low 4 bytes. + */ +u32 +mpi_get_keyid( MPI a, u32 *keyid ) +{ +#if BYTES_PER_MPI_LIMB != 4 + #error Make this function work with other LIMB sizes +#endif + if( keyid ) { + keyid[0] = a->nlimbs >= 2? a->d[1] : 0; + keyid[1] = a->nlimbs >= 1? a->d[0] : 0; + } + return a->nlimbs >= 1? a->d[0] : 0; +} + + diff --git a/mpi/mpih-add.c b/mpi/mpih-add.c new file mode 100644 index 00000000..90ce8d76 --- /dev/null +++ b/mpi/mpih-add.c @@ -0,0 +1,109 @@ +/* mpihelp-add.c - MPI helper functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mpi-internal.h" + +/**************** + * Add to S1_PTR with size S1_SIZE the limb S2_LIMB and + * store the result in RES_PTR. Return the carry + * S1_SIZE must be > 0. + */ +/*_EXTERN_INLINE */ +mpi_limb_t +mpihelp_add_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb += x; + *res_ptr++ = s2_limb; + if( s2_limb < x ) { /* sum is less than the left operand: handle carry */ + while( --s1_size ) { + x = *s1_ptr++ + 1; /* add carry */ + *res_ptr++ = x; /* and store */ + if( x ) /* not 0 (no overflow): we can stop */ + goto leave; + } + return 1; /* return carry (size of s1 to small) */ + } + + leave: + if( res_ptr != s1_ptr ) { /* not the same variable */ + mpi_size_t i; /* copy the rest */ + for( i=0; i < s1_size-1; i++ ) + res_ptr[i] = s1_ptr[i]; + } + return 0; /* no carry */ +} + + +/* FIXME: this should be done in assembly */ +mpi_limb_t +mpihelp_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to one addend */ + cy = y < cy? 1:0; /* get out carry from that addition */ + y += x; /* add other addend */ + cy += y < x? 1:0; /* get out carry from that add, combine */ + res_ptr[j] = y; + } while( ++j ); + + return cy; +} + + +/*_EXTERN_INLINE*/ +mpi_limb_t +mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if( s2_size ) + cy = mpihelp_add_n( res_ptr, s1_ptr, s2_ptr, s2_size ); + + if( s1_size - s2_size ) + cy = mpihelp_add_1( res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + diff --git a/mpi/mpih-cmp.c b/mpi/mpih-cmp.c new file mode 100644 index 00000000..821c0ce8 --- /dev/null +++ b/mpi/mpih-cmp.c @@ -0,0 +1,53 @@ +/* mpihelp-sub.c - MPI helper functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mpi-internal.h" + +/**************** + * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. + * There are no restrictions on the relative sizes of + * the two arguments. + * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. + */ +int +mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size ) +{ + mpi_size_t i; + mpi_limb_t op1_word, op2_word; + + for( i = size - 1; i >= 0 ; i--) { + op1_word = op1_ptr[i]; + op2_word = op2_ptr[i]; + if( op1_word != op2_word ) + goto diff; + } + return 0; + + diff: + /* This can *not* be simplified to + * op2_word - op2_word + * since that expression might give signed overflow. */ + return (op1_word > op2_word) ? 1 : -1; +} + diff --git a/mpi/mpih-div.c b/mpi/mpih-div.c new file mode 100644 index 00000000..ca939a75 --- /dev/null +++ b/mpi/mpih-div.c @@ -0,0 +1,528 @@ +/* mpihelp-div.c - MPI helper functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + +#ifndef UMUL_TIME + #define UMUL_TIME 1 +#endif +#ifndef UDIV_TIME + #define UDIV_TIME UMUL_TIME +#endif + +/* FIXME: We should be using invert_limb (or invert_normalized_limb) + * here (not udiv_qrnnd). + */ + +mpi_limb_t +mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb) +{ + mpi_size_t i; + mpi_limb_t n1, n0, r; + int dummy; + + /* Botch: Should this be handled at all? Rely on callers? */ + if( !dividend_size ) + return 0; + + /* If multiplication is much faster than division, and the + * dividend is large, pre-invert the divisor, and use + * only multiplications in the inner loop. + * + * This test should be read: + * Does it ever help to use udiv_qrnnd_preinv? + * && Does what we save compensate for the inversion overhead? + */ + if( UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) { + int normalization_steps; + + count_leading_zeros( normalization_steps, divisor_limb ); + if( normalization_steps ) { + mpi_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + * + * Special case for DIVISOR_LIMB == 100...000. + */ + if( !(divisor_limb << 1) ) + divisor_limb_inverted = ~(mpi_limb_t)0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for( i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + UDIV_QRNND_PREINV(dummy, r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } + else { + mpi_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + * + * Special case for DIVISOR_LIMB == 100...000. + */ + if( !(divisor_limb << 1) ) + divisor_limb_inverted = ~(mpi_limb_t)0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if( r >= divisor_limb ) + r = 0; + else + i--; + + for( ; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV(dummy, r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } + else { + if( UDIV_NEEDS_NORMALIZATION ) { + int normalization_steps; + + count_leading_zeros(normalization_steps, divisor_limb); + if( normalization_steps ) { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for(i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd (dummy, r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + * it, or because DIVISOR_LIMB is already normalized. */ + i = dividend_size - 1; + r = dividend_ptr[i]; + + if(r >= divisor_limb) + r = 0; + else + i--; + + for(; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd (dummy, r, r, n0, divisor_limb); + } + return r; + } +} + +/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write + * the NSIZE-DSIZE least significant quotient limbs at QP + * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is + * non-zero, generate that many fraction bits and append them after the + * other quotient limbs. + * Return the most significant limb of the quotient, this is always 0 or 1. + * + * Preconditions: + * 0. NSIZE >= DSIZE. + * 1. The most significant bit of the divisor must be set. + * 2. QP must either not overlap with the input operands at all, or + * QP + DSIZE >= NP must hold true. (This means that it's + * possible to put the quotient in the high part of NUM, right after the + * remainder in NUM. + * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero. + */ + +mpi_limb_t +mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs, + mpi_ptr_t np, mpi_size_t nsize, + mpi_ptr_t dp, mpi_size_t dsize) +{ + mpi_limb_t most_significant_q_limb = 0; + + switch(dsize) { + case 0: + /* We are asked to divide by zero, so go ahead and do it! (To make + the compiler not remove this statement, return the value.) */ + return 1 / dsize; + + case 1: + { + mpi_size_t i; + mpi_limb_t n1; + mpi_limb_t d; + + d = dp[0]; + n1 = np[nsize - 1]; + + if( n1 >= d ) { + n1 -= d; + most_significant_q_limb = 1; + } + + qp += qextra_limbs; + for( i = nsize - 2; i >= 0; i--) + udiv_qrnnd( qp[i], n1, n1, np[i], d ); + qp -= qextra_limbs; + + for( i = qextra_limbs - 1; i >= 0; i-- ) + udiv_qrnnd (qp[i], n1, n1, 0, d); + + np[0] = n1; + } + break; + + case 2: + { + mpi_size_t i; + mpi_limb_t n1, n0, n2; + mpi_limb_t d1, d0; + + np += nsize - 2; + d1 = dp[1]; + d0 = dp[0]; + n1 = np[1]; + n0 = np[0]; + + if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) { + sub_ddmmss (n1, n0, n1, n0, d1, d0); + most_significant_q_limb = 1; + } + + for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) { + mpi_limb_t q; + mpi_limb_t r; + + if( i >= qextra_limbs ) + np--; + else + np[0] = 0; + + if( n1 == d1 ) { + /* Q should be either 111..111 or 111..110. Need special + * treatment of this rare case as normal division would + * give overflow. */ + q = ~(mpi_limb_t)0; + + r = n0 + d1; + if( r < d1 ) { /* Carry in the addition? */ + add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 ); + qp[i] = q; + continue; + } + n1 = d0 - (d0 != 0?1:0); + n0 = -d0; + } + else { + udiv_qrnnd (q, r, n1, n0, d1); + umul_ppmm (n1, n0, d0, q); + } + + n2 = np[0]; + q_test: + if( n1 > r || (n1 == r && n0 > n2) ) { + /* The estimated Q was too large. */ + q--; + sub_ddmmss (n1, n0, n1, n0, 0, d0); + r += d1; + if( r >= d1 ) /* If not carry, test Q again. */ + goto q_test; + } + + qp[i] = q; + sub_ddmmss (n1, n0, r, n2, n1, n0); + } + np[1] = n1; + np[0] = n0; + } + break; + + default: + { + mpi_size_t i; + mpi_limb_t dX, d1, n0; + + np += nsize - dsize; + dX = dp[dsize - 1]; + d1 = dp[dsize - 2]; + n0 = np[dsize - 1]; + + if( n0 >= dX ) { + if(n0 > dX || mpihelp_cmp(np, dp, dsize - 1) >= 0 ) { + mpihelp_sub_n(np, np, dp, dsize); + n0 = np[dsize - 1]; + most_significant_q_limb = 1; + } + } + + for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) { + mpi_limb_t q; + mpi_limb_t n1, n2; + mpi_limb_t cy_limb; + + if( i >= qextra_limbs ) { + np--; + n2 = np[dsize]; + } + else { + n2 = np[dsize - 1]; + MPN_COPY_DECR (np + 1, np, dsize); + np[0] = 0; + } + + if( n0 == dX ) { + /* This might over-estimate q, but it's probably not worth + * the extra code here to find out. */ + q = ~(mpi_limb_t)0; + } + else { + mpi_limb_t r; + + udiv_qrnnd(q, r, n0, np[dsize - 1], dX); + umul_ppmm(n1, n0, d1, q); + + while( n1 > r || (n1 == r && n0 > np[dsize - 2])) { + q--; + r += dX; + if( r < dX ) /* I.e. "carry in previous addition?" */ + break; + n1 -= n0 < d1; + n0 -= d1; + } + } + + /* Possible optimization: We already have (q * n0) and (1 * n1) + * after the calculation of q. Taking advantage of that, we + * could make this loop make two iterations less. */ + cy_limb = mpihelp_submul_1(np, dp, dsize, q); + + if( n2 != cy_limb ) { + mpihelp_add_n(np, np, dp, dsize); + q--; + } + + qp[i] = q; + n0 = np[dsize - 1]; + } + } + } + + return most_significant_q_limb; +} + + +/**************** + * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB. + * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR. + * Return the single-limb remainder. + * There are no constraints on the value of the divisor. + * + * QUOT_PTR and DIVIDEND_PTR might point to the same limb. + */ + +mpi_limb_t +mpihelp_divmod_1( mpi_ptr_t quot_ptr, + mpi_ptr_t dividend_ptr, mpi_size_t dividend_size, + mpi_limb_t divisor_limb) +{ + mpi_size_t i; + mpi_limb_t n1, n0, r; + int dummy; + + if( !dividend_size ) + return 0; + + /* If multiplication is much faster than division, and the + * dividend is large, pre-invert the divisor, and use + * only multiplications in the inner loop. + * + * This test should be read: + * Does it ever help to use udiv_qrnnd_preinv? + * && Does what we save compensate for the inversion overhead? + */ + if( UDIV_TIME > (2 * UMUL_TIME + 6) + && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) { + int normalization_steps; + + count_leading_zeros( normalization_steps, divisor_limb ); + if( normalization_steps ) { + mpi_limb_t divisor_limb_inverted; + + divisor_limb <<= normalization_steps; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + */ + /* Special case for DIVISOR_LIMB == 100...000. */ + if( !(divisor_limb << 1) ) + divisor_limb_inverted = ~(mpi_limb_t)0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for( i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb, divisor_limb_inverted); + n1 = n0; + } + UDIV_QRNND_PREINV( quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb, divisor_limb_inverted); + return r >> normalization_steps; + } + else { + mpi_limb_t divisor_limb_inverted; + + /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The + * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the + * most significant bit (with weight 2**N) implicit. + */ + /* Special case for DIVISOR_LIMB == 100...000. */ + if( !(divisor_limb << 1) ) + divisor_limb_inverted = ~(mpi_limb_t) 0; + else + udiv_qrnnd(divisor_limb_inverted, dummy, + -divisor_limb, 0, divisor_limb); + + i = dividend_size - 1; + r = dividend_ptr[i]; + + if( r >= divisor_limb ) + r = 0; + else + quot_ptr[i--] = 0; + + for( ; i >= 0; i-- ) { + n0 = dividend_ptr[i]; + UDIV_QRNND_PREINV( quot_ptr[i], r, r, + n0, divisor_limb, divisor_limb_inverted); + } + return r; + } + } + else { + if(UDIV_NEEDS_NORMALIZATION) { + int normalization_steps; + + count_leading_zeros (normalization_steps, divisor_limb); + if( normalization_steps ) { + divisor_limb <<= normalization_steps; + + n1 = dividend_ptr[dividend_size - 1]; + r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps); + + /* Possible optimization: + * if (r == 0 + * && divisor_limb > ((n1 << normalization_steps) + * | (dividend_ptr[dividend_size - 2] >> ...))) + * ...one division less... + */ + for( i = dividend_size - 2; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd (quot_ptr[i + 1], r, r, + ((n1 << normalization_steps) + | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))), + divisor_limb); + n1 = n0; + } + udiv_qrnnd (quot_ptr[0], r, r, + n1 << normalization_steps, + divisor_limb); + return r >> normalization_steps; + } + } + /* No normalization needed, either because udiv_qrnnd doesn't require + * it, or because DIVISOR_LIMB is already normalized. */ + i = dividend_size - 1; + r = dividend_ptr[i]; + + if(r >= divisor_limb) + r = 0; + else + quot_ptr[i--] = 0; + + for(; i >= 0; i--) { + n0 = dividend_ptr[i]; + udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb ); + } + return r; + } +} + + diff --git a/mpi/mpih-mul.c b/mpi/mpih-mul.c new file mode 100644 index 00000000..c579a93f --- /dev/null +++ b/mpi/mpih-mul.c @@ -0,0 +1,557 @@ +/* mpihelp-mul.c - MPI helper functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" +#include "longlong.h" + +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + * value which is good on most machines. */ +#ifndef KARATSUBA_THRESHOLD + #define KARATSUBA_THRESHOLD 32 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ +#if KARATSUBA_THRESHOLD < 2 + #undef KARATSUBA_THRESHOLD + #define KARATSUBA_THRESHOLD 2 +#endif + + +#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ + do { \ + if( (size) < KARATSUBA_THRESHOLD ) \ + mul_n_basecase (prodp, up, vp, size); \ + else \ + mul_n (prodp, up, vp, size, tspace); \ + } while (0); + +#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ + do { \ + if ((size) < KARATSUBA_THRESHOLD) \ + sqr_n_basecase (prodp, up, size); \ + else \ + sqr_n (prodp, up, size, tspace); \ + } while (0); + + + +mpi_limb_t +mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb ); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb?1:0) + prod_high; + + x = res_ptr[j]; + prod_low = x + prod_low; + cy_limb += prod_low < x?1:0; + res_ptr[j] = prod_low; + } while ( ++j ); + return cy_limb; +} + + +mpi_limb_t +mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + mpi_limb_t x; + + /* The loop counter and index J goes from -SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + res_ptr -= j; + s1_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb); + + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb?1:0) + prod_high; + + x = res_ptr[j]; + prod_low = x - prod_low; + cy_limb += prod_low > x?1:0; + res_ptr[j] = prod_low; + } while( ++j ); + + return cy_limb; +} + +mpi_limb_t +mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_limb_t s2_limb) +{ + mpi_limb_t cy_limb; + mpi_size_t j; + mpi_limb_t prod_high, prod_low; + + /* The loop counter and index J goes from -S1_SIZE to -1. This way + * the loop becomes faster. */ + j = -s1_size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + res_ptr -= j; + + cy_limb = 0; + do { + umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb ); + prod_low += cy_limb; + cy_limb = (prod_low < cy_limb?1:0) + prod_high; + res_ptr[j] = prod_low; + } while( ++j ); + + return cy_limb; +} + + +/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), + * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are + * always stored. Return the most significant limb. + * + * Argument constraints: + * 1. PRODP != UP and PRODP != VP, i.e. the destination + * must be distinct from the multiplier and the multiplicand. + * + * + * Handle simple cases with traditional multiplication. + * + * This is the most critical code of multiplication. All multiplies rely + * on this, both small and huge. Small ones arrive here immediately. Huge + * ones arrive here as this is the base case for Karatsuba's recursive + * algorithm below. + */ + +static mpi_limb_t +mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, + mpi_ptr_t vp, mpi_size_t size) +{ + mpi_size_t i; + mpi_limb_t cy; + mpi_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if( v_limb <= 1 ) { + if( v_limb == 1 ) + MPN_COPY( prodp, up, size ); + else + MPN_ZERO( prodp, size ); + cy = 0; + } + else + cy = mpihelp_mul_1( prodp, up, size, v_limb ); + + prodp[size] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for( i = 1; i < size; i++ ) { + v_limb = vp[i]; + if( v_limb <= 1 ) { + cy = 0; + if( v_limb == 1 ) + cy = mpihelp_add_n(prodp, prodp, up, size); + } + else + cy = mpihelp_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy; + prodp++; + } + + return cy; +} + + +static void +mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, + mpi_size_t size, mpi_ptr_t tspace ) +{ + if( size & 1 ) { + /* The size is odd, the code code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. + * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace ); + cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] ); + prodp[esize + esize] = cy_limb; + cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] ); + prodp[esize + size] = cy_limb; + } + else { + /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. + * + * Split U in two pieces, U1 and U0, such that + * U = U0 + U1*(B**n), + * and V in V1 and V0, such that + * V = V0 + V1*(B**n). + * + * UV is then computed recursively using the identity + * + * 2n n n n + * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V + * 1 1 1 0 0 1 0 0 + * + * Where B = 2**BITS_PER_MP_LIMB. + */ + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + int negflg; + + /* Product H. ________________ ________________ + * |_____U1 x V1____||____U0 x V0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace); + + /* Product M. ________________ + * |_(U1-U0)(V0-V1)_| + */ + if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) { + mpihelp_sub_n(prodp, up + hsize, up, hsize); + negflg = 0; + } + else { + mpihelp_sub_n(prodp, up, up + hsize, hsize); + negflg = 1; + } + if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) { + mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); + negflg ^= 1; + } + else { + mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); + /* No change of NEGFLG. */ + } + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size); + + /* Add/copy product H. */ + MPN_COPY (prodp + hsize, prodp + size, hsize); + cy = mpihelp_add_n( prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number) */ + if(negflg) + cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); + else + cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); + + /* Product L. ________________ ________________ + * |________________||____U0 x V0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. + */ + MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); + + /* Add/copy Product L (twice) */ + + cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); + if( cy ) + mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if( cy ) + mpihelp_add_1(prodp + size, prodp + size, size, 1); + } +} + + +static void +sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ) +{ + mpi_size_t i; + mpi_limb_t cy_limb; + mpi_limb_t v_limb; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = up[0]; + if( v_limb <= 1 ) { + if( v_limb == 1 ) + MPN_COPY( prodp, up, size ); + else + MPN_ZERO(prodp, size); + cy_limb = 0; + } + else + cy_limb = mpihelp_mul_1( prodp, up, size, v_limb ); + + prodp[size] = cy_limb; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for( i=1; i < size; i++) { + v_limb = up[i]; + if( v_limb <= 1 ) { + cy_limb = 0; + if( v_limb == 1 ) + cy_limb = mpihelp_add_n(prodp, prodp, up, size); + } + else + cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); + + prodp[size] = cy_limb; + prodp++; + } +} + + +static void +sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) +{ + if( size & 1 ) { + /* The size is odd, the code code below doesn't handle that. + * Multiply the least significant (size - 1) limbs with a recursive + * call, and handle the most significant limb of S1 and S2 + * separately. + * A slightly faster way to do this would be to make the Karatsuba + * code below behave as if the size were even, and let it check for + * odd size in the end. I.e., in essence move this code to the end. + * Doing so would save us a recursive call, and potentially make the + * stack grow a lot less. + */ + mpi_size_t esize = size - 1; /* even size */ + mpi_limb_t cy_limb; + + MPN_SQR_N_RECURSE( prodp, up, esize, tspace ); + cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] ); + prodp[esize + esize] = cy_limb; + cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] ); + + prodp[esize + size] = cy_limb; + } + else { + mpi_size_t hsize = size >> 1; + mpi_limb_t cy; + + /* Product H. ________________ ________________ + * |_____U1 x U1____||____U0 x U0_____| + * Put result in upper part of PROD and pass low part of TSPACE + * as new TSPACE. + */ + MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); + + /* Product M. ________________ + * |_(U1-U0)(U0-U1)_| + */ + if( mpihelp_cmp( up + hsize, up, hsize) >= 0 ) + mpihelp_sub_n( prodp, up + hsize, up, hsize); + else + mpihelp_sub_n (prodp, up, up + hsize, hsize); + + /* Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); + + /* Add/copy product H */ + MPN_COPY(prodp + hsize, prodp + size, hsize); + cy = mpihelp_add_n(prodp + size, prodp + size, + prodp + size + hsize, hsize); + + /* Add product M (if NEGFLG M is a negative number). */ + cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size); + + /* Product L. ________________ ________________ + * |________________||____U0 x U0_____| + * Read temporary operands from low part of PROD. + * Put result in low part of TSPACE using upper part of TSPACE + * as new TSPACE. */ + MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size); + + /* Add/copy Product L (twice). */ + cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size); + if( cy ) + mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, + hsize, cy); + + MPN_COPY(prodp, tspace, hsize); + cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize); + if( cy ) + mpihelp_add_1 (prodp + size, prodp + size, size, 1); + } +} + + +/* This should be made into an inline function in gmp.h. */ +void +mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) +{ + if( up == vp ) { + if( size < KARATSUBA_THRESHOLD ) + sqr_n_basecase( prodp, up, size ); + else { + mpi_ptr_t tspace; + tspace = mpi_alloc_limb_space( 2 * size ); + sqr_n( prodp, up, size, tspace ); + mpi_free_limb_space( tspace ); + } + } + else { + if( size < KARATSUBA_THRESHOLD ) + mul_n_basecase( prodp, up, vp, size ); + else { + mpi_ptr_t tspace; + tspace = mpi_alloc_limb_space( 2 * size ); + mul_n (prodp, up, vp, size, tspace); + mpi_free_limb_space( tspace ); + } + } +} + + +/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) + * and v (pointed to by VP, with VSIZE limbs), and store the result at + * PRODP. USIZE + VSIZE limbs are always stored, but if the input + * operands are normalized. Return the most significant limb of the + * result. + * + * NOTE: The space pointed to by PRODP is overwritten before finished + * with U and V, so overlap is an error. + * + * Argument constraints: + * 1. USIZE >= VSIZE. + * 2. PRODP != UP and PRODP != VP, i.e. the destination + * must be distinct from the multiplier and the multiplicand. + */ + +mpi_limb_t +mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, + mpi_ptr_t vp, mpi_size_t vsize) +{ + mpi_ptr_t prod_endp = prodp + usize + vsize - 1; + mpi_limb_t cy; + mpi_ptr_t tspace; + + if( vsize < KARATSUBA_THRESHOLD ) { + mpi_size_t i; + mpi_limb_t v_limb; + + if( !vsize ) + return 0; + + /* Multiply by the first limb in V separately, as the result can be + * stored (not added) to PROD. We also avoid a loop for zeroing. */ + v_limb = vp[0]; + if( v_limb <= 1 ) { + if( v_limb == 1 ) + MPN_COPY( prodp, up, usize ); + else + MPN_ZERO( prodp, usize ); + cy = 0; + } + else + cy = mpihelp_mul_1( prodp, up, usize, v_limb ); + + prodp[usize] = cy; + prodp++; + + /* For each iteration in the outer loop, multiply one limb from + * U with one limb from V, and add it to PROD. */ + for( i = 1; i < vsize; i++ ) { + v_limb = vp[i]; + if( v_limb <= 1 ) { + cy = 0; + if( v_limb == 1 ) + cy = mpihelp_add_n(prodp, prodp, up, usize); + } + else + cy = mpihelp_addmul_1(prodp, up, usize, v_limb); + + prodp[usize] = cy; + prodp++; + } + + return cy; + } + + tspace = mpi_alloc_limb_space( 2 * vsize ); + MPN_MUL_N_RECURSE( prodp, up, vp, vsize, tspace ); + + prodp += vsize; + up += vsize; + usize -= vsize; + if( usize >= vsize ) { + mpi_ptr_t tp = mpi_alloc_limb_space( 2 * vsize ); + do { + MPN_MUL_N_RECURSE( tp, up, vp, vsize, tspace ); + cy = mpihelp_add_n( prodp, prodp, tp, vsize ); + mpihelp_add_1( prodp + vsize, tp + vsize, vsize, cy ); + prodp += vsize; + up += vsize; + usize -= vsize; + } while( usize >= vsize ); + mpi_free_limb_space( tp ); + } + + if( usize ) { + mpihelp_mul( tspace, vp, vsize, up, usize ); + cy = mpihelp_add_n( prodp, prodp, tspace, vsize); + mpihelp_add_1( prodp + vsize, tspace + vsize, usize, cy ); + } + + mpi_free_limb_space( tspace ); + return *prod_endp; +} + + diff --git a/mpi/mpih-shift.c b/mpi/mpih-shift.c new file mode 100644 index 00000000..a8fe26aa --- /dev/null +++ b/mpi/mpih-shift.c @@ -0,0 +1,94 @@ +/* mpihelp-shift.c - MPI helper functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include "mpi-internal.h" + +/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left + * and store the USIZE least significant digits of the result at WP. + * Return the bits shifted out from the most significant digit. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be >= UP. + */ + +mpi_limb_t +mpihelp_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, + unsigned int cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp += 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + i = usize - 1; + low_limb = up[i]; + retval = low_limb >> sh_2; + high_limb = low_limb; + while( --i >= 0 ) { + low_limb = up[i]; + wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); + high_limb = low_limb; + } + wp[i] = high_limb << sh_1; + + return retval; +} + + +/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right + * and store the USIZE least significant limbs of the result at WP. + * The bits shifted out to the right are returned. + * + * Argument constraints: + * 1. 0 < CNT < BITS_PER_MP_LIMB + * 2. If the result is to be written over the input, WP must be <= UP. + */ + +mpi_limb_t +mpihelp_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt) +{ + mpi_limb_t high_limb, low_limb; + unsigned sh_1, sh_2; + mpi_size_t i; + mpi_limb_t retval; + + sh_1 = cnt; + wp -= 1; + sh_2 = BITS_PER_MPI_LIMB - sh_1; + high_limb = up[0]; + retval = high_limb << sh_2; + low_limb = high_limb; + for( i=1; i < usize; i++) { + high_limb = up[i]; + wp[i] = (low_limb >> sh_1) | (high_limb << sh_2); + low_limb = high_limb; + } + wp[i] = low_limb >> sh_1; + + return retval; +} + diff --git a/mpi/mpih-sub.c b/mpi/mpih-sub.c new file mode 100644 index 00000000..3831d81c --- /dev/null +++ b/mpi/mpih-sub.c @@ -0,0 +1,106 @@ +/* mpihelp-sub.c - MPI helper functions + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> + +#include "mpi-internal.h" + + +/*_EXTERN_INLINE*/ +mpi_limb_t +mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_size_t s1_size, mpi_limb_t s2_limb ) +{ + mpi_limb_t x; + + x = *s1_ptr++; + s2_limb = x - s2_limb; + *res_ptr++ = s2_limb; + if( s2_limb > x ) { + while( --s1_size ) { + x = *s1_ptr++; + *res_ptr++ = x - 1; + if( x ) + goto leave; + } + return 1; + } + + leave: + if( res_ptr != s1_ptr ) { + mpi_size_t i; + for( i=0; i < s1_size-1; i++ ) + res_ptr[i] = s1_ptr[i]; + } + return 0; +} + + +/* FIXME: this should be done in assembly */ +mpi_limb_t +mpihelp_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, + mpi_ptr_t s2_ptr, mpi_size_t size) +{ + mpi_limb_t x, y, cy; + mpi_size_t j; + + /* The loop counter and index J goes from -SIZE to -1. This way + the loop becomes faster. */ + j = -size; + + /* Offset the base pointers to compensate for the negative indices. */ + s1_ptr -= j; + s2_ptr -= j; + res_ptr -= j; + + cy = 0; + do { + y = s2_ptr[j]; + x = s1_ptr[j]; + y += cy; /* add previous carry to subtrahend */ + cy = y < cy ? 1:0; /* get out carry from that addition */ + y = x - y; /* main subtract */ + cy += y > x? 1:0; /* get out carry from the subtract, combine */ + res_ptr[j] = y; + } while( ++j ); + + return cy; +} + + +/*_EXTERN_INLINE*/ +mpi_limb_t +mpihelp_sub( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, + mpi_ptr_t s2_ptr, mpi_size_t s2_size) +{ + mpi_limb_t cy = 0; + + if( s2_size ) + cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size); + + if( s1_size - s2_size ) + cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size, + s1_size - s2_size, cy); + return cy; +} + + diff --git a/mpi/mpiutil.c b/mpi/mpiutil.c new file mode 100644 index 00000000..752ce7f8 --- /dev/null +++ b/mpi/mpiutil.c @@ -0,0 +1,326 @@ +/* mpiutil.c - Utility functions for MPI + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "mpi.h" +#include "mpi-internal.h" +#include "memory.h" +#include "util.h" + + +#ifdef M_DEBUG + #undef mpi_alloc + #undef mpi_alloc_secure + #undef mpi_free +#endif + +typedef struct unused_obj { + struct unused_obj *next; + unsigned length; + union { + MPI mpi; + mpi_limb_t *limb; + } u; +} *unused_obj_t; + +static unused_obj_t unused_objs; +static unused_obj_t unused_mpis; +static unused_obj_t unused_limbs; + + +MPI +#ifdef M_DEBUG +mpi_debug_alloc( unsigned nlimbs, const char *info ) +#else +mpi_alloc( unsigned nlimbs ) +#endif +{ + MPI a; + + if( unused_mpis ) { + unused_obj_t u; + + if( DBG_MEMORY ) + log_debug("mpi_alloc(%lu) reusing\n", nlimbs*BITS_PER_MPI_LIMB ); + a = unused_mpis->u.mpi; + u = unused_mpis; + unused_mpis = unused_mpis->next; + u->next = unused_objs; + unused_objs = u; + } + else { + if( DBG_MEMORY ) + log_debug("mpi_alloc(%lu) new\n", nlimbs*BITS_PER_MPI_LIMB ); + #ifdef M_DEBUG + a = m_debug_alloc( sizeof *a, info ); + #else + a = m_alloc( sizeof *a ); + #endif + } + #ifdef M_DEBUG + a->d = mpi_debug_alloc_limb_space( nlimbs, info ); + #else + a->d = mpi_alloc_limb_space( nlimbs ); + #endif + a->alloced = nlimbs; + a->nlimbs = 0; + a->sign = 0; + return a; +} + +void +mpi_m_check( MPI a ) +{ + m_check(a); + m_check(a->d); +} + +MPI +#ifdef M_DEBUG +mpi_debug_alloc_secure( unsigned nlimbs, const char *info ) +#else +mpi_alloc_secure( unsigned nlimbs ) +#endif +{ + MPI a; + + a = m_alloc( sizeof *a ); + #ifdef M_DEBUG + a->d = m_debug_alloc_secure( nlimbs * sizeof(mpi_limb_t), info ); + #else + a->d = m_alloc_secure( nlimbs * sizeof(mpi_limb_t) ); + #endif + a->alloced = nlimbs; + a->nlimbs = 0; + a->sign = 0; + return a; +} + + +mpi_ptr_t +#ifdef M_DEBUG +mpi_debug_alloc_limb_space( unsigned nlimbs, const char *info ) +#else +mpi_alloc_limb_space( unsigned nlimbs ) +#endif +{ + unused_obj_t u; + size_t len = nlimbs * sizeof(mpi_limb_t); + + for(u=unused_limbs; u; u = u->next ) + if( u->length >= len ) { + u->length = 0; + if( DBG_MEMORY ) + log_debug("mpi_alloc_limb_space(%lu) reusing\n", len*8 ); + return u->u.limb; + } + if( DBG_MEMORY ) + log_debug("mpi_alloc_limb_space(%u) new\n", len*8 ); + #ifdef M_DEBUG + return m_debug_alloc( len, info ); + #else + return m_alloc( len ); + #endif +} + +void +#ifdef M_DEBUG +mpi_debug_free_limb_space( mpi_ptr_t a, const char *info ) +#else +mpi_free_limb_space( mpi_ptr_t a ) +#endif +{ + unused_obj_t u; + + if( !a ) + return; + if( DBG_MEMORY ) + log_debug("mpi_free_limb_space of size %lu\n", (ulong)m_size(a)*8 ); + for(u=unused_limbs; u; u = u->next ) + if( !u->length ) { + u->length = m_size(a); + u->u.limb = a; + return; + } + + if( (u=unused_objs) ) + unused_objs = unused_objs->next; + else + u = m_alloc( sizeof *u ); + u->length = m_size(a); + u->u.limb = a; + u->next = unused_limbs; + unused_limbs = u; +} + + +void +mpi_assign_limb_space( MPI a, mpi_ptr_t ap, unsigned nlimbs ) +{ + mpi_free_limb_space(a->d); + a->d = ap; + a->alloced = nlimbs; +} + + + +/**************** + * Resize the array of A to NLIMBS. the additional space is cleared + * (set to 0) [done by m_realloc()] + */ +void +#ifdef M_DEBUG +mpi_debug_resize( MPI a, unsigned nlimbs, const char *info ) +#else +mpi_resize( MPI a, unsigned nlimbs ) +#endif +{ + if( nlimbs <= a->alloced ) + return; /* no need to do it */ + #ifdef M_DEBUG + if( a->d ) + a->d = m_debug_realloc(a->d, nlimbs * sizeof(mpi_limb_t), info ); + else + a->d = m_debug_alloc_clear( nlimbs * sizeof(mpi_limb_t), info ); + #else + if( a->d ) + a->d = m_realloc(a->d, nlimbs * sizeof(mpi_limb_t) ); + else + a->d = m_alloc_clear( nlimbs * sizeof(mpi_limb_t) ); + #endif + a->alloced = nlimbs; +} + +void +mpi_clear( MPI a ) +{ + a->nlimbs = 0; +} + + +void +#ifdef M_DEBUG +mpi_debug_free( MPI a, const char *info ) +#else +mpi_free( MPI a ) +#endif +{ + unused_obj_t u; + + if( !a ) + return; + if( DBG_MEMORY ) + log_debug("mpi_free\n" ); + #ifdef M_DEBUG + mpi_debug_free_limb_space(a->d, info); + #else + mpi_free_limb_space(a->d); + #endif + + if( (u=unused_objs) ) + unused_objs = unused_objs->next; + else + u = m_alloc( sizeof *u ); + u->u.mpi = a; + u->next = unused_mpis; + unused_mpis = u; +} + + +MPI +#ifdef M_DEBUG +mpi_debug_copy( MPI a, const char *info ) +#else +mpi_copy( MPI a ) +#endif +{ + int i; + MPI b; + + if( a ) { + #ifdef M_DEBUG + b = mpi_debug_alloc( a->nlimbs, info ); + #else + b = mpi_alloc( a->nlimbs ); + #endif + b->nlimbs = a->nlimbs; + for(i=0; i < b->nlimbs; i++ ) + b->d[i] = a->d[i]; + } + else + b = NULL; + return b; +} + + +void +mpi_set( MPI w, MPI u) +{ + mpi_ptr_t wp, up; + mpi_size_t usize = u->nlimbs; + int usign = u->sign; + + RESIZE_IF_NEEDED(w, usize); + wp = w->d; + up = u->d; + MPN_COPY( wp, up, usize ); + w->nlimbs = usize; + w->sign = usign; +} + + +void +mpi_set_ui( MPI w, unsigned long u) +{ + RESIZE_IF_NEEDED(w, 1); + w->d[0] = u; + w->nlimbs = u? 1:0; + w->sign = 0; +} + + +MPI +mpi_alloc_set_ui( unsigned long u) +{ + #ifdef M_DEBUG + MPI w = mpi_debug_alloc(1,"alloc_set_ui"); + #else + MPI w = mpi_alloc(1); + #endif + w->d[0] = u; + w->nlimbs = u? 1:0; + w->sign = 0; + return w; +} + + +void +mpi_swap( MPI a, MPI b) +{ + MPI x; + + x = a; a = b; b = x; +} + + |