author    Werner Koch <wk@gnupg.org>  1997-11-18 14:05:56 +0000
committer Werner Koch <wk@gnupg.org>  1997-11-18 14:05:56 +0000
commit    4b5e71ca4e84e61e595dec19e1c7cab0c0a73f24 (patch)
tree      cfa374507b08344f49f28814c67f058723a485d4 /mpi
parent    539284b719a82a79e3d3bba68de85581ea8f77f1 (diff)
download  libgcrypt-4b5e71ca4e84e61e595dec19e1c7cab0c0a73f24.tar.gz
initially checkin
Diffstat (limited to 'mpi')
-rw-r--r--  mpi/Makefile.am       27
-rw-r--r--  mpi/Makefile.in      271
-rw-r--r--  mpi/longlong.h      1398
-rw-r--r--  mpi/mpi-add.c        221
-rw-r--r--  mpi/mpi-bit.c        133
-rw-r--r--  mpi/mpi-cmp.c         72
-rw-r--r--  mpi/mpi-div.c        282
-rw-r--r--  mpi/mpi-gcd.c         54
-rw-r--r--  mpi/mpi-internal.h   198
-rw-r--r--  mpi/mpi-inv.c        127
-rw-r--r--  mpi/mpi-mul.c        178
-rw-r--r--  mpi/mpi-pow.c        247
-rw-r--r--  mpi/mpi-scan.c        88
-rw-r--r--  mpi/mpicoder.c       392
-rw-r--r--  mpi/mpih-add.c       109
-rw-r--r--  mpi/mpih-cmp.c        53
-rw-r--r--  mpi/mpih-div.c       528
-rw-r--r--  mpi/mpih-mul.c       557
-rw-r--r--  mpi/mpih-shift.c      94
-rw-r--r--  mpi/mpih-sub.c       106
-rw-r--r--  mpi/mpiutil.c        326
21 files changed, 5461 insertions, 0 deletions
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
new file mode 100644
index 00000000..5edd90c2
--- /dev/null
+++ b/mpi/Makefile.am
@@ -0,0 +1,27 @@
+## Process this file with automake to produce Makefile.in
+
+INCLUDES = -I$(top_srcdir)/include
+
+noinst_LIBRARIES = mpi
+
+
+mpi_SOURCES = longlong.h \
+ mpi-add.c \
+ mpi-bit.c \
+ mpi-cmp.c \
+ mpi-div.c \
+ mpi-gcd.c \
+ mpi-internal.h \
+ mpi-inv.c \
+ mpi-mul.c \
+ mpi-pow.c \
+ mpi-scan.c \
+ mpicoder.c \
+ mpihelp-add.c \
+ mpihelp-cmp.c \
+ mpihelp-div.c \
+ mpihelp-mul.c \
+ mpihelp-shift.c \
+ mpihelp-sub.c \
+ mpiutil.c
+
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
new file mode 100644
index 00000000..4f493a88
--- /dev/null
+++ b/mpi/Makefile.in
@@ -0,0 +1,271 @@
+# Makefile.in generated automatically by automake 1.0 from Makefile.am
+
+# Copyright (C) 1994, 1995, 1996 Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+
+
+SHELL = /bin/sh
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+
+top_builddir = ..
+
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+transform = @program_transform_name@
+
+INCLUDES = -I$(top_srcdir)/include
+
+noinst_LIBRARIES = mpi
+
+mpi_SOURCES = longlong.h \
+ mpi-add.c \
+ mpi-bit.c \
+ mpi-cmp.c \
+ mpi-div.c \
+ mpi-gcd.c \
+ mpi-internal.h \
+ mpi-inv.c \
+ mpi-mul.c \
+ mpi-pow.c \
+ mpi-scan.c \
+ mpicoder.c \
+ mpihelp-add.c \
+ mpihelp-cmp.c \
+ mpihelp-div.c \
+ mpihelp-mul.c \
+ mpihelp-shift.c \
+ mpihelp-sub.c \
+ mpiutil.c
+mkinstalldirs = $(top_srcdir)/scripts/mkinstalldirs
+CONFIG_HEADER = ../config.h
+LIBRARIES = $(noinst_LIBRARIES)
+
+noinst_LIBFILES = libmpi.a
+
+CC = @CC@
+LEX = @LEX@
+YACC = @YACC@
+
+DEFS = @DEFS@ -I. -I$(srcdir) -I..
+CPPFLAGS = @CPPFLAGS@
+CFLAGS = @CFLAGS@
+LDFLAGS = @LDFLAGS@
+LIBS = @LIBS@
+
+COMPILE = $(CC) -c $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
+LINK = $(CC) $(LDFLAGS) -o $@
+mpi_LIBADD =
+mpi_OBJECTS = mpi-add.o mpi-bit.o mpi-cmp.o mpi-div.o mpi-gcd.o \
+mpi-inv.o mpi-mul.o mpi-pow.o mpi-scan.o mpicoder.o mpihelp-add.o \
+mpihelp-cmp.o mpihelp-div.o mpihelp-mul.o mpihelp-shift.o mpihelp-sub.o \
+mpiutil.o
+EXTRA_mpi_SOURCES =
+LIBFILES = libmpi.a
+AR = ar
+RANLIB = @RANLIB@
+DIST_COMMON = Makefile.am Makefile.in
+
+
+PACKAGE = @PACKAGE@
+VERSION = @VERSION@
+
+DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \
+ $(TEXINFOS) $(INFOS) $(MANS) $(EXTRA_DIST) $(DATA)
+DEP_DISTFILES = $(DIST_COMMON) $(SOURCES) $(BUILT_SOURCES) $(HEADERS) \
+ $(TEXINFOS) $(INFO_DEPS) $(MANS) $(EXTRA_DIST) $(DATA)
+
+TAR = tar
+DEP_FILES = $(srcdir)/.deps/mpi-add.P $(srcdir)/.deps/mpi-bit.P \
+$(srcdir)/.deps/mpi-cmp.P $(srcdir)/.deps/mpi-div.P \
+$(srcdir)/.deps/mpi-gcd.P $(srcdir)/.deps/mpi-inv.P \
+$(srcdir)/.deps/mpi-mul.P $(srcdir)/.deps/mpi-pow.P \
+$(srcdir)/.deps/mpi-scan.P $(srcdir)/.deps/mpicoder.P \
+$(srcdir)/.deps/mpihelp-add.P $(srcdir)/.deps/mpihelp-cmp.P \
+$(srcdir)/.deps/mpihelp-div.P $(srcdir)/.deps/mpihelp-mul.P \
+$(srcdir)/.deps/mpihelp-shift.P $(srcdir)/.deps/mpihelp-sub.P \
+$(srcdir)/.deps/mpiutil.P
+SOURCES = $(mpi_SOURCES)
+OBJECTS = $(mpi_OBJECTS)
+
+default: all
+
+
+$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in
+ cd $(top_srcdir) && automake $(subdir)/Makefile
+
+Makefile: $(top_builddir)/config.status Makefile.in
+ cd $(top_builddir) && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= ./config.status
+
+mostlyclean-noinstLIBRARIES:
+
+clean-noinstLIBRARIES:
+ rm -f $(noinst_LIBFILES)
+
+distclean-noinstLIBRARIES:
+
+maintainer-clean-noinstLIBRARIES:
+
+.c.o:
+ $(COMPILE) $<
+
+mostlyclean-compile:
+ rm -f *.o core
+
+clean-compile:
+
+distclean-compile:
+ rm -f *.tab.c
+
+maintainer-clean-compile:
+$(mpi_OBJECTS): ../config.h
+
+libmpi.a: $(mpi_OBJECTS) $(mpi_LIBADD)
+ rm -f libmpi.a
+ $(AR) cru libmpi.a $(mpi_OBJECTS) $(mpi_LIBADD)
+ $(RANLIB) libmpi.a
+
+ID: $(HEADERS) $(SOURCES)
+ here=`pwd` && cd $(srcdir) && mkid -f$$here/ID $(SOURCES) $(HEADERS)
+
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES)
+ here=`pwd` && cd $(srcdir) && etags $(ETAGS_ARGS) $(SOURCES) $(HEADERS) -o $$here/TAGS
+
+mostlyclean-tags:
+
+clean-tags:
+
+distclean-tags:
+ rm -f TAGS ID
+
+maintainer-clean-tags:
+
+subdir = mpi
+distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir)
+distdir: $(DEP_DISTFILES)
+ @for file in `cd $(srcdir) && echo $(DISTFILES)`; do \
+ test -f $(distdir)/$$file \
+ || ln $(srcdir)/$$file $(distdir)/$$file 2> /dev/null \
+ || cp -p $(srcdir)/$$file $(distdir)/$$file; \
+ done
+
+# This fragment is probably only useful for maintainers. It relies on
+# GNU make and gcc. It is only included in the generated Makefile.in
+# if `automake' is not passed the `--include-deps' flag.
+
+MKDEP = gcc -MM $(DEFS) $(INCLUDES) $(CPPFLAGS) $(CFLAGS)
+
+-include $(srcdir)/.deps/.P
+$(srcdir)/.deps/.P: $(BUILT_SOURCES)
+ cd $(srcdir) && test -d .deps || mkdir .deps
+ echo > $@
+
+-include $(DEP_FILES)
+$(DEP_FILES): $(srcdir)/.deps/.P
+
+$(srcdir)/.deps/%.P: $(srcdir)/%.c
+ @echo "mkdeps $< > $@"
+ @re=`echo 's,^$(srcdir)//*,,g;s, $(srcdir)//*, ,g' | sed 's,\.,\\\\.,g'`; \
+ $(MKDEP) $< | sed "$$re" > $@-tmp
+ @if test -n "$o"; then \
+ sed 's/\.o:/$$o:/' $@-tmp > $@; \
+ rm $@-tmp; \
+ else \
+ mv $@-tmp $@; \
+ fi
+
+# End of maintainer-only section
+info:
+
+dvi:
+
+check: all
+
+installcheck:
+
+install-exec:
+
+install-data:
+
+install: install-exec install-data all
+ @:
+
+uninstall:
+
+all: $(LIBFILES) Makefile
+
+install-strip:
+ $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
+installdirs:
+
+
+mostlyclean-generic:
+ test -z "$(MOSTLYCLEANFILES)" || rm -f $(MOSTLYCLEANFILES)
+
+clean-generic:
+ test -z "$(CLEANFILES)" || rm -f $(CLEANFILES)
+
+distclean-generic:
+ rm -f Makefile $(DISTCLEANFILES)
+ rm -f config.cache config.log $(CONFIG_HEADER) stamp-h
+
+maintainer-clean-generic:
+ test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
+ test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
+mostlyclean: mostlyclean-noinstLIBRARIES mostlyclean-compile \
+ mostlyclean-tags mostlyclean-generic
+
+clean: clean-noinstLIBRARIES clean-compile clean-tags clean-generic \
+ mostlyclean
+
+distclean: distclean-noinstLIBRARIES distclean-compile distclean-tags \
+ distclean-generic clean
+ rm -f config.status
+
+maintainer-clean: maintainer-clean-noinstLIBRARIES \
+ maintainer-clean-compile maintainer-clean-tags \
+ maintainer-clean-generic distclean
+ @echo "This command is intended for maintainers to use;"
+ @echo "it deletes files that may require special tools to rebuild."
+
+.PHONY: default mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \
+clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \
+mostlyclean-compile distclean-compile clean-compile \
+maintainer-clean-compile tags mostlyclean-tags distclean-tags \
+clean-tags maintainer-clean-tags distdir info dvi check installcheck \
+install-exec install-data install uninstall all installdirs \
+mostlyclean-generic distclean-generic clean-generic \
+maintainer-clean-generic clean mostlyclean distclean maintainer-clean
+
+.SUFFIXES:
+.SUFFIXES: .c .o
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/mpi/longlong.h b/mpi/longlong.h
new file mode 100644
index 00000000..006f69ed
--- /dev/null
+++ b/mpi/longlong.h
@@ -0,0 +1,1398 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+
+Copyright (C) 1991, 1992, 1993, 1994, 1996 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or (at your
+option) any later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
+License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with this file; see the file COPYING.LIB. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+MA 02111-1307, USA. */
+
+/* You have to define the following before including this file:
+
+ UWtype -- An unsigned type, default type for operations (typically a "word")
+ UHWtype -- An unsigned type, at least half the size of UWtype.
+ UDWtype -- An unsigned type, at least twice as large as UWtype
+ W_TYPE_SIZE -- size in bits of UWtype
+
+ SItype, USItype -- Signed and unsigned 32 bit types.
+ DItype, UDItype -- Signed and unsigned 64 bit types.
+
+ On a 32 bit machine UWtype should typically be USItype;
+ on a 64 bit machine, UWtype should typically be UDItype.
+*/
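/* Editor's sketch -- NOT part of the checkin: a typical 32-bit
 * configuration a consumer of this header would supply before the
 * include.  In this tree the real definitions come from
 * mpi-internal.h; the typedefs below are illustrative assumptions
 * only (unsigned long long is a GCC extension at this date). */
typedef unsigned int       UWtype;   /* one machine word      */
typedef unsigned short     UHWtype;  /* at least half a word  */
typedef unsigned long long UDWtype;  /* at least two words    */
typedef int                SItype;
typedef unsigned int       USItype;
typedef long long          DItype;
typedef unsigned long long UDItype;
#define W_TYPE_SIZE 32
#include "longlong.h"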
+
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+
+/* This is used to make sure no undesirable sharing between different libraries
+ that use this file takes place. */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+
+/* Define auxiliary asm macros.
+
+ 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+ UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+ word product in HIGH_PROD and LOW_PROD.
+
+ 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ UDWtype product. This is just a variant of umul_ppmm.
+
+ 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ denominator) divides a UDWtype, composed by the UWtype integers
+ HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
+ than DENOMINATOR for correct operation. If, in addition, the most
+ significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ UDIV_NEEDS_NORMALIZATION is defined to 1.
+
+ 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ denominator). Like udiv_qrnnd but the numbers are signed. The quotient
+ is rounded towards 0.
+
+ 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ msb to the first non-zero bit in the UWtype X. This is the number of
+ steps X needs to be shifted left to set the msb. Undefined for X == 0,
+ unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+
+ 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ from the least significant end.
+
+ 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ high_addend_2, low_addend_2) adds two two-word UWtype integers, composed of
+ HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
+ (i.e. carry out) is not stored anywhere, and is lost.
+
+ 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+ LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE
+ and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
+ and is lost.
+
+ If any of these macros are left undefined for a particular CPU,
+ C macros are used. */
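/* Editor's sketch -- NOT part of the checkin: how a caller combines
 * the macros above, assuming the type set-up sketched earlier.  The
 * values are illustrative. */
static void example_mul_div (void)
{
  UWtype hi, lo, q, r;
  umul_ppmm (hi, lo, (UWtype) 123456789, (UWtype) 3);  /* hi = 0, lo = 370370367 */
  udiv_qrnnd (q, r, hi, lo, (UWtype) 3);               /* q = 123456789, r = 0   */
  /* udiv_qrnnd requires hi < divisor; on CPUs where
     UDIV_NEEDS_NORMALIZATION is 1 the divisor's msb must also be set
     first -- see count_leading_zeros and the sketch near the end of
     this file. */
}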
+
+/* The CPUs come in alphabetical order below.
+
+ Please add support for more CPUs here, or improve the current support
+ for the CPUs below! */
+
+#if defined (__GNUC__) && !defined (NO_ASM)
+
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+ understood by gcc1. Use cpp to avoid major code duplication. */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+
+#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %1,%4,%5
+ addc %0,%2,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %1,%4,%5
+ subc %0,%2,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("multiplu %0,%1,%2" \
+ : "=r" ((USItype)(xl)) \
+ : "r" (__m0), \
+ "r" (__m1)); \
+ __asm__ ("multmu %0,%1,%2" \
+ : "=r" ((USItype)(xh)) \
+ : "r" (__m0), \
+ "r" (__m1)); \
+ } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("dividu %0,%3,%4" \
+ : "=r" ((USItype)(q)), \
+ "=q" ((USItype)(r)) \
+ : "1" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "r" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+
+#if defined (__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("umulh %r1,%2,%0" \
+ : "=r" ((UDItype) ph) \
+ : "%rJ" (__m0), \
+ "rI" (__m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { UDItype __r; \
+ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern UDItype __udiv_qrnnd ();
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha */
+
+#if defined (__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("adds %1, %4, %5
+ adc %0, %2, %3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subs %1, %4, %5
+ sbc %0, %2, %3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "r" ((USItype)(al)), \
+ "rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, a, b) \
+ __asm__ ("%@ Inlined umul_ppmm
+ mov %|r0, %2, lsr #16
+ mov %|r2, %3, lsr #16
+ bic %|r1, %2, %|r0, lsl #16
+ bic %|r2, %3, %|r2, lsl #16
+ mul %1, %|r1, %|r2
+ mul %|r2, %|r0, %|r2
+ mul %|r1, %0, %|r1
+ mul %0, %|r0, %0
+ adds %|r1, %|r2, %|r1
+ addcs %0, %0, #65536
+ adds %1, %1, %|r1, lsl #16
+ adc %0, %0, %|r1, lsr #16" \
+ : "=&r" ((USItype)(xh)), \
+ "=r" ((USItype)(xl)) \
+ : "r" ((USItype)(a)), \
+ "r" ((USItype)(b)) \
+ : "r0", "r1", "r2")
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+
+#if defined (__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __xx; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__xx.__ll) \
+ : "%0" ((USItype)(u)), \
+ "r" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define smul_ppmm(w1, w0, u, v) \
+ ({union {DItype __ll; \
+ struct {SItype __l, __h;} __i; \
+ } __xx; \
+ __asm__ ("mulwx %2,%0" \
+ : "=r" (__xx.__ll) \
+ : "%0" ((SItype)(u)), \
+ "r" ((SItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("mulwux %2,%0" \
+ : "=r" (__w) \
+ : "%0" ((USItype)(u)), \
+ "r" ((USItype)(v))); \
+ __w; })
+#endif /* __clipper__ */
+
+#if defined (__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add.w %5,%1
+ addx %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub.w %5,%1
+ subx %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+ __asm__ ("mulx %3,%0,%1" \
+ : "=g" ((USItype)(ph)), \
+ "=r" ((USItype)(pl)) \
+ : "%0" ((USItype)(m0)), \
+ "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("divx %4,%0,%1" \
+ : "=g" ((USItype)(q)), \
+ "=r" ((USItype)(r)) \
+ : "1" ((USItype)(nh)), \
+ "0" ((USItype)(nl)), \
+ "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ __asm__ ("bsch/1 %1,%0" \
+ : "=g" (count) \
+ : "g" ((USItype)(x)), \
+ "0" ((USItype)0))
+#endif
+
+#if defined (__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %4,%5,%1
+ addc %2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%rM" ((USItype)(ah)), \
+ "rM" ((USItype)(bh)), \
+ "%rM" ((USItype)(al)), \
+ "rM" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %4,%5,%1
+ subb %2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "rM" ((USItype)(ah)), \
+ "rM" ((USItype)(bh)), \
+ "rM" ((USItype)(al)), \
+ "rM" ((USItype)(bl)))
+#if defined (_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __xx; \
+ __asm__ ("xmpyu %1,%2,%0" \
+ : "=*f" (__xx.__ll) \
+ : "*f" ((USItype)(u)), \
+ "*f" ((USItype)(v))); \
+ (wh) = __xx.__i.__h; \
+ (wl) = __xx.__i.__l; \
+ } while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { USItype __r; \
+ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern USItype __udiv_qrnnd ();
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __tmp; \
+ __asm__ ( \
+ "ldi 1,%0
+ extru,= %1,15,16,%%r0 ; Bits 31..16 zero?
+ extru,tr %1,15,16,%1 ; No. Shift down, skip add.
+ ldo 16(%0),%0 ; Yes. Perform add.
+ extru,= %1,23,8,%%r0 ; Bits 15..8 zero?
+ extru,tr %1,23,8,%1 ; No. Shift down, skip add.
+ ldo 8(%0),%0 ; Yes. Perform add.
+ extru,= %1,27,4,%%r0 ; Bits 7..4 zero?
+ extru,tr %1,27,4,%1 ; No. Shift down, skip add.
+ ldo 4(%0),%0 ; Yes. Perform add.
+ extru,= %1,29,2,%%r0 ; Bits 3..2 zero?
+ extru,tr %1,29,2,%1 ; No. Shift down, skip add.
+ ldo 2(%0),%0 ; Yes. Perform add.
+ extru %1,30,1,%1 ; Extract bit 1.
+ sub %0,%1,%0 ; Subtract it.
+ " : "=r" (count), "=r" (__tmp) : "1" (x)); \
+ } while (0)
+#endif /* hppa */
+
+#if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __xx; \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mr %0,%3" \
+ : "=r" (__xx.__i.__h), \
+ "=r" (__xx.__i.__l) \
+ : "%1" (__m0), \
+ "r" (__m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+ (xh) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+ } while (0)
+#define smul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __xx; \
+ __asm__ ("mr %0,%3" \
+ : "=r" (__xx.__i.__h), \
+ "=r" (__xx.__i.__l) \
+ : "%1" (m0), \
+ "r" (m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+ } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ union {DItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __xx; \
+ __xx.__i.__h = n1; __xx.__i.__l = n0; \
+ __asm__ ("dr %0,%2" \
+ : "=r" (__xx.__ll) \
+ : "0" (__xx.__ll), "r" (d)); \
+ (q) = __xx.__i.__l; (r) = __xx.__i.__h; \
+ } while (0)
+#endif
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl %5,%1
+ adcl %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl %5,%1
+ sbbl %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mull %3" \
+ : "=a" ((USItype)(w0)), \
+ "=d" ((USItype)(w1)) \
+ : "%0" ((USItype)(u)), \
+ "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divl %4" \
+ : "=a" ((USItype)(q)), \
+ "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), \
+ "1" ((USItype)(n1)), \
+ "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("bsrl %1,%0" \
+ : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define count_trailing_zeros(count, x) \
+ __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+
+#if defined (__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r,h,l,c) \
+ __asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \
+ "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+
+#if defined (__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%dI" ((USItype)(ah)), \
+ "dI" ((USItype)(bh)), \
+ "%dI" ((USItype)(al)), \
+ "dI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "dI" ((USItype)(ah)), \
+ "dI" ((USItype)(bh)), \
+ "dI" ((USItype)(al)), \
+ "dI" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __xx; \
+ __asm__ ("emul %2,%1,%0" \
+ : "=d" (__xx.__ll) \
+ : "%dI" ((USItype)(u)), \
+ "dI" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("emul %2,%1,%0" \
+ : "=d" (__w) \
+ : "%dI" ((USItype)(u)), \
+ "dI" ((USItype)(v))); \
+ __w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __nn, __rq; \
+ __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+ __asm__ ("ediv %d,%n,%0" \
+ : "=d" (__rq.__ll) \
+ : "dI" (__nn.__ll), \
+ "dI" ((USItype)(d))); \
+ (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("scanbit %1,%0" \
+ : "=r" (__cbtmp) \
+ : "r" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
+#if defined (__i960mx) /* what is the proper symbol to test??? */
+#define rshift_rhlc(r,h,l,c) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __nn; \
+ __nn.__i.__h = (h); __nn.__i.__l = (l); \
+ __asm__ ("shre %2,%1,%0" \
+ : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+ }
+#endif /* i960mx */
+#endif /* i960 */
+
+#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add%.l %5,%1
+ addx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), \
+ "=&d" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "d" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub%.l %5,%1
+ subx%.l %3,%0" \
+ : "=d" ((USItype)(sh)), \
+ "=&d" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "d" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("mulu%.l %3,%1:%0" \
+ : "=d" ((USItype)(w0)), \
+ "=d" ((USItype)(w1)) \
+ : "%0" ((USItype)(u)), \
+ "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divu%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), \
+ "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), \
+ "1" ((USItype)(n1)), \
+ "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("divs%.l %4,%1:%0" \
+ : "=d" ((USItype)(q)), \
+ "=d" ((USItype)(r)) \
+ : "0" ((USItype)(n0)), \
+ "1" ((USItype)(n1)), \
+ "dmi" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+ __asm__ ("bfffo %1{%b2:%b2},%0" \
+ : "=d" ((USItype)(count)) \
+ : "od" ((USItype)(x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#else /* not mc68020 */
+#define umul_ppmm(xh, xl, a, b) \
+ do { USItype __umul_tmp1, __umul_tmp2; \
+ __asm__ ("| Inlined umul_ppmm
+ move%.l %5,%3
+ move%.l %2,%0
+ move%.w %3,%1
+ swap %3
+ swap %0
+ mulu %2,%1
+ mulu %3,%0
+ mulu %2,%3
+ swap %2
+ mulu %5,%2
+ add%.l %3,%2
+ jcc 1f
+ add%.l %#0x10000,%0
+1: move%.l %2,%3
+ clr%.w %2
+ swap %2
+ swap %3
+ clr%.w %3
+ add%.l %3,%1
+ addx%.l %2,%0
+ | End inlined umul_ppmm" \
+ : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+ "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+ : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+ } while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+#endif /* mc68000 */
+
+#if defined (__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addu.co %1,%r4,%r5
+ addu.ci %0,%r2,%r3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%rJ" ((USItype)(ah)), \
+ "rJ" ((USItype)(bh)), \
+ "%rJ" ((USItype)(al)), \
+ "rJ" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subu.co %1,%r4,%r5
+ subu.ci %0,%r2,%r3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "rJ" ((USItype)(ah)), \
+ "rJ" ((USItype)(bh)), \
+ "rJ" ((USItype)(al)), \
+ "rJ" ((USItype)(bl)))
+#define count_leading_zeros(count, x) \
+ do { \
+ USItype __cbtmp; \
+ __asm__ ("ff1 %0,%1" \
+ : "=r" (__cbtmp) \
+ : "r" ((USItype)(x))); \
+ (count) = __cbtmp ^ 31; \
+ } while (0)
+#define COUNT_LEADING_ZEROS_0 63 /* sic */
+#if defined (__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x; \
+ __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+ (wh) = __x.__i.__h; \
+ (wl) = __x.__i.__l; \
+ } while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __x, __q; \
+ __x.__i.__h = (n1); __x.__i.__l = (n0); \
+ __asm__ ("divu.d %0,%1,%2" \
+ : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+ (r) = (n0) - __q.__i.__l * (d); (q) = __q.__i.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+
+#if defined (__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3" \
+ : "=l" ((USItype)(w0)), \
+ "=h" ((USItype)(w1)) \
+ : "d" ((USItype)(u)), \
+ "d" ((USItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("multu %2,%3
+ mflo %0
+ mfhi %1" \
+ : "=d" ((USItype)(w0)), \
+ "=d" ((USItype)(w1)) \
+ : "d" ((USItype)(u)), \
+ "d" ((USItype)(v)))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+
+#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3" \
+ : "=l" ((UDItype)(w0)), \
+ "=h" ((UDItype)(w1)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("dmultu %2,%3
+ mflo %0
+ mfhi %1" \
+ : "=d" ((UDItype)(w0)), \
+ "=d" ((UDItype)(w1)) \
+ : "d" ((UDItype)(u)), \
+ "d" ((UDItype)(v)))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips__ */
+
+#if defined (__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __xx; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__xx.__ll) \
+ : "%0" ((USItype)(u)), \
+ "g" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#define __umulsidi3(u, v) \
+ ({UDItype __w; \
+ __asm__ ("meid %2,%0" \
+ : "=g" (__w) \
+ : "%0" ((USItype)(u)), \
+ "g" ((USItype)(v))); \
+ __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ ({union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __xx; \
+ __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
+ __asm__ ("deid %2,%0" \
+ : "=g" (__xx.__ll) \
+ : "0" (__xx.__ll), \
+ "g" ((USItype)(d))); \
+ (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count,x) \
+ do { \
+ __asm__ ("ffsd %2,%0" \
+ : "=r" ((USItype) (count)) \
+ : "0" ((USItype) 0), \
+ "r" ((USItype) (x))); \
+ } while (0)
+#endif /* __ns32000__ */
+
+#if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl))); \
+ else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \
+ __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl))); \
+ else \
+ __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%r" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "%r" ((USItype)(al)), \
+ "rI" ((USItype)(bl))); \
+ } while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ if (__builtin_constant_p (ah) && (ah) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(bh)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(bh)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else if (__builtin_constant_p (bh) && (bh) == 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \
+ __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ else \
+ __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "r" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "rI" ((USItype)(al)), \
+ "r" ((USItype)(bl))); \
+ } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("{cntlz|cntlzw} %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined (_ARCH_PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhwu %0,%1,%2" \
+ : "=r" ((USItype) ph) \
+ : "%r" (__m0), \
+ "r" (__m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+ do { \
+ SItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mulhw %0,%1,%2" \
+ : "=r" ((SItype) ph) \
+ : "%r" (__m0), \
+ "r" (__m1)); \
+ (pl) = __m0 * __m1; \
+ } while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mul %0,%2,%3" \
+ : "=r" ((USItype)(xh)), \
+ "=q" ((USItype)(xl)) \
+ : "r" (__m0), \
+ "r" (__m1)); \
+ (xh) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+ } while (0)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+ __asm__ ("mul %0,%2,%3" \
+ : "=r" ((SItype)(xh)), \
+ "=q" ((SItype)(xl)) \
+ : "r" (m0), \
+ "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+ __asm__ ("div %0,%2,%4" \
+ : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
+ : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
+#define UDIV_TIME 100
+#endif
+#endif /* Power architecture variants. */
+
+#if defined (__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addw %5,%1
+ addwc %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subw %5,%1
+ subwb %3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
+#define umul_ppmm(w1, w0, u, v) \
+ ({union {UDItype __ll; \
+ struct {USItype __h, __l;} __i; \
+ } __xx; \
+ __asm__ ("movw %1,%R0
+ uemul %2,%0" \
+ : "=&r" (__xx.__ll) \
+ : "g" ((USItype) (u)), \
+ "g" ((USItype)(v))); \
+ (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
+#endif /* __pyr__ */
+
+#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("a %1,%5
+ ae %0,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("s %1,%5
+ se %0,%3" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "r" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "r" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ( \
+ "s r2,r2
+ mts r10,%2
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ m r2,%3
+ cas %0,r2,r0
+ mfs r10,%1" \
+ : "=r" ((USItype)(ph)), \
+ "=r" ((USItype)(pl)) \
+ : "%r" (__m0), \
+ "r" (__m1) \
+ : "r2"); \
+ (ph) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+ } while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+ do { \
+ if ((x) >= 0x10000) \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x) >> 16)); \
+ else \
+ { \
+ __asm__ ("clz %0,%1" \
+ : "=r" ((USItype)(count)) \
+ : "r" ((USItype)(x))); \
+ (count) += 16; \
+ } \
+ } while (0)
+#endif /* RT/ROMP */
+
+#if defined (__sh2__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ( \
+ "dmulu.l %2,%3
+ sts macl,%1
+ sts mach,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)) \
+ : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addcc %r4,%5,%1
+ addx %r2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "%rJ" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "%rJ" ((USItype)(al)), \
+ "rI" ((USItype)(bl)) \
+ __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subcc %r4,%5,%1
+ subx %r2,%3,%0" \
+ : "=r" ((USItype)(sh)), \
+ "=&r" ((USItype)(sl)) \
+ : "rJ" ((USItype)(ah)), \
+ "rI" ((USItype)(bh)), \
+ "rJ" ((USItype)(al)), \
+ "rI" ((USItype)(bl)) \
+ __CLOBBER_CC)
+#if defined (__sparc_v8__)
+/* Don't match immediate range because: 1) it is not often useful,
+ 2) the 'I' flag thinks of the range as a 13 bit signed interval,
+ while we want to match a 13 bit interval, sign extended to 32 bits,
+ but INTERPRETED AS UNSIGNED. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC /* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ USItype __q; \
+ __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ : "=r" ((USItype)(__q)) \
+ : "r" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "r" ((USItype)(d))); \
+ (r) = (n0) - __q * (d); \
+ (q) = __q; \
+ } while (0)
+#define UDIV_TIME 25
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined (__sparclite__)
+/* This has hardware multiply but not divide. It also has two additional
+ instructions scan (ffs from high bit) and divscc. */
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ __asm__ ("! Inlined udiv_qrnnd
+ wr %%g0,%2,%%y ! Not a delayed write for sparclite
+ tst %%g0
+ divscc %3,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%%g1
+ divscc %%g1,%4,%0
+ rd %%y,%1
+ bl,a 1f
+ add %1,%4,%1
+1: ! End of inline udiv_qrnnd" \
+ : "=r" ((USItype)(q)), \
+ "=r" ((USItype)(r)) \
+ : "r" ((USItype)(n1)), \
+ "r" ((USItype)(n0)), \
+ "rI" ((USItype)(d)) \
+ : "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+ __asm__ ("scan %1,0,%0" \
+ : "=r" ((USItype)(x)) \
+ : "r" ((USItype)(count)))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+ implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
+ undefined. */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ("! Inlined umul_ppmm
+ wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr
+ sra %3,31,%%g2 ! Don't move this insn
+ and %2,%%g2,%%g2 ! Don't move this insn
+ andcc %%g0,0,%%g1 ! Don't move this insn
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,%3,%%g1
+ mulscc %%g1,0,%%g1
+ add %%g1,%%g2,%0
+ rd %%y,%1" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "%rI" ((USItype)(u)), \
+ "r" ((USItype)(v)) \
+ : "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39 /* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+ do { USItype __r; \
+ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
+ (r) = __r; \
+ } while (0)
+extern USItype __udiv_qrnnd ();
+#define UDIV_TIME 140
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+
+#if defined (__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("addl2 %5,%1
+ adwc %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "%0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "%1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subl2 %5,%1
+ sbwc %3,%0" \
+ : "=g" ((USItype)(sh)), \
+ "=&g" ((USItype)(sl)) \
+ : "0" ((USItype)(ah)), \
+ "g" ((USItype)(bh)), \
+ "1" ((USItype)(al)), \
+ "g" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {UDItype __ll; \
+ struct {USItype __l, __h;} __i; \
+ } __xx; \
+ USItype __m0 = (m0), __m1 = (m1); \
+ __asm__ ("emul %1,%2,$0,%0" \
+ : "=g" (__xx.__ll) \
+ : "g" (__m0), \
+ "g" (__m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+ (xh) += ((((SItype) __m0 >> 31) & __m1) \
+ + (((SItype) __m1 >> 31) & __m0)); \
+ } while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+ do { \
+ union {DItype __ll; \
+ struct {SItype __l, __h;} __i; \
+ } __xx; \
+ __xx.__i.__h = n1; __xx.__i.__l = n0; \
+ __asm__ ("ediv %3,%2,%0,%1" \
+ : "=g" (q), "=g" (r) \
+ : "g" (__xx.__ll), "g" (d)); \
+ } while (0)
+#endif /* __vax__ */
+
+#if defined (__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), \
+ "=&r" ((unsigned int)(sl)) \
+ : "%0" ((unsigned int)(ah)), \
+ "r" ((unsigned int)(bh)), \
+ "%1" ((unsigned int)(al)), \
+ "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
+ : "=r" ((unsigned int)(sh)), \
+ "=&r" ((unsigned int)(sl)) \
+ : "0" ((unsigned int)(ah)), \
+ "r" ((unsigned int)(bh)), \
+ "1" ((unsigned int)(al)), \
+ "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+ do { \
+ union {long int __ll; \
+ struct {unsigned int __h, __l;} __i; \
+ } __xx; \
+ unsigned int __m0 = (m0), __m1 = (m1); \
+ __asm__ ("mult %S0,%H3" \
+ : "=r" (__xx.__i.__h), \
+ "=r" (__xx.__i.__l) \
+ : "%1" (__m0), \
+ "rQR" (__m1)); \
+ (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+ (xh) += ((((signed int) __m0 >> 15) & __m1) \
+ + (((signed int) __m1 >> 15) & __m0)); \
+ } while (0)
+#endif /* __z8000__ */
+
+#endif /* __GNUC__ */
+
+
+#if !defined (umul_ppmm) && defined (__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+ { \
+ UDWtype __ll = __umulsidi3 (m0, m1); \
+ ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+ pl = (UWtype) __ll; \
+ }
+#endif
+
+#if !defined (__umulsidi3)
+#define __umulsidi3(u, v) \
+ ({UWtype __hi, __lo; \
+ umul_ppmm (__hi, __lo, u, v); \
+ ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+
+/* If this machine has no inline assembler, use C macros. */
+
+#if !defined (add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ do { \
+ UWtype __x; \
+ __x = (al) + (bl); \
+ (sh) = (ah) + (bh) + (__x < (al)); \
+ (sl) = __x; \
+ } while (0)
+#endif
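/* Editor's note: the carry test (__x < (al)) above is sound because
 * unsigned addition wraps mod 2^W_TYPE_SIZE, so the low-word sum is
 * smaller than an addend exactly when it overflowed.  A hedged sketch
 * with 32-bit words: */
static void example_add_ssaaaa (void)
{
  UWtype sh, sl;
  /* 0x00000001:0xFFFFFFFF + 0x00000000:0x00000001 = 0x00000002:0x00000000 */
  add_ssaaaa (sh, sl, 1, 0xFFFFFFFF, 0, 1);
  /* sh == 2, sl == 0: the low-word overflow carried into the high word. */
}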
+
+#if !defined (sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ UWtype __x; \
+ __x = (al) - (bl); \
+ (sh) = (ah) - (bh) - (__x > (al)); \
+ (sl) = __x; \
+ } while (0)
+#endif
+
+#if !defined (umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x1, __x2, __x3; \
+ UHWtype __ul, __vl, __uh, __vh; \
+ UWtype __u = (u), __v = (v); \
+ \
+ __ul = __ll_lowpart (__u); \
+ __uh = __ll_highpart (__u); \
+ __vl = __ll_lowpart (__v); \
+ __vh = __ll_highpart (__v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x1 = (UWtype) __ul * __vh; \
+ __x2 = (UWtype) __uh * __vl; \
+ __x3 = (UWtype) __uh * __vh; \
+ \
+ __x1 += __ll_highpart (__x0);/* this can't give carry */ \
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += __ll_B; /* yes, add it in the proper pos. */ \
+ \
+ (w1) = __x3 + __ll_highpart (__x1); \
+ (w0) = (__ll_lowpart (__x1) << W_TYPE_SIZE/2) + __ll_lowpart (__x0);\
+ } while (0)
+#endif
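/* Editor's note: this fallback is schoolbook multiplication on
 * half-words.  With u = uh*B + ul and v = vh*B + vl, where
 * B = 2^(W_TYPE_SIZE/2), the product is
 *   u*v = uh*vh*B^2 + (uh*vl + ul*vh)*B + ul*vl,
 * and the only subtlety is that the middle sum __x1 + __x2 may itself
 * carry, which is what the __x3 += __ll_B line catches.  A quick
 * sanity check with 32-bit words: */
static void example_umul_ppmm (void)
{
  UWtype w1, w0;
  umul_ppmm (w1, w0, 0xFFFFFFFF, 0xFFFFFFFF);
  /* (2^32-1)^2 = 0xFFFFFFFE00000001, so w1 == 0xFFFFFFFE, w0 == 1. */
}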
+
+#if !defined (smul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+ do { \
+ UWtype __w1; \
+ UWtype __m0 = (u), __m1 = (v); \
+ umul_ppmm (__w1, w0, __m0, __m1); \
+ (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+ - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+ } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+ do { \
+ UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+ __d1 = __ll_highpart (d); \
+ __d0 = __ll_lowpart (d); \
+ \
+ __r1 = (n1) % __d1; \
+ __q1 = (n1) / __d1; \
+ __m = (UWtype) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart (n0); \
+ if (__r1 < __m) \
+ { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ \
+ __r0 = __r1 % __d1; \
+ __q0 = __r1 / __d1; \
+ __m = (UWtype) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart (n0); \
+ if (__r0 < __m) \
+ { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ \
+ (q) = (UWtype) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+ } while (0)
+
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+ __udiv_w_sdiv (defined in libgcc or elsewhere). */
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+ do { \
+ UWtype __r; \
+ (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+ (r) = __r; \
+ } while (0)
+#endif
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined (udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
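/* Editor's sketch -- NOT part of the checkin: how a caller satisfies
 * UDIV_NEEDS_NORMALIZATION by scaling divisor and numerator until the
 * divisor's msb is set, then scaling the remainder back (the quotient
 * is unaffected).  The helper name is hypothetical; the mpihelp
 * division code in this checkin does the equivalent internally. */
static UWtype div_2by1 (UWtype *rem, UWtype n1, UWtype n0, UWtype d)
{
  UWtype q, r;
#if UDIV_NEEDS_NORMALIZATION
  int norm;
  count_leading_zeros (norm, d);
  if (norm) {
      d <<= norm;
      n1 = (n1 << norm) | (n0 >> (W_TYPE_SIZE - norm));
      n0 <<= norm;
  }
  udiv_qrnnd (q, r, n1, n0, d);
  r >>= norm;	/* undo the normalization on the remainder */
#else
  udiv_qrnnd (q, r, n1, n0, d);
#endif
  *rem = r;
  return q;
}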
+
+#if !defined (count_leading_zeros)
+extern
+#ifdef __STDC__
+const
+#endif
+unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+ do { \
+ UWtype __xr = (x); \
+ UWtype __a; \
+ \
+ if (W_TYPE_SIZE <= 32) \
+ { \
+ __a = __xr < ((UWtype) 1 << 2*__BITS4) \
+ ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+ : (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 : 3*__BITS4);\
+ } \
+ else \
+ { \
+ for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+ if (((__xr >> __a) & 0xff) != 0) \
+ break; \
+ } \
+ \
+ (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+ } while (0)
+/* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+
+#if !defined (count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros. The latter might be
+ defined in asm, but if it is not, the C version above is good enough. */
+#define count_trailing_zeros(count, x) \
+ do { \
+ UWtype __ctz_x = (x); \
+ UWtype __ctz_c; \
+ count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
+ (count) = W_TYPE_SIZE - 1 - __ctz_c; \
+ } while (0)
+#endif
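/* Editor's note: __ctz_x & -__ctz_x isolates the lowest set bit
 * (two's-complement negation flips every bit above it), so one
 * count_leading_zeros call pins down the trailing-zero count.
 * E.g. x = 0b101000: x & -x = 0b1000, clz = W_TYPE_SIZE - 4, and
 * W_TYPE_SIZE - 1 - clz = 3 trailing zeros, as expected. */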
+
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/mpi/mpi-add.c b/mpi/mpi-add.c
new file mode 100644
index 00000000..047a2fa3
--- /dev/null
+++ b/mpi/mpi-add.c
@@ -0,0 +1,221 @@
+/* mpi-add.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi-internal.h"
+
+
+/****************
+ * Add the unsigned integer V to the mpi-integer U and store the
+ * result in W. W and U may be the same.
+ */
+void
+mpi_add_ui(MPI w, MPI u, unsigned long v )
+{
+ mpi_ptr_t wp, up;
+ mpi_size_t usize, wsize;
+ int usign, wsign;
+
+ usize = u->nlimbs;
+ usign = u->sign;
+ wsign = 0;
+
+ /* If there is not enough space for W (and a possible carry), enlarge it. */
+ wsize = usize + 1;
+ if( w->alloced < wsize )
+ mpi_resize(w, wsize);
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->d;
+ wp = w->d;
+
+ if( !usize ) { /* simple */
+ wp[0] = v;
+ wsize = v? 1:0;
+ }
+ else if( !usign ) { /* mpi is not negative */
+ mpi_limb_t cy;
+ cy = mpihelp_add_1(wp, up, usize, v);
+ wp[usize] = cy;
+ wsize = usize + cy;
+ }
+ else { /* The signs are different. Need exact comparison to determine
+ * which operand to subtract from which. */
+ if( usize == 1 && up[0] < v ) {
+ wp[0] = v - up[0];
+ wsize = 1;
+ }
+ else {
+ mpihelp_sub_1(wp, up, usize, v);
+ /* Size can decrease with at most one limb. */
+ wsize = (usize - (wp[usize-1]? 0:1));
+ wsign = 1;
+ }
+ }
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+}
+
+
+void
+mpi_add(MPI w, MPI u, MPI v)
+{
+ mpi_ptr_t wp, up, vp;
+ mpi_size_t usize, vsize, wsize;
+ int usign, vsign, wsign;
+
+ usize = u->nlimbs;
+ vsize = v->nlimbs;
+ usign = u->sign;
+ vsign = v->sign;
+
+ if( usize < vsize ) { /* Swap U and V. */
+ { MPI t; t = u; u = v; v = t; }
+ { mpi_size_t t = usize; usize = vsize; vsize = t; }
+ { int t = usign; usign = vsign; vsign = t; }
+ }
+
+ /* If there is not enough space for w (and a possible carry), enlarge it. */
+ wsize = usize + 1;
+ if( w->alloced < wsize )
+ mpi_resize(w, wsize);
+ wsign = 0;
+
+ /* These must be after realloc (u or v may be the same as w). */
+ up = u->d;
+ vp = v->d;
+ wp = w->d;
+
+ if( !vsize ) { /* simple */
+ MPN_COPY(wp, up, usize );
+ wsize = usize;
+ wsign = usign;
+ }
+ else if( usign != vsign ) { /* different sign */
+ /* This test is right since USIZE >= VSIZE */
+ if( usize != vsize ) {
+ mpihelp_sub(wp, up, usize, vp, vsize);
+ wsize = usize;
+ MPN_NORMALIZE(wp, wsize);
+ wsign = usign;
+ }
+ else if( mpihelp_cmp(up, vp, usize) < 0 ) {
+ mpihelp_sub_n(wp, vp, up, usize);
+ wsize = usize;
+ MPN_NORMALIZE(wp, wsize);
+ if( !usign )
+ wsign = 1;
+ }
+ else {
+ mpihelp_sub_n(wp, up, vp, usize);
+ wsize = usize;
+ MPN_NORMALIZE(wp, wsize);
+ if( usign )
+ wsign = 1;
+ }
+ }
+ else { /* U and V have same sign. Add them. */
+ mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize);
+ wp[usize] = cy;
+ wsize = usize + cy;
+ if( usign )
+ wsign = 1;
+ }
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+}
+
+
+/****************
+ * Subtract the unsigned integer V from the mpi-integer U and store the
+ * result in W.
+ */
+void
+mpi_sub_ui(MPI w, MPI u, unsigned long v )
+{
+ mpi_ptr_t wp, up;
+ mpi_size_t usize, wsize;
+ int usign, wsign;
+
+ usize = u->nlimbs;
+ usign = u->sign;
+ wsign = 0;
+
+ /* If there is not enough space for W (and a possible carry), enlarge it. */
+ wsize = usize + 1;
+ if( w->alloced < wsize )
+ mpi_resize(w, wsize);
+
+ /* These must be after realloc (U may be the same as W). */
+ up = u->d;
+ wp = w->d;
+
+ if( !usize ) { /* simple */
+ wp[0] = v;
+ wsize = v? 1:0;
+ wsign = 1;
+ }
+ else if( usign ) { /* mpi and v are negative */
+ mpi_limb_t cy;
+ cy = mpihelp_add_1(wp, up, usize, v);
+ wp[usize] = cy;
+ wsize = usize + cy;
+ }
+ else { /* The signs are different. Need exact comparison to determine
+ * which operand to subtract from which. */
+ if( usize == 1 && up[0] < v ) {
+ wp[0] = v - up[0];
+ wsize = 1;
+ wsign = 1;
+ }
+ else {
+ mpihelp_sub_1(wp, up, usize, v);
+ /* Size can decrease with at most one limb. */
+ wsize = (usize - (wp[usize-1]? 0:1));
+ }
+ }
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+}
+
+void
+mpi_sub(MPI w, MPI u, MPI v)
+{
+ if( w == v ) {
+ MPI vv = mpi_copy(v);
+ vv->sign = !vv->sign;
+ mpi_add( w, u, vv );
+ mpi_free(vv);
+ }
+ else {
+ /* fixme: this is not thread-safe (we temporarily modify v) */
+ v->sign = !v->sign;
+ mpi_add( w, u, v );
+ v->sign = !v->sign;
+ }
+}
+
+
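/* Editor's sketch -- NOT part of the checkin: exercising the signed
 * add/sub entry points above.  mpi_alloc, mpi_set_ui and mpi_free are
 * assumed to live in mpiutil.c of this checkin (not shown in this
 * hunk), so treat the calls as illustrative. */
static void example_mpi_add_sub (void)
{
    MPI a = mpi_alloc (1);
    MPI b = mpi_alloc (1);
    mpi_set_ui (a, 5);
    mpi_set_ui (b, 7);
    mpi_sub (a, a, b);	    /* a = 5 - 7 = -2; the sign is tracked for us */
    mpi_add_ui (a, a, 10);  /* a = -2 + 10 = 8 */
    mpi_free (b);
    mpi_free (a);
}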
diff --git a/mpi/mpi-bit.c b/mpi/mpi-bit.c
new file mode 100644
index 00000000..9cb346aa
--- /dev/null
+++ b/mpi/mpi-bit.c
@@ -0,0 +1,133 @@
+/* mpi-bit.c - MPI bit level functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "mpi-internal.h"
+
+
+/****************
+ * Return the number of bits in A.
+ * fixme: we should not count leading zero bits
+ */
+unsigned
+mpi_get_nbits( MPI a )
+{
+ return a->nlimbs * BITS_PER_MPI_LIMB;
+}
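/* Editor's sketch -- NOT part of the checkin: what the fixme above
 * asks for.  It drops the leading zero bits of the top limb with
 * count_leading_zeros from longlong.h (not included by this file) and
 * assumes a normalized value, i.e. a non-zero top limb; the function
 * name is hypothetical. */
unsigned
mpi_get_nbits_exact( MPI a )
{
    int n;

    if( !a->nlimbs )
	return 0;
    count_leading_zeros( n, a->d[a->nlimbs-1] );
    return a->nlimbs * BITS_PER_MPI_LIMB - n;
}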
+
+
+/****************
+ * Test whether bit N is set.
+ */
+int
+mpi_test_bit( MPI a, unsigned n )
+{
+ unsigned limbno, bitno;
+ mpi_limb_t limb;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if( limbno >= a->nlimbs )
+ return 0; /* too far left: this is a 0 */
+ limb = a->d[limbno];
+ return (limb & ((mpi_limb_t)1 << bitno))? 1: 0;
+}
+
+
+/****************
+ * Set bit N of A.
+ */
+void
+mpi_set_bit( MPI a, unsigned n )
+{
+ unsigned limbno, bitno;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if( limbno >= a->nlimbs ) { /* resize */
+ if( a->alloced < limbno+1 )
+ mpi_resize(a, limbno+1 );
+ a->nlimbs = limbno+1;
+ }
+ a->d[limbno] |= ((mpi_limb_t)1 << bitno);
+}
+
+/****************
+ * Clear bit N of A.
+ */
+void
+mpi_clear_bit( MPI a, unsigned n )
+{
+ unsigned limbno, bitno;
+
+ limbno = n / BITS_PER_MPI_LIMB;
+ bitno = n % BITS_PER_MPI_LIMB;
+
+ if( limbno >= a->nlimbs )
+ return; /* don't need to clear this bit, it's too far to the left */
+ a->d[limbno] &= ~((mpi_limb_t)1 << bitno);
+}
+
+
+void
+mpi_set_bytes( MPI a, unsigned nbits, byte (*fnc)(int), int opaque )
+{
+ byte *p;
+ unsigned nlimbs, nlimbs2, xbits, xbytes;
+ unsigned n;
+ int i;
+
+ nlimbs = nbits / BITS_PER_MPI_LIMB;
+ xbits = nbits % BITS_PER_MPI_LIMB;
+ nlimbs2 = xbits? (nlimbs+1):nlimbs;
+ xbytes = xbits / 8;
+ xbits = xbits % 8;
+ if( a->alloced < nlimbs2 )
+ mpi_resize(a, nlimbs2 );
+ a->nlimbs = nlimbs2;
+ for(n=0; n < nlimbs; n++ ) {
+ p = (byte*)(a->d+n);
+ #ifdef HAVE_LITTLE_ENDIAN
+ for(i=0; i < BYTES_PER_MPI_LIMB; i++ )
+ p[i] = fnc(opaque);
+ #else
+ for(i=BYTES_PER_MPI_LIMB-1; i>=0; i-- )
+ p[i] = fnc(opaque);
+ #endif
+ }
+ if( xbytes ) {
+ p = (byte*)(a->d+n);
+ #ifdef HAVE_LITTLE_ENDIAN
+ for(i=0; i < xbytes; i++ )
+ p[i] = fnc(opaque);
+ #else
+ for(i=xbytes-1; i>=0; i-- )
+ p[i] = fnc(opaque);
+ #endif
+ }
+ assert(!xbits);
+}
+
+
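/* Editor's sketch -- NOT part of the checkin: mpi_set_bytes fills A
 * with nbits worth of bytes pulled from the callback, e.g. a random
 * byte source.  A toy generator for illustration only (not a usable
 * RNG); per the assert above, nbits must be a multiple of 8. */
static byte
toy_byte_gen( int opaque )
{
    static unsigned long seed = 1;
    (void)opaque;  /* unused in this toy */
    seed = seed * 1103515245 + 12345;
    return (byte)(seed >> 16);
}
/* mpi_set_bytes( m, 128, toy_byte_gen, 0 ) would then fill m with 128
 * pseudo-random bits. */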
diff --git a/mpi/mpi-cmp.c b/mpi/mpi-cmp.c
new file mode 100644
index 00000000..83e85ceb
--- /dev/null
+++ b/mpi/mpi-cmp.c
@@ -0,0 +1,72 @@
+/* mpi-cmp.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+
+int
+mpi_cmp_ui( MPI u, unsigned long v )
+{
+ mpi_limb_t limb = v;
+
+ if( !u->nlimbs && !limb )
+ return 0;
+ if( u->sign )
+ return -1;
+ if( u->nlimbs > 1 )
+ return 1;
+
+ if( u->d[0] == limb )
+ return 0;
+ else if( u->d[0] > limb )
+ return 1;
+ else
+ return -1;
+}
+
+int
+mpi_cmp( MPI u, MPI v )
+{
+ mpi_size_t usize = u->nlimbs;
+ mpi_size_t vsize = v->nlimbs;
+ int cmp;
+
+ /* FIXME: are the numbers always normalized? */
+ if( !u->sign && v->sign )
+ return 1;
+ else if( u->sign && !v->sign )
+ return -1;
+ else if( usize != vsize && !u->sign && !v->sign )
+ return usize - vsize;
+ else if( usize != vsize && u->sign && v->sign )
+        return vsize - usize;
+ else if( !usize )
+ return 0;
+ else if( !(cmp=mpihelp_cmp( u->d, v->d, usize )) )
+ return 0;
+ else if( (cmp < 0?1:0) == (u->sign?1:0))
+ return 1;
+ else
+ return -1;
+}
+
+
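The case analysis in mpi_cmp is easier to see on a toy sign-magnitude pair.
A sketch follows (not part of the patch; the struct is invented purely for
illustration):

#include <stdio.h>

struct sm { unsigned long mag; int sign; };  /* sign != 0 means negative */

static int sm_cmp( struct sm u, struct sm v )
{
    if( !u.sign && v.sign )
        return 1;                 /* positive > negative */
    if( u.sign && !v.sign )
        return -1;                /* negative < positive */
    if( u.mag == v.mag )
        return 0;
    /* Same sign: the larger magnitude wins for positive numbers and
     * loses for negative ones. */
    return (u.mag > v.mag) == (u.sign != 0) ? -1 : 1;
}

int main(void)
{
    struct sm a = { 5, 1 }, b = { 3, 1 };   /* -5 vs. -3 */
    printf("%d\n", sm_cmp(a, b) );          /* prints: -1 */
    return 0;
}
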
diff --git a/mpi/mpi-div.c b/mpi/mpi-div.c
new file mode 100644
index 00000000..2955575a
--- /dev/null
+++ b/mpi/mpi-div.c
@@ -0,0 +1,282 @@
+/* mpi-div.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+
+
+void
+mpi_fdiv_r( MPI rem, MPI dividend, MPI divisor )
+{
+ int divisor_sign = divisor->sign;
+ MPI temp_divisor = NULL;
+
+ /* We need the original value of the divisor after the remainder has been
+     * preliminarily calculated. We have to copy it to temporary space if it's
+ * the same variable as REM. */
+ if( rem == divisor ) {
+ temp_divisor = mpi_copy( divisor );
+ divisor = temp_divisor;
+ }
+
+ mpi_tdiv_r( rem, dividend, divisor );
+
+ if( ((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs )
+ mpi_add( rem, rem, divisor);
+
+ if( temp_divisor )
+ mpi_free(temp_divisor);
+}
+
+
+
+/****************
+ * Division rounding the quotient towards -infinity.
+ * The remainder gets the same sign as the denominator.
+ * REM is optional.
+ */
+
+ulong
+mpi_fdiv_r_ui( MPI rem, MPI dividend, ulong divisor )
+{
+ mpi_limb_t rlimb;
+
+ rlimb = mpihelp_mod_1( dividend->d, dividend->nlimbs, divisor );
+ if( rlimb && dividend->sign )
+ rlimb = divisor - rlimb;
+
+ if( rem ) {
+ rem->d[0] = rlimb;
+ rem->nlimbs = rlimb? 1:0;
+ }
+ return rlimb;
+}
+
+
+void
+mpi_fdiv_q( MPI quot, MPI dividend, MPI divisor )
+{
+ MPI tmp = mpi_alloc( mpi_get_nlimbs(quot) );
+ mpi_fdiv_qr( quot, tmp, dividend, divisor);
+ mpi_free(tmp);
+}
+
+void
+mpi_fdiv_qr( MPI quot, MPI rem, MPI dividend, MPI divisor )
+{
+ int divisor_sign = divisor->sign;
+ MPI temp_divisor = NULL;
+
+ if( quot == divisor || rem == divisor ) {
+ temp_divisor = mpi_copy( divisor );
+ divisor = temp_divisor;
+ }
+
+ mpi_tdiv_qr( quot, rem, dividend, divisor );
+
+ if( (divisor_sign ^ dividend->sign) && rem->nlimbs ) {
+ mpi_sub_ui( quot, quot, 1 );
+ mpi_add( rem, rem, divisor);
+ }
+
+ if( temp_divisor )
+ mpi_free(temp_divisor);
+}
+
+
+/* If den == quot, den needs temporary storage.
+ * If den == rem, den needs temporary storage.
+ * If num == quot, num needs temporary storage.
+ * If den has temporary storage, it can be normalized while being copied,
+ * i.e. no extra storage should be allocated.
+ */
+
+void
+mpi_tdiv_r( MPI rem, MPI num, MPI den)
+{
+ mpi_tdiv_qr(NULL, rem, num, den );
+}
+
+void
+mpi_tdiv_qr( MPI quot, MPI rem, MPI num, MPI den)
+{
+ mpi_ptr_t np, dp;
+ mpi_ptr_t qp, rp;
+ mpi_size_t nsize = num->nlimbs;
+ mpi_size_t dsize = den->nlimbs;
+ mpi_size_t qsize, rsize;
+ mpi_size_t sign_remainder = num->sign;
+ mpi_size_t sign_quotient = num->sign ^ den->sign;
+ unsigned normalization_steps;
+ mpi_limb_t q_limb;
+ mpi_ptr_t marker[5];
+ int markidx=0;
+
+ /* Ensure space is enough for quotient and remainder.
+ * We need space for an extra limb in the remainder, because it's
+ * up-shifted (normalized) below. */
+ rsize = nsize + 1;
+ if( rem->alloced < rsize )
+ mpi_resize( rem, rsize);
+
+ qsize = rsize - dsize; /* qsize cannot be bigger than this. */
+ if( qsize <= 0 ) {
+ if( num != rem ) {
+ rem->nlimbs = num->nlimbs;
+ rem->sign = num->sign;
+ MPN_COPY(rem->d, num->d, nsize);
+ }
+ if( quot ) {
+ /* This needs to follow the assignment to rem, in case the
+ * numerator and quotient are the same. */
+ quot->nlimbs = 0;
+ quot->sign = 0;
+ }
+ return;
+ }
+
+ if( quot && quot->alloced < qsize )
+ mpi_resize( quot, qsize);
+
+ /* Read pointers here, when reallocation is finished. */
+ np = num->d;
+ dp = den->d;
+ rp = rem->d;
+
+ /* Optimize division by a single-limb divisor. */
+ if( dsize == 1 ) {
+ mpi_limb_t rlimb;
+ if( quot ) {
+ qp = quot->d;
+ rlimb = mpihelp_divmod_1( qp, np, nsize, dp[0] );
+ qsize -= qp[qsize - 1] == 0;
+ quot->nlimbs = qsize;
+ quot->sign = sign_quotient;
+ }
+ else
+ rlimb = mpihelp_mod_1( np, nsize, dp[0] );
+ rp[0] = rlimb;
+ rsize = rlimb != 0?1:0;
+ rem->nlimbs = rsize;
+ rem->sign = sign_remainder;
+ return;
+ }
+
+
+ if( quot ) {
+ qp = quot->d;
+ /* Make sure QP and NP point to different objects. Otherwise the
+ * numerator would be gradually overwritten by the quotient limbs. */
+ if(qp == np) { /* Copy NP object to temporary space. */
+ np = marker[markidx++] = mpi_alloc_limb_space(nsize);
+ MPN_COPY(np, qp, nsize);
+ }
+ }
+ else /* Put quotient at top of remainder. */
+ qp = rp + dsize;
+
+ count_leading_zeros( normalization_steps, dp[dsize - 1] );
+
+ /* Normalize the denominator, i.e. make its most significant bit set by
+ * shifting it NORMALIZATION_STEPS bits to the left. Also shift the
+ * numerator the same number of steps (to keep the quotient the same!).
+ */
+ if( normalization_steps ) {
+ mpi_ptr_t tp;
+ mpi_limb_t nlimb;
+
+ /* Shift up the denominator setting the most significant bit of
+ * the most significant word. Use temporary storage not to clobber
+ * the original contents of the denominator. */
+ tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+ mpihelp_lshift( tp, dp, dsize, normalization_steps );
+ dp = tp;
+
+ /* Shift up the numerator, possibly introducing a new most
+ * significant word. Move the shifted numerator in the remainder
+ * meanwhile. */
+ nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps);
+ if( nlimb ) {
+ rp[nsize] = nlimb;
+ rsize = nsize + 1;
+ }
+ else
+ rsize = nsize;
+ }
+ else {
+ /* The denominator is already normalized, as required. Copy it to
+ * temporary space if it overlaps with the quotient or remainder. */
+ if( dp == rp || (quot && (dp == qp))) {
+ mpi_ptr_t tp;
+
+ tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+ MPN_COPY( tp, dp, dsize );
+ dp = tp;
+ }
+
+ /* Move the numerator to the remainder. */
+ if( rp != np )
+ MPN_COPY(rp, np, nsize);
+
+ rsize = nsize;
+ }
+
+ q_limb = mpihelp_divrem( qp, 0, rp, rsize, dp, dsize );
+
+ if( quot ) {
+ qsize = rsize - dsize;
+ if(q_limb) {
+ qp[qsize] = q_limb;
+ qsize += 1;
+ }
+
+ quot->nlimbs = qsize;
+ quot->sign = sign_quotient;
+ }
+
+ rsize = dsize;
+ MPN_NORMALIZE (rp, rsize);
+
+ if( normalization_steps && rsize ) {
+ mpihelp_rshift(rp, rp, rsize, normalization_steps);
+ rsize -= rp[rsize - 1] == 0?1:0;
+ }
+
+ rem->nlimbs = rsize;
+ rem->sign = sign_remainder;
+ while( markidx )
+ mpi_free_limb_space(marker[--markidx]);
+}
+
+
+/****************
+ * Check whether dividend is divisible by divisor
+ * (note: divisor must fit into a limb)
+ */
+int
+mpi_divisible_ui(MPI dividend, ulong divisor )
+{
+ return !mpihelp_mod_1( dividend->d, dividend->nlimbs, divisor );
+}
+
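The relation between the truncating (mpi_tdiv_qr) and flooring (mpi_fdiv_qr)
entry points mirrors what one can do with plain C integers, whose / and %
truncate towards zero. A sketch of the same adjustment on longs (not part of
the patch):

#include <stdio.h>

static void fdiv_qr( long num, long den, long *q, long *r )
{
    *q = num / den;     /* truncated quotient */
    *r = num % den;     /* remainder with the sign of NUM */
    if( *r && (num < 0) != (den < 0) ) {
        *q -= 1;        /* round towards -infinity */
        *r += den;      /* remainder takes the sign of DEN */
    }
}

int main(void)
{
    long q, r;
    fdiv_qr( -7, 3, &q, &r );
    printf("%ld %ld\n", q, r ); /* prints: -3 2 (truncation gives -2, -1) */
    return 0;
}
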
diff --git a/mpi/mpi-gcd.c b/mpi/mpi-gcd.c
new file mode 100644
index 00000000..f31e917f
--- /dev/null
+++ b/mpi/mpi-gcd.c
@@ -0,0 +1,54 @@
+/* mpi-gcd.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+
+/****************
+ * Find the greatest common divisor G of A and B.
+ * Return: true if the GCD is 1, false in all other cases.
+ */
+int
+mpi_gcd( MPI g, MPI xa, MPI xb )
+{
+ MPI a, b;
+
+ a = mpi_copy(xa);
+ b = mpi_copy(xb);
+
+ /* TAOCP Vol II, 4.5.2, Algorithm A */
+ a->sign = 0;
+ b->sign = 0;
+ while( mpi_cmp_ui( b, 0 ) ) {
+        mpi_fdiv_r( g, a, b ); /* G is used as a temporary */
+ mpi_set(a,b);
+ mpi_set(b,g);
+ }
+ mpi_set(g, a);
+
+ mpi_free(a);
+ mpi_free(b);
+ return !mpi_cmp_ui( g, 1);
+}
+
+
+
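The remainder loop of mpi_gcd is Euclid's algorithm; on machine words it
collapses to a few lines. A sketch (not part of the patch), with the MPI
calls noted alongside:

#include <stdio.h>

static unsigned long gcd( unsigned long a, unsigned long b )
{
    while( b ) {                   /* while( mpi_cmp_ui( b, 0 ) ) */
        unsigned long r = a % b;   /* mpi_fdiv_r( g, a, b ) */
        a = b;                     /* mpi_set( a, b ) */
        b = r;                     /* mpi_set( b, g ) */
    }
    return a;
}

int main(void)
{
    printf("%lu\n", gcd( 252, 105 ) ); /* prints: 21 */
    return 0;
}
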
diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h
new file mode 100644
index 00000000..b5c00b6c
--- /dev/null
+++ b/mpi/mpi-internal.h
@@ -0,0 +1,198 @@
+/* mpi-internal.h - Internal to the Multi Precision Integers
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#ifndef G10_MPI_INTERNAL_H
+#define G10_MPI_INTERNAL_H
+
+#include "mpi.h"
+
+
+
+typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
+typedef int mpi_size_t; /* (must be a signed type) */
+
+#define ABS(x) ((x) >= 0 ? (x) : -(x))
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#define MAX(h,i) ((h) > (i) ? (h) : (i))
+#define RESIZE_IF_NEEDED(a,b) \
+ do { \
+ if( (a)->alloced < (b) ) \
+ mpi_resize((a), (b)); \
+ } while(0)
+
+/* Copy N limbs from S to D. */
+#define MPN_COPY( d, s, n) \
+ do { \
+ mpi_size_t _i; \
+ for( _i = 0; _i < (n); _i++ ) \
+ (d)[_i] = (s)[_i]; \
+ } while(0)
+
+#define MPN_COPY_DECR( d, s, n ) \
+ do { \
+ mpi_size_t _i; \
+ for( _i = (n)-1; _i >= 0; _i--) \
+ (d)[_i] = (s)[_i]; \
+ } while(0)
+
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+ do { \
+ int _i; \
+ for( _i = 0; _i < (n); _i++ ) \
+ (d)[_i] = 0; \
+ } while (0)
+
+#define MPN_NORMALIZE(d, n) \
+ do { \
+ while( (n) > 0 ) { \
+ if( (d)[(n)-1] ) \
+ break; \
+ (n)--; \
+ } \
+ } while(0)
+
+#define MPN_NORMALIZE_NOT_ZERO(d, n) \
+ do { \
+ for(;;) { \
+ if( (d)[(n)-1] ) \
+ break; \
+ (n)--; \
+ } \
+ } while(0)
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+ do { \
+ if( (size) < KARATSUBA_THRESHOLD ) \
+ mul_n_basecase (prodp, up, vp, size); \
+ else \
+ mul_n (prodp, up, vp, size, tspace); \
+    } while (0)
+
+
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MPI_LIMB))/D - (2**BITS_PER_MPI_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones). For correct operation, the most significant bit of D
+ * has to be set. Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
+ do { \
+ mpi_limb_t _q, _ql, _r; \
+ mpi_limb_t _xh, _xl; \
+ umul_ppmm (_q, _ql, (nh), (di)); \
+ _q += (nh); /* DI is 2**BITS_PER_MPI_LIMB too small */ \
+ umul_ppmm (_xh, _xl, _q, (d)); \
+ sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
+ if( _xh ) { \
+ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
+ _q++; \
+ if( _xh) { \
+ sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
+ _q++; \
+ } \
+ } \
+ if( _r >= (d) ) { \
+ _r -= (d); \
+ _q++; \
+ } \
+ (r) = _r; \
+ (q) = _q; \
+ } while (0)
+
+
+/*-- mpiutil.c --*/
+#ifdef M_DEBUG
+ #define mpi_alloc_limb_space(n) mpi_debug_alloc_limb_space((n), M_DBGINFO( __LINE__ ) )
+ #define mpi_free_limb_space(n) mpi_debug_free_limb_space((n), M_DBGINFO( __LINE__ ) )
+ mpi_ptr_t mpi_debug_alloc_limb_space( unsigned nlimbs, const char *info );
+ void mpi_debug_free_limb_space( mpi_ptr_t a, const char *info );
+#else
+ mpi_ptr_t mpi_alloc_limb_space( unsigned nlimbs );
+ void mpi_free_limb_space( mpi_ptr_t a );
+#endif
+void mpi_assign_limb_space( MPI a, mpi_ptr_t ap, unsigned nlimbs );
+
+/*-- mpihelp-add.c --*/
+mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb );
+mpi_limb_t mpihelp_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-sub.c --*/
+mpi_limb_t mpihelp_sub_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb );
+mpi_limb_t mpihelp_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+
+/*-- mpihelp-cmp.c --*/
+int mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size );
+
+/*-- mpihelp-mul.c --*/
+mpi_limb_t mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb);
+void mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+ mpi_size_t size);
+mpi_limb_t mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+ mpi_ptr_t vp, mpi_size_t vsize);
+
+/*-- mpihelp-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb);
+mpi_limb_t mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
+ mpi_ptr_t np, mpi_size_t nsize,
+ mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1( mpi_ptr_t quot_ptr,
+ mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb);
+
+/*-- mpihelp-shift.c --*/
+mpi_limb_t mpihelp_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+ unsigned cnt);
+mpi_limb_t mpihelp_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+ unsigned cnt);
+
+
+/* Define stuff for longlong.h. */
+#define W_TYPE_SIZE BITS_PER_MPI_LIMB
+ typedef mpi_limb_t UWtype;
+ typedef unsigned int UHWtype;
+#if defined (__GNUC__)
+ typedef unsigned int UQItype __attribute__ ((mode (QI)));
+ typedef int SItype __attribute__ ((mode (SI)));
+ typedef unsigned int USItype __attribute__ ((mode (SI)));
+ typedef int DItype __attribute__ ((mode (DI)));
+ typedef unsigned int UDItype __attribute__ ((mode (DI)));
+#else
+ typedef unsigned char UQItype;
+ typedef long SItype;
+ typedef unsigned long USItype;
+#endif
+
+
+#endif /*G10_MPI_INTERNAL_H*/
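
Of the helper macros above, MPN_NORMALIZE is the one the division and
exponentiation code leans on most: it trims leading zero limbs so that the
limb count reflects the true magnitude. A standalone sketch (not part of
the patch), again assuming 32-bit limbs:

#include <stdio.h>
#include <stdint.h>

static void normalize( const uint32_t *d, unsigned *n )
{
    while( *n > 0 && !d[*n - 1] )  /* drop high limbs that are zero */
        (*n)--;
}

int main(void)
{
    uint32_t d[4] = { 7, 1, 0, 0 };
    unsigned n = 4;
    normalize( d, &n );
    printf("%u\n", n );  /* prints: 2 */
    return 0;
}
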
diff --git a/mpi/mpi-inv.c b/mpi/mpi-inv.c
new file mode 100644
index 00000000..acde6055
--- /dev/null
+++ b/mpi/mpi-inv.c
@@ -0,0 +1,127 @@
+/* mpi-inv.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+
+/****************
+ * Calculate the multiplicative inverse X of U mod V
+ * That is: Find the solution for
+ * 1 = (u*x) mod v
+ * A solution exists and is unique only if U and V are relatively prime.
+ * Returns 0 if a solution was found.
+ */
+int
+mpi_inv_mod( MPI x, MPI u, MPI v )
+{
+ #if 0
+    /* Extended Euclid's algorithm (see TAOCP Vol II, 4.5.2, Alg. X) */
+ MPI u1, u2, u3, v1, v2, v3, q, t1, t2, t3;
+
+ u1 = mpi_alloc_set_ui(1);
+ u2 = mpi_alloc_set_ui(0);
+ u3 = mpi_copy(u);
+ v1 = mpi_alloc_set_ui(0);
+ v2 = mpi_alloc_set_ui(1);
+ v3 = mpi_copy(v);
+ q = mpi_alloc( mpi_get_nlimbs(u) );
+ t1 = mpi_alloc( mpi_get_nlimbs(u) );
+ t2 = mpi_alloc( mpi_get_nlimbs(u) );
+ t3 = mpi_alloc( mpi_get_nlimbs(u) );
+ while( mpi_cmp_ui( v3, 0 ) ) {
+ /*log_debug("----------------------\n");
+        log_mpidump("q =", q);
+ log_mpidump("u1=", u1);
+ log_mpidump("u2=", u2);
+ log_mpidump("u3=", u3);
+ log_mpidump("v1=", v1);
+ log_mpidump("v2=", v2);
+ log_mpidump("v3=", v3); */
+ mpi_fdiv_q( q, u3, v3 );
+ mpi_mul(t1, v1, q); mpi_mul(t2, v2, q); mpi_mul(t3, v3, q);
+ mpi_sub(t1, u1, t1); mpi_sub(t2, u2, t2); mpi_sub(t3, u3, t3);
+
+ mpi_set(u1, v1); mpi_set(u2, v2); mpi_set(u3, v3);
+ mpi_set(v1, t1); mpi_set(v2, t2); mpi_set(v3, t3);
+ }
+ mpi_set(x, u3);
+
+ mpi_free(u1);
+ mpi_free(u2);
+ mpi_free(u3);
+ mpi_free(v1);
+ mpi_free(v2);
+ mpi_free(v3);
+ mpi_free(q);
+ mpi_free(t1);
+ mpi_free(t2);
+ mpi_free(t3);
+ #endif
+
+ /*****************************
+     * 1. Init:   g0 = v   g1 = u   v0 = 0   v1 = 1
+     * 2. Test:   if g1 is 0 terminate. Result = v0 < 0: v0 + v
+     *                                          else: v0
+ * 3. Divide: div,rem = g0 / g1
+ * t1 = v0 - div * v1
+ * v0 = v1
+ * v1 = t1
+ * g0 = g1
+ * g1 = rem
+ * continue with step 2.
+ */
+ MPI g0, g1, v0, v1, div, rem, t1;
+
+ g0 = mpi_copy(v);
+ g1 = mpi_copy(u);
+ v0 = mpi_alloc_set_ui( 0 );
+ v1 = mpi_alloc_set_ui( 1 );
+ div = mpi_alloc(mpi_get_nlimbs(v));
+ rem = mpi_alloc(mpi_get_nlimbs(v));
+ t1 = mpi_alloc(mpi_get_nlimbs(v));
+ while( mpi_cmp_ui( g1, 0) ) {
+ mpi_fdiv_qr(div, rem, g0, g1);
+ mpi_mul(t1, div, v1);
+ mpi_sub(t1, v0, t1);
+ mpi_set(v0, v1);
+ mpi_set(v1, t1);
+ mpi_set(g0, g1);
+ mpi_set(g1, rem);
+
+ }
+ if( mpi_cmp_ui( v0, 0) < 0 )
+ mpi_add( x, v0, v);
+ else
+ mpi_set( x, v0);
+
+ mpi_free(g0);
+ mpi_free(g1);
+ mpi_free(v0);
+ mpi_free(v1);
+ mpi_free(div);
+ mpi_free(rem);
+ mpi_free(t1);
+ return 0;
+}
+
+
+
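The g0/g1, v0/v1 iteration above runs just as well on machine words, which
makes the invariant (each g is congruent to its v times U, mod V) easy to
check by hand. A sketch on longs (not part of the patch; it assumes
positive operands, where C's truncating division coincides with
mpi_fdiv_qr):

#include <stdio.h>

static long inv_mod( long u, long v )
{
    long g0 = v, g1 = u, v0 = 0, v1 = 1;

    while( g1 ) {                   /* step 2: terminate when g1 == 0 */
        long div = g0 / g1;         /* step 3: divide */
        long rem = g0 % g1;
        long t1  = v0 - div * v1;
        v0 = v1;  v1 = t1;
        g0 = g1;  g1 = rem;
    }
    return v0 < 0 ? v0 + v : v0;    /* normalize into 0 .. v-1 */
}

int main(void)
{
    printf("%ld\n", inv_mod( 3, 7 ) ); /* prints: 5, and 3*5 = 15 = 1 mod 7 */
    return 0;
}
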
diff --git a/mpi/mpi-mul.c b/mpi/mpi-mul.c
new file mode 100644
index 00000000..03f2b4b4
--- /dev/null
+++ b/mpi/mpi-mul.c
@@ -0,0 +1,178 @@
+/* mpi-mul.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+
+
+void
+mpi_mul_ui( MPI prod, MPI mult, unsigned long small_mult )
+{
+ mpi_size_t size, prod_size;
+ mpi_ptr_t prod_ptr;
+ mpi_limb_t cy;
+ int sign;
+
+ size = mult->nlimbs;
+ sign = mult->sign;
+
+ if( !size || !small_mult ) {
+ prod->nlimbs = 0;
+ prod->sign = 0;
+ return;
+ }
+
+ prod_size = size + 1;
+ if( prod->alloced < prod_size )
+ mpi_resize( prod, prod_size );
+ prod_ptr = prod->d;
+
+ cy = mpihelp_mul_1( prod_ptr, mult->d, size, (mpi_limb_t)small_mult );
+ if( cy )
+ prod_ptr[size++] = cy;
+ prod->nlimbs = size;
+ prod->sign = sign;
+}
+
+
+void
+mpi_mul_2exp( MPI w, MPI u, unsigned long cnt)
+{
+ mpi_size_t usize, wsize, limb_cnt;
+ mpi_ptr_t wp;
+ mpi_limb_t wlimb;
+ int usign, wsign;
+
+ usize = u->nlimbs;
+ usign = u->sign;
+
+ if( !usize ) {
+ w->nlimbs = 0;
+ w->sign = 0;
+ return;
+ }
+
+ limb_cnt = cnt / BITS_PER_MPI_LIMB;
+ wsize = usize + limb_cnt + 1;
+ if( w->alloced < wsize )
+ mpi_resize(w, wsize );
+ wp = w->d;
+ wsize = usize + limb_cnt;
+ wsign = usign;
+
+ cnt %= BITS_PER_MPI_LIMB;
+ if( cnt ) {
+ wlimb = mpihelp_lshift( wp + limb_cnt, u->d, usize, cnt );
+ if( wlimb ) {
+ wp[wsize] = wlimb;
+ wsize++;
+ }
+ }
+ else {
+ MPN_COPY_DECR( wp + limb_cnt, u->d, usize );
+ }
+
+ /* Zero all whole limbs at low end. Do it here and not before calling
+     * mpn_lshift, so as not to lose data when U == W. */
+ MPN_ZERO( wp, limb_cnt );
+
+ w->nlimbs = wsize;
+ w->sign = wsign;
+}
+
+
+
+void
+mpi_mul( MPI w, MPI u, MPI v)
+{
+ mpi_size_t usize, vsize, wsize;
+ mpi_ptr_t up, vp, wp;
+ mpi_limb_t cy;
+ int usign, vsign, sign_product;
+ int assign_wp=0;
+ mpi_ptr_t tmp_limb=NULL;
+
+ if( u->nlimbs < v->nlimbs ) { /* Swap U and V. */
+ usize = v->nlimbs;
+ usign = v->sign;
+ up = v->d;
+ vsize = u->nlimbs;
+ vsign = u->sign;
+ vp = u->d;
+ }
+ else {
+ usize = u->nlimbs;
+ usign = u->sign;
+ up = u->d;
+ vsize = v->nlimbs;
+ vsign = v->sign;
+ vp = v->d;
+ }
+ sign_product = usign ^ vsign;
+ wp = w->d;
+
+ /* Ensure W has space enough to store the result. */
+ wsize = usize + vsize;
+ if( w->alloced < wsize ) {
+ if( wp == up || wp == vp ) {
+ wp = mpi_alloc_limb_space( wsize );
+ assign_wp = 1;
+ }
+ else {
+ mpi_resize(w, wsize );
+ wp = w->d;
+ }
+ }
+ else { /* Make U and V not overlap with W. */
+ if( wp == up ) {
+ /* W and U are identical. Allocate temporary space for U. */
+ up = tmp_limb = mpi_alloc_limb_space( usize );
+ /* Is V identical too? Keep it identical with U. */
+ if( wp == vp )
+ vp = up;
+ /* Copy to the temporary space. */
+ MPN_COPY( up, wp, usize );
+ }
+ else if( wp == vp ) {
+ /* W and V are identical. Allocate temporary space for V. */
+ vp = tmp_limb = mpi_alloc_limb_space( vsize );
+ /* Copy to the temporary space. */
+ MPN_COPY( vp, wp, vsize );
+ }
+ }
+
+ if( !vsize )
+ wsize = 0;
+ else {
+ cy = mpihelp_mul( wp, up, usize, vp, vsize );
+ wsize -= cy? 0:1;
+ }
+
+ if( assign_wp )
+ mpi_assign_limb_space( w, wp, wsize );
+ w->nlimbs = wsize;
+ w->sign = sign_product;
+ if( tmp_limb )
+ mpi_free_limb_space( tmp_limb );
+}
+
+
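The interesting part of mpi_mul_2exp is the intra-limb shift done by
mpihelp_lshift, which has to thread each limb's spilled high bits into the
next limb up. A two-limb sketch (not part of the patch; 32-bit limbs, and
CNT must be in 1..31 so the complementary shift stays defined):

#include <stdio.h>
#include <stdint.h>

/* Shift a two-limb number left by cnt bits; the return value is the
 * carry limb, i.e. the bits shifted out at the top. */
static uint32_t lshift2( uint32_t w[2], const uint32_t u[2], unsigned cnt )
{
    uint32_t out = u[1] >> (32 - cnt);
    w[1] = (u[1] << cnt) | (u[0] >> (32 - cnt));
    w[0] =  u[0] << cnt;
    return out;
}

int main(void)
{
    uint32_t u[2] = { 0x80000001u, 0x00000001u }, w[2];
    uint32_t c = lshift2( w, u, 4 );
    printf("%08x %08x %08x\n", c, w[1], w[0] );
    /* prints: 00000000 00000018 00000010 */
    return 0;
}
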
diff --git a/mpi/mpi-pow.c b/mpi/mpi-pow.c
new file mode 100644
index 00000000..14fe4de4
--- /dev/null
+++ b/mpi/mpi-pow.c
@@ -0,0 +1,247 @@
+/* mpi-pow.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+#include <assert.h>
+
+
+/****************
+ * RES = BASE ^ EXP mod MOD
+ */
+void
+mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
+{
+ mpi_ptr_t rp, ep, mp, bp;
+ mpi_size_t esize, msize, bsize, rsize;
+ int esign, msign, bsign, rsign;
+ mpi_size_t size;
+ int mod_shift_cnt;
+ int negative_result;
+ mpi_ptr_t mp_marker=NULL, bp_marker=NULL, ep_marker=NULL;
+ mpi_ptr_t xp_marker=NULL;
+ int assign_rp=0;
+
+ esize = exp->nlimbs;
+ msize = mod->nlimbs;
+ size = 2 * msize;
+ esign = exp->sign;
+ msign = mod->sign;
+
+ rp = res->d;
+ ep = exp->d;
+
+ if( !msize )
+ msize = 1 / msize; /* provoke a signal */
+
+ if( !esize ) {
+ /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+         * depending on whether MOD equals 1. */
+ rp[0] = 1;
+ res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+ res->sign = 0;
+ goto leave;
+ }
+
+ /* Normalize MOD (i.e. make its most significant bit set) as required by
+ * mpn_divrem. This will make the intermediate values in the calculation
+ * slightly larger, but the correct result is obtained after a final
+ * reduction using the original MOD value. */
+ mp = mp_marker = mpi_alloc_limb_space(msize);
+ count_leading_zeros( mod_shift_cnt, mod->d[msize-1] );
+ if( mod_shift_cnt )
+ mpihelp_lshift( mp, mod->d, msize, mod_shift_cnt );
+ else
+ MPN_COPY( mp, mod->d, msize );
+
+ bsize = base->nlimbs;
+ bsign = base->sign;
+    if( bsize > msize ) { /* The base is larger than the modulus. Reduce it. */
+ /* Allocate (BSIZE + 1) with space for remainder and quotient.
+ * (The quotient is (bsize - msize + 1) limbs.) */
+ bp = bp_marker = mpi_alloc_limb_space( bsize + 1);
+ MPN_COPY( bp, base->d, bsize );
+ /* We don't care about the quotient, store it above the remainder,
+ * at BP + MSIZE. */
+ mpihelp_divrem( bp + msize, 0, bp, bsize, mp, msize );
+ bsize = msize;
+ /* Canonicalize the base, since we are going to multiply with it
+ * quite a few times. */
+ MPN_NORMALIZE( bp, bsize );
+ }
+ else
+ bp = base->d;
+
+ if( !bsize ) {
+ res->nlimbs = 0;
+ res->sign = 0;
+ goto leave;
+ }
+
+ if( res->alloced < size ) {
+ /* We have to allocate more space for RES. If any of the input
+ * parameters are identical to RES, defer deallocation of the old
+ * space. */
+ if( rp == ep || rp == mp || rp == bp ) {
+ rp = mpi_alloc_limb_space( size );
+ assign_rp = 1;
+ }
+ else {
+ mpi_resize( res, size );
+ rp = res->d;
+ }
+ }
+ else { /* Make BASE, EXP and MOD not overlap with RES. */
+ if( rp == bp ) {
+ /* RES and BASE are identical. Allocate temp. space for BASE. */
+ assert( !bp_marker );
+ bp = bp_marker = mpi_alloc_limb_space( bsize );
+ MPN_COPY(bp, rp, bsize);
+ }
+ if( rp == ep ) {
+ /* RES and EXP are identical. Allocate temp. space for EXP. */
+ ep = ep_marker = mpi_alloc_limb_space( esize );
+ MPN_COPY(ep, rp, esize);
+ }
+ if( rp == mp ) {
+ /* RES and MOD are identical. Allocate temporary space for MOD.*/
+ assert( !mp_marker );
+ mp = mp_marker = mpi_alloc_limb_space( msize );
+ MPN_COPY(mp, rp, msize);
+ }
+ }
+
+ MPN_COPY( rp, bp, bsize );
+ rsize = bsize;
+ rsign = bsign;
+
+ {
+ mpi_size_t i;
+ mpi_ptr_t xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1) );
+ int c;
+ mpi_limb_t e;
+ mpi_limb_t carry_limb;
+
+ negative_result = (ep[0] & 1) && base->sign;
+
+ i = esize - 1;
+ e = ep[i];
+ count_leading_zeros (c, e);
+ e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
+ c = BITS_PER_MPI_LIMB - 1 - c;
+
+ /* Main loop.
+ *
+ * Make the result be pointed to alternately by XP and RP. This
+ * helps us avoid block copying, which would otherwise be necessary
+         * with the overlap restrictions of mpihelp_divrem. With 50% probability
+ * the result after this loop will be in the area originally pointed
+ * by RP (==RES->d), and with 50% probability in the area originally
+ * pointed to by XP.
+ */
+ for(;;) {
+ while( c ) {
+ mpi_ptr_t tp;
+ mpi_size_t xsize;
+
+ mpihelp_mul_n(xp, rp, rp, rsize);
+ xsize = 2 * rsize;
+ if( xsize > msize ) {
+ mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp; rp = xp; xp = tp;
+ rsize = xsize;
+
+ if( (mpi_limb_signed_t)e < 0 ) {
+ mpihelp_mul( xp, rp, rsize, bp, bsize );
+ xsize = rsize + bsize;
+ if( xsize > msize ) {
+ mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
+ xsize = msize;
+ }
+
+ tp = rp; rp = xp; xp = tp;
+ rsize = xsize;
+ }
+ e <<= 1;
+ c--;
+ }
+
+ i--;
+ if( i < 0 )
+ break;
+ e = ep[i];
+ c = BITS_PER_MPI_LIMB;
+ }
+
+ /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
+ * steps. Adjust the result by reducing it with the original MOD.
+ *
+ * Also make sure the result is put in RES->d (where it already
+ * might be, see above).
+ */
+ if( mod_shift_cnt ) {
+ carry_limb = mpihelp_lshift( res->d, rp, rsize, mod_shift_cnt);
+ rp = res->d;
+ if( carry_limb ) {
+ rp[rsize] = carry_limb;
+ rsize++;
+ }
+ }
+ else {
+ MPN_COPY( res->d, rp, rsize);
+ rp = res->d;
+ }
+
+ if( rsize >= msize ) {
+ mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
+ rsize = msize;
+ }
+
+ /* Remove any leading zero words from the result. */
+ if( mod_shift_cnt )
+ mpihelp_rshift( rp, rp, rsize, mod_shift_cnt);
+ MPN_NORMALIZE (rp, rsize);
+ }
+
+ if( negative_result && rsize ) {
+ if( mod_shift_cnt )
+ mpihelp_rshift( mp, mp, msize, mod_shift_cnt);
+ mpihelp_sub( rp, mp, msize, rp, rsize);
+ rsize = msize;
+ rsign = msign;
+ MPN_NORMALIZE(rp, rsize);
+ }
+ res->nlimbs = rsize;
+ res->sign = rsign;
+
+ leave:
+ if( assign_rp ) mpi_assign_limb_space( res, rp, size );
+ if( mp_marker ) mpi_free_limb_space( mp_marker );
+ if( bp_marker ) mpi_free_limb_space( bp_marker );
+ if( ep_marker ) mpi_free_limb_space( ep_marker );
+ if( xp_marker ) mpi_free_limb_space( xp_marker );
+}
+
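Stripped of the limb bookkeeping, the main loop of mpi_powm is the classical
left-to-right square-and-multiply scheme. A sketch on uint64_t (not part of
the patch; the modulus must be small enough that the squares fit in 64 bits,
and the fixed 64-iteration scan stands in for the count_leading_zeros
startup above):

#include <stdio.h>
#include <stdint.h>

static uint64_t powm( uint64_t base, uint64_t exp, uint64_t mod )
{
    uint64_t r = 1;
    int i;

    for( i = 63; i >= 0; i-- ) {    /* scan exponent bits MSB first */
        r = (r * r) % mod;          /* always square */
        if( (exp >> i) & 1 )
            r = (r * base) % mod;   /* multiply on a 1 bit */
    }
    return r;
}

int main(void)
{
    printf("%llu\n", (unsigned long long)powm( 4, 13, 497 ) );
    /* prints: 445 */
    return 0;
}
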
diff --git a/mpi/mpi-scan.c b/mpi/mpi-scan.c
new file mode 100644
index 00000000..8626032a
--- /dev/null
+++ b/mpi/mpi-scan.c
@@ -0,0 +1,88 @@
+/* mpi-scan.c - MPI functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+
+/****************
+ * Scan through an MPI and return it byte by byte. A -1 is returned to
+ * indicate the end of the MPI. Scanning is done from the LSB to the MSB;
+ * returned values are in the range of 0 .. 255.
+ *
+ * FIXME: This code is VERY ugly!
+ */
+int
+mpi_getbyte( MPI a, unsigned index )
+{
+ int i, j;
+ unsigned n;
+ mpi_ptr_t ap;
+ mpi_limb_t limb;
+
+ ap = a->d;
+ for(n=0,i=0; i < a->nlimbs; i++ ) {
+ limb = ap[i];
+ for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ )
+ if( n == index )
+ return (limb >> j*8) & 0xff;
+ }
+ return -1;
+}
+
+
+/****************
+ * Put the byte C at position INDEX into A. INDEX counts from the LSB to the MSB.
+ */
+void
+mpi_putbyte( MPI a, unsigned index, int c )
+{
+ int i, j;
+ unsigned n;
+ mpi_ptr_t ap;
+ mpi_limb_t limb;
+
+#if BYTES_PER_MPI_LIMB != 4
+  #error please enhance this function, it's ugly - i know.
+#endif
+ c &= 0xff;
+ ap = a->d;
+ for(n=0,i=0; i < a->alloced; i++ ) {
+ limb = ap[i];
+ for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ )
+ if( n == index ) {
+ if( j == 0 )
+ limb = (limb & 0xffffff00) | c;
+ else if( j == 1 )
+ limb = (limb & 0xffff00ff) | (c<<8);
+ else if( j == 2 )
+ limb = (limb & 0xff00ffff) | (c<<16);
+ else
+                limb = (limb & 0x00ffffff) | ((mpi_limb_t)c<<24);
+ if( a->nlimbs <= i )
+ a->nlimbs = i+1;
+ ap[i] = limb;
+ return;
+ }
+ }
+ abort(); /* index out of range */
+}
+
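mpi_getbyte's indexing reduces to: byte INDEX, counted from the least
significant end, lives in limb INDEX/4 at bit offset (INDEX%4)*8 when limbs
are 4 bytes wide. A sketch without the MPI machinery (not part of the
patch):

#include <stdio.h>
#include <stdint.h>

static int getbyte( const uint32_t *d, unsigned nlimbs, unsigned index )
{
    unsigned limbno = index / 4;

    if( limbno >= nlimbs )
        return -1;              /* past the end of the number */
    return (d[limbno] >> (index % 4) * 8) & 0xff;
}

int main(void)
{
    uint32_t d[2] = { 0x44332211u, 0x00000055u };
    printf("%02x %02x %d\n", getbyte(d, 2, 0), getbyte(d, 2, 4),
           getbyte(d, 2, 8) );  /* prints: 11 55 -1 */
    return 0;
}
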
diff --git a/mpi/mpicoder.c b/mpi/mpicoder.c
new file mode 100644
index 00000000..23454c0f
--- /dev/null
+++ b/mpi/mpicoder.c
@@ -0,0 +1,392 @@
+/* mpicoder.c - Coder for the external representation of MPIs
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "mpi.h"
+#include "iobuf.h"
+#include "memory.h"
+#include "util.h"
+
+#ifdef M_DEBUG
+ #undef mpi_decode
+ #undef mpi_decode_buffer
+#endif
+
+#define MAX_EXTERN_MPI_BITS 16384
+
+/****************
+ * write an mpi to out.
+ */
+int
+mpi_encode( IOBUF out, MPI a )
+{
+ u16 dummy;
+ return mpi_encode_csum( out, a, &dummy );
+}
+
+int
+mpi_encode_csum( IOBUF out, MPI a, u16 *csum )
+{
+ int i;
+ byte c;
+ unsigned nbits = a->nlimbs * BITS_PER_MPI_LIMB;
+ mpi_limb_t limb;
+
+#if BYTES_PER_MPI_LIMB != 4
+ #error Make this function work with other LIMB sizes
+#endif
+ if( nbits > MAX_EXTERN_MPI_BITS )
+ log_bug("mpi_encode: mpi too large (%u bits)\n", nbits);
+ iobuf_put(out, (c=nbits >>8) ); *csum += c;
+ iobuf_put(out, (c=nbits) ); *csum += c;
+ for(i=a->nlimbs-1; i >= 0; i-- ) {
+ limb = a->d[i];
+ iobuf_put(out, (c=limb >> 24) ); *csum += c;
+ iobuf_put(out, (c=limb >> 16) ); *csum += c;
+ iobuf_put(out, (c=limb >> 8) ); *csum += c;
+ iobuf_put(out, (c=limb ) ); *csum += c;
+ }
+ return 0;
+}
+
+/****************
+ * Encode the MPI into a newly allocated buffer; the buffer is
+ * constructed so that it can be used for mpi_write. The caller
+ * must free the returned buffer. The buffer is allocated in the same
+ * type of memory space as A.
+ */
+byte *
+mpi_encode_buffer( MPI a )
+{
+ abort();
+ return NULL;
+}
+
+/****************
+ * Write an MPI to OUT. This is a special function to handle
+ * encrypted values. It simply writes the buffer A to OUT.
+ * A is a special buffer, starting with 2 bytes giving its length
+ * (in big endian order) and 2 bytes giving its length in bits (also
+ * big endian).
+ */
+int
+mpi_write( IOBUF out, byte *a)
+{
+ u16 dummy;
+ return mpi_write_csum( out, a, &dummy );
+}
+
+int
+mpi_write_csum( IOBUF out, byte *a, u16 *csum)
+{
+ int rc;
+ unsigned n;
+
+ n = *a++ << 8;
+ n |= *a++;
+ rc = iobuf_write(out, a, n );
+ for( ; n; n--, a++ )
+ *csum += *a;
+ return rc;
+}
+
+/****************
+ * Decode an external representation and return an MPI
+ * The external format is a 16 bit unsigned value stored in network byte order,
+ * giving the number of bits for the following integer. The integer is stored
+ * with MSB first (left padded with zeroes to align on a byte boundary).
+ */
+MPI
+#ifdef M_DEBUG
+mpi_debug_decode(IOBUF inp, unsigned *ret_nread, const char *info)
+#else
+mpi_decode(IOBUF inp, unsigned *ret_nread)
+#endif
+{
+ int c, i, j;
+ unsigned nbits, nbytes, nlimbs, nread=0;
+ mpi_limb_t a;
+ MPI val = MPI_NULL;
+
+ if( (c = iobuf_get(inp)) == -1 )
+ goto leave;
+ nbits = c << 8;
+ if( (c = iobuf_get(inp)) == -1 )
+ goto leave;
+ nbits |= c;
+ if( nbits > MAX_EXTERN_MPI_BITS ) {
+ log_error("mpi too large (%u bits)\n", nbits);
+ goto leave;
+ }
+ nread = 2;
+
+ nbytes = (nbits+7) / 8;
+ nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB;
+ #ifdef M_DEBUG
+ val = mpi_debug_alloc( nlimbs, info );
+ #else
+ val = mpi_alloc( nlimbs );
+ #endif
+ i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+ i %= BYTES_PER_MPI_LIMB;
+ j= val->nlimbs = nlimbs;
+ val->sign = 0;
+ for( ; j > 0; j-- ) {
+ a = 0;
+ for(; i < BYTES_PER_MPI_LIMB; i++ ) {
+ a <<= 8;
+ a |= iobuf_get(inp) & 0xff; nread++;
+ }
+ i = 0;
+ val->d[j-1] = a;
+ }
+
+ leave:
+ if( nread > *ret_nread )
+ log_error("Ooops: mpi crosses packet border");
+ else
+ *ret_nread = nread;
+ return val;
+}
+
+
+/****************
+ * Decode an MPI from the buffer, the buffer starts with two bytes giving
+ * the length of the data to follow, the original data follows.
+ * The MPI is alloced from secure MPI space
+ */
+MPI
+#ifdef M_DEBUG
+mpi_debug_decode_buffer(byte *buffer, const char *info )
+#else
+mpi_decode_buffer(byte *buffer )
+#endif
+{
+ int i, j;
+ u16 buflen;
+ unsigned nbits, nbytes, nlimbs;
+ mpi_limb_t a;
+ byte *p = buffer;
+ MPI val;
+
+ if( !buffer )
+ log_bug("mpi_decode_buffer: no buffer\n");
+ buflen = *p++ << 8;
+ buflen |= *p++;
+ nbits = *p++ << 8;
+ nbits |= *p++;
+ nbytes = (nbits+7) / 8;
+ if( nbytes+2 != buflen )
+ log_bug("mpi_decode_buffer: length conflict\n");
+ nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB;
+ #ifdef M_DEBUG
+ val = mpi_debug_alloc_secure( nlimbs, info );
+ #else
+ val = mpi_alloc_secure( nlimbs );
+ #endif
+ i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+ i %= BYTES_PER_MPI_LIMB;
+ j= val->nlimbs = nlimbs;
+ val->sign = 0;
+ for( ; j > 0; j-- ) {
+ a = 0;
+ for(; i < BYTES_PER_MPI_LIMB; i++ ) {
+ a <<= 8;
+ a |= *p++;
+ }
+ i = 0;
+ val->d[j-1] = a;
+ }
+ return val;
+}
+
+
+/****************
+ * Read an MPI from the external medium and return it in a newly allocated
+ * buffer (this buffer is allocated in the secure memory space, because
+ * we will probably need it to decipher the string).
+ * Return: the allocated string and in RET_NREAD the number of bytes
+ * read (including the 2 length bytes), the returned buffer will
+ * be prefixed with two bytes describing the length of the following
+ * data.
+ */
+byte *
+mpi_read(IOBUF inp, unsigned *ret_nread)
+{
+ int c;
+ u16 buflen;
+ unsigned nbits, nbytes, nread;
+ byte *p, *buf;
+
+ if( (c = iobuf_get(inp)) == -1 )
+ return NULL;
+ nbits = c << 8;
+ if( (c = iobuf_get(inp)) == -1 )
+ return NULL;
+ nbits |= c;
+ if( nbits > MAX_EXTERN_MPI_BITS ) {
+ log_error("mpi too large (%u bits)\n", nbits);
+ return NULL;
+ }
+ nread = 2;
+
+ nbytes = (nbits+7) / 8;
+ buflen = nbytes + 2;
+ p = buf = m_alloc_secure( buflen+2 );
+ *p++ = buflen >> 8;
+ *p++ = buflen & 0xff;
+ *p++ = nbits >> 8;
+ *p++ = nbits & 0xff;
+ for( ; nbytes ; nbytes--, nread++ )
+ *p++ = iobuf_get(inp) & 0xff;
+
+ if( nread > *ret_nread )
+ log_error("Ooops: mpi crosses packet border");
+ else
+ *ret_nread = nread;
+ return buf;
+}
+
+
+/****************
+ * Make an MPI from a character string.
+ */
+int
+mpi_fromstr(MPI val, const char *str)
+{
+ int hexmode=0, sign=0, prepend_zero=0, i, j, c, c1, c2;
+ unsigned nbits, nbytes, nlimbs;
+ mpi_limb_t a;
+
+ if( *str == '-' ) {
+ sign = 1;
+ str++;
+ }
+ if( *str == '0' && str[1] == 'x' )
+ hexmode = 1;
+ else
+ return 1; /* other bases are not yet supported */
+ str += 2;
+
+ nbits = strlen(str)*4;
+ if( nbits % 8 )
+ prepend_zero = 1;
+ nbytes = (nbits+7) / 8;
+ nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB;
+ if( val->alloced < nlimbs )
+ mpi_resize(val, nlimbs );
+ i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+ i %= BYTES_PER_MPI_LIMB;
+ j= val->nlimbs = nlimbs;
+ val->sign = sign;
+ for( ; j > 0; j-- ) {
+ a = 0;
+ for(; i < BYTES_PER_MPI_LIMB; i++ ) {
+ if( prepend_zero ) {
+ c1 = '0';
+ prepend_zero = 0;
+ }
+ else
+ c1 = *str++;
+ assert(c1);
+ c2 = *str++;
+ assert(c2);
+ if( c1 >= '0' && c1 <= '9' )
+ c = c1 - '0';
+ else if( c1 >= 'a' && c1 <= 'f' )
+ c = c1 - 'a' + 10;
+ else if( c1 >= 'A' && c1 <= 'F' )
+ c = c1 - 'A' + 10;
+ else {
+ mpi_clear(val);
+ return 1;
+ }
+ c <<= 4;
+ if( c2 >= '0' && c2 <= '9' )
+ c |= c2 - '0';
+ else if( c2 >= 'a' && c2 <= 'f' )
+ c |= c2 - 'a' + 10;
+ else if( c2 >= 'A' && c2 <= 'F' )
+ c |= c2 - 'A' + 10;
+ else {
+ mpi_clear(val);
+ return 1;
+ }
+ a <<= 8;
+ a |= c;
+ }
+ i = 0;
+ val->d[j-1] = a;
+ }
+
+ return 0;
+}
+
+
+/****************
+ * Print an MPI to the given stream and return the number of characters
+ * printed.
+ */
+int
+mpi_print( FILE *fp, MPI a, int mode )
+{
+ int i, n=0;
+
+ if( a == MPI_NULL )
+ return fprintf(fp, "[MPI_NULL]");
+ if( !mode )
+ n += fprintf(fp, "[%d bits]", a->nlimbs * BITS_PER_MPI_LIMB );
+ else {
+ if( a->sign )
+ putc('-', fp);
+ for(i=a->nlimbs; i > 0 ; i-- ) {
+ n += fprintf(fp, i!=a->nlimbs? "%0" STR2(BYTES_PER_MPI_LIMB2)
+ "lX":"%lX", (unsigned long)a->d[i-1] );
+ }
+ if( !a->nlimbs )
+ putc('0', fp );
+ }
+ return n;
+}
+
+
+/****************
+ * Special function to get the low 8 bytes from an MPI;
+ * this can be used as a keyid. KEYID is a 2-element array.
+ * Returns the low 4 bytes.
+ */
+u32
+mpi_get_keyid( MPI a, u32 *keyid )
+{
+#if BYTES_PER_MPI_LIMB != 4
+ #error Make this function work with other LIMB sizes
+#endif
+ if( keyid ) {
+ keyid[0] = a->nlimbs >= 2? a->d[1] : 0;
+ keyid[1] = a->nlimbs >= 1? a->d[0] : 0;
+ }
+ return a->nlimbs >= 1? a->d[0] : 0;
+}
+
+
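The external format described above - a 16-bit big-endian bit count
followed by the magnitude, most significant byte first - is easy to produce
by hand for a single machine word. A sketch (not part of the patch; note
that mpi_encode itself rounds the bit count up to a whole number of limbs,
whereas this writes the minimal count that mpi_decode accepts):

#include <stdio.h>
#include <stdint.h>

static unsigned encode( uint8_t *out, uint32_t v )
{
    unsigned nbits = 0, nbytes, i;
    uint32_t t;

    for( t = v; t; t >>= 1 )
        nbits++;                   /* count significant bits */
    nbytes = (nbits + 7) / 8;
    out[0] = nbits >> 8;           /* 16-bit length, network byte order */
    out[1] = nbits & 0xff;
    for( i = 0; i < nbytes; i++ )  /* magnitude, MSB first */
        out[2 + i] = v >> 8 * (nbytes - 1 - i);
    return 2 + nbytes;
}

int main(void)
{
    uint8_t buf[6];
    unsigned i, n = encode( buf, 0x1234 );  /* 13 significant bits */

    for( i = 0; i < n; i++ )
        printf("%02x ", buf[i] );   /* prints: 00 0d 12 34 */
    printf("\n");
    return 0;
}
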
diff --git a/mpi/mpih-add.c b/mpi/mpih-add.c
new file mode 100644
index 00000000..90ce8d76
--- /dev/null
+++ b/mpi/mpih-add.c
@@ -0,0 +1,109 @@
+/* mpihelp-add.c - MPI helper functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi-internal.h"
+
+/****************
+ * Add to S1_PTR with size S1_SIZE the limb S2_LIMB and
+ * store the result in RES_PTR. Return the carry.
+ * S1_SIZE must be > 0.
+ */
+/*_EXTERN_INLINE */
+mpi_limb_t
+mpihelp_add_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+ mpi_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb += x;
+ *res_ptr++ = s2_limb;
+ if( s2_limb < x ) { /* sum is less than the left operand: handle carry */
+ while( --s1_size ) {
+ x = *s1_ptr++ + 1; /* add carry */
+ *res_ptr++ = x; /* and store */
+ if( x ) /* not 0 (no overflow): we can stop */
+ goto leave;
+ }
+        return 1; /* return carry (size of s1 too small) */
+ }
+
+ leave:
+ if( res_ptr != s1_ptr ) { /* not the same variable */
+ mpi_size_t i; /* copy the rest */
+ for( i=0; i < s1_size-1; i++ )
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0; /* no carry */
+}
+
+
+/* FIXME: this should be done in assembly */
+mpi_limb_t
+mpihelp_add_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+ mpi_limb_t x, y, cy;
+ mpi_size_t j;
+
+    /* The loop counter and index J go from -SIZE to -1. This way
+ the loop becomes faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to one addend */
+ cy = y < cy? 1:0; /* get out carry from that addition */
+ y += x; /* add other addend */
+ cy += y < x? 1:0; /* get out carry from that add, combine */
+ res_ptr[j] = y;
+ } while( ++j );
+
+ return cy;
+}
+
+
+/*_EXTERN_INLINE*/
+mpi_limb_t
+mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+ mpi_limb_t cy = 0;
+
+ if( s2_size )
+ cy = mpihelp_add_n( res_ptr, s1_ptr, s2_ptr, s2_size );
+
+ if( s1_size - s2_size )
+ cy = mpihelp_add_1( res_ptr + s2_size, s1_ptr + s2_size,
+ s1_size - s2_size, cy);
+ return cy;
+}
+
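The two wrap-around tests per limb in mpihelp_add_n are the whole trick:
each one recovers a carry bit that C's unsigned addition silently drops.
A sketch on uint32_t (not part of the patch):

#include <stdio.h>
#include <stdint.h>

static uint32_t add_n( uint32_t *w, const uint32_t *a, const uint32_t *b,
                       unsigned n )
{
    uint32_t cy = 0;
    unsigned i;

    for( i = 0; i < n; i++ ) {
        uint32_t y = b[i] + cy;
        cy = y < cy;            /* carry out of adding the old carry */
        y += a[i];
        cy += y < a[i];         /* carry out of the main addition */
        w[i] = y;
    }
    return cy;
}

int main(void)
{
    uint32_t a[2] = { 0xffffffffu, 0x00000001u };
    uint32_t b[2] = { 0x00000001u, 0xfffffffeu };
    uint32_t w[2], cy = add_n( w, a, b, 2 );

    printf("%u %08x %08x\n", cy, w[1], w[0] );
    /* prints: 1 00000000 00000000 */
    return 0;
}
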
diff --git a/mpi/mpih-cmp.c b/mpi/mpih-cmp.c
new file mode 100644
index 00000000..821c0ce8
--- /dev/null
+++ b/mpi/mpih-cmp.c
@@ -0,0 +1,53 @@
+/* mpihelp-cmp.c - MPI helper functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi-internal.h"
+
+/****************
+ * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ * There are no restrictions on the relative sizes of
+ * the two arguments.
+ * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
+ */
+int
+mpihelp_cmp( mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size )
+{
+ mpi_size_t i;
+ mpi_limb_t op1_word, op2_word;
+
+ for( i = size - 1; i >= 0 ; i--) {
+ op1_word = op1_ptr[i];
+ op2_word = op2_ptr[i];
+ if( op1_word != op2_word )
+ goto diff;
+ }
+ return 0;
+
+ diff:
+ /* This can *not* be simplified to
+     *  op1_word - op2_word
+ * since that expression might give signed overflow. */
+ return (op1_word > op2_word) ? 1 : -1;
+}
+
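The warning in the comment above can be demonstrated directly: converting
the unsigned difference of two limbs to a signed int can produce the wrong
sign. A sketch (not part of the patch; the conversion of an out-of-range
value to int is implementation-defined, wrapping on the usual
two's-complement targets):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t a = 0x80000000u, b = 0;    /* clearly a > b */
    int diff = (int)(a - b);            /* 0x80000000 -> typically negative */
    int cmp  = a > b ? 1 : (a < b ? -1 : 0);

    printf("%d %d\n", diff < 0, cmp );  /* prints: 1 1 */
    /* diff's sign wrongly suggests a < b; cmp gets it right. */
    return 0;
}
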
diff --git a/mpi/mpih-div.c b/mpi/mpih-div.c
new file mode 100644
index 00000000..ca939a75
--- /dev/null
+++ b/mpi/mpih-div.c
@@ -0,0 +1,528 @@
+/* mpihelp-div.c - MPI helper functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#ifndef UMUL_TIME
+ #define UMUL_TIME 1
+#endif
+#ifndef UDIV_TIME
+ #define UDIV_TIME UMUL_TIME
+#endif
+
+/* FIXME: We should be using invert_limb (or invert_normalized_limb)
+ * here (not udiv_qrnnd).
+ */
+
+mpi_limb_t
+mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb)
+{
+ mpi_size_t i;
+ mpi_limb_t n1, n0, r;
+ int dummy;
+
+ /* Botch: Should this be handled at all? Rely on callers? */
+ if( !dividend_size )
+ return 0;
+
+ /* If multiplication is much faster than division, and the
+ * dividend is large, pre-invert the divisor, and use
+ * only multiplications in the inner loop.
+ *
+ * This test should be read:
+ * Does it ever help to use udiv_qrnnd_preinv?
+ * && Does what we save compensate for the inversion overhead?
+ */
+ if( UDIV_TIME > (2 * UMUL_TIME + 6)
+ && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
+ int normalization_steps;
+
+ count_leading_zeros( normalization_steps, divisor_limb );
+ if( normalization_steps ) {
+ mpi_limb_t divisor_limb_inverted;
+
+ divisor_limb <<= normalization_steps;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ *
+ * Special case for DIVISOR_LIMB == 100...000.
+ */
+ if( !(divisor_limb << 1) )
+ divisor_limb_inverted = ~(mpi_limb_t)0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for( i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV(dummy, r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+ divisor_limb, divisor_limb_inverted);
+ n1 = n0;
+ }
+ UDIV_QRNND_PREINV(dummy, r, r,
+ n1 << normalization_steps,
+ divisor_limb, divisor_limb_inverted);
+ return r >> normalization_steps;
+ }
+ else {
+ mpi_limb_t divisor_limb_inverted;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ *
+ * Special case for DIVISOR_LIMB == 100...000.
+ */
+ if( !(divisor_limb << 1) )
+ divisor_limb_inverted = ~(mpi_limb_t)0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if( r >= divisor_limb )
+ r = 0;
+ else
+ i--;
+
+ for( ; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV(dummy, r, r,
+ n0, divisor_limb, divisor_limb_inverted);
+ }
+ return r;
+ }
+ }
+ else {
+ if( UDIV_NEEDS_NORMALIZATION ) {
+ int normalization_steps;
+
+ count_leading_zeros(normalization_steps, divisor_limb);
+ if( normalization_steps ) {
+ divisor_limb <<= normalization_steps;
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for(i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (dummy, r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+ divisor_limb);
+ n1 = n0;
+ }
+ udiv_qrnnd (dummy, r, r,
+ n1 << normalization_steps,
+ divisor_limb);
+ return r >> normalization_steps;
+ }
+ }
+ /* No normalization needed, either because udiv_qrnnd doesn't require
+ * it, or because DIVISOR_LIMB is already normalized. */
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if(r >= divisor_limb)
+ r = 0;
+ else
+ i--;
+
+ for(; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (dummy, r, r, n0, divisor_limb);
+ }
+ return r;
+ }
+}
+
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ * the NSIZE-DSIZE least significant quotient limbs at QP
+ * and the DSIZE long remainder at NP. If QEXTRA_LIMBS is
+ * non-zero, generate that many fraction bits and append them after the
+ * other quotient limbs.
+ * Return the most significant limb of the quotient, this is always 0 or 1.
+ *
+ * Preconditions:
+ * 0. NSIZE >= DSIZE.
+ * 1. The most significant bit of the divisor must be set.
+ * 2. QP must either not overlap with the input operands at all, or
+ * QP + DSIZE >= NP must hold true. (This means that it's
+ * possible to put the quotient in the high part of NUM, right after the
+ * remainder in NUM.)
+ * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ */
+
+mpi_limb_t
+mpihelp_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
+ mpi_ptr_t np, mpi_size_t nsize,
+ mpi_ptr_t dp, mpi_size_t dsize)
+{
+ mpi_limb_t most_significant_q_limb = 0;
+
+ switch(dsize) {
+ case 0:
+ /* We are asked to divide by zero, so go ahead and do it! (To make
+ the compiler not remove this statement, return the value.) */
+ return 1 / dsize;
+
+ case 1:
+ {
+ mpi_size_t i;
+ mpi_limb_t n1;
+ mpi_limb_t d;
+
+ d = dp[0];
+ n1 = np[nsize - 1];
+
+ if( n1 >= d ) {
+ n1 -= d;
+ most_significant_q_limb = 1;
+ }
+
+ qp += qextra_limbs;
+ for( i = nsize - 2; i >= 0; i--)
+ udiv_qrnnd( qp[i], n1, n1, np[i], d );
+ qp -= qextra_limbs;
+
+ for( i = qextra_limbs - 1; i >= 0; i-- )
+ udiv_qrnnd (qp[i], n1, n1, 0, d);
+
+ np[0] = n1;
+ }
+ break;
+
+ case 2:
+ {
+ mpi_size_t i;
+ mpi_limb_t n1, n0, n2;
+ mpi_limb_t d1, d0;
+
+ np += nsize - 2;
+ d1 = dp[1];
+ d0 = dp[0];
+ n1 = np[1];
+ n0 = np[0];
+
+ if( n1 >= d1 && (n1 > d1 || n0 >= d0) ) {
+ sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ most_significant_q_limb = 1;
+ }
+
+ for( i = qextra_limbs + nsize - 2 - 1; i >= 0; i-- ) {
+ mpi_limb_t q;
+ mpi_limb_t r;
+
+ if( i >= qextra_limbs )
+ np--;
+ else
+ np[0] = 0;
+
+ if( n1 == d1 ) {
+ /* Q should be either 111..111 or 111..110. Need special
+ * treatment of this rare case as normal division would
+ * give overflow. */
+ q = ~(mpi_limb_t)0;
+
+ r = n0 + d1;
+ if( r < d1 ) { /* Carry in the addition? */
+ add_ssaaaa( n1, n0, r - d0, np[0], 0, d0 );
+ qp[i] = q;
+ continue;
+ }
+ n1 = d0 - (d0 != 0?1:0);
+ n0 = -d0;
+ }
+ else {
+ udiv_qrnnd (q, r, n1, n0, d1);
+ umul_ppmm (n1, n0, d0, q);
+ }
+
+ n2 = np[0];
+ q_test:
+ if( n1 > r || (n1 == r && n0 > n2) ) {
+ /* The estimated Q was too large. */
+ q--;
+ sub_ddmmss (n1, n0, n1, n0, 0, d0);
+ r += d1;
+ if( r >= d1 ) /* If not carry, test Q again. */
+ goto q_test;
+ }
+
+ qp[i] = q;
+ sub_ddmmss (n1, n0, r, n2, n1, n0);
+ }
+ np[1] = n1;
+ np[0] = n0;
+ }
+ break;
+
+ default:
+ {
+ mpi_size_t i;
+ mpi_limb_t dX, d1, n0;
+
+ np += nsize - dsize;
+ dX = dp[dsize - 1];
+ d1 = dp[dsize - 2];
+ n0 = np[dsize - 1];
+
+ if( n0 >= dX ) {
+ if(n0 > dX || mpihelp_cmp(np, dp, dsize - 1) >= 0 ) {
+ mpihelp_sub_n(np, np, dp, dsize);
+ n0 = np[dsize - 1];
+ most_significant_q_limb = 1;
+ }
+ }
+
+ for( i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
+ mpi_limb_t q;
+ mpi_limb_t n1, n2;
+ mpi_limb_t cy_limb;
+
+ if( i >= qextra_limbs ) {
+ np--;
+ n2 = np[dsize];
+ }
+ else {
+ n2 = np[dsize - 1];
+ MPN_COPY_DECR (np + 1, np, dsize);
+ np[0] = 0;
+ }
+
+ if( n0 == dX ) {
+ /* This might over-estimate q, but it's probably not worth
+ * the extra code here to find out. */
+ q = ~(mpi_limb_t)0;
+ }
+ else {
+ mpi_limb_t r;
+
+ udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
+ umul_ppmm(n1, n0, d1, q);
+
+ while( n1 > r || (n1 == r && n0 > np[dsize - 2])) {
+ q--;
+ r += dX;
+ if( r < dX ) /* I.e. "carry in previous addition?" */
+ break;
+ n1 -= n0 < d1;
+ n0 -= d1;
+ }
+ }
+
+ /* Possible optimization: We already have (q * n0) and (1 * n1)
+ * after the calculation of q. Taking advantage of that, we
+ * could make this loop make two iterations less. */
+ cy_limb = mpihelp_submul_1(np, dp, dsize, q);
+
+ if( n2 != cy_limb ) {
+ mpihelp_add_n(np, np, dp, dsize);
+ q--;
+ }
+
+ qp[i] = q;
+ n0 = np[dsize - 1];
+ }
+ }
+ }
+
+ return most_significant_q_limb;
+}
+
+
+/****************
+ * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ * Return the single-limb remainder.
+ * There are no constraints on the value of the divisor.
+ *
+ * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+ */
+
+mpi_limb_t
+mpihelp_divmod_1( mpi_ptr_t quot_ptr,
+ mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+ mpi_limb_t divisor_limb)
+{
+ mpi_size_t i;
+ mpi_limb_t n1, n0, r;
+ int dummy;
+
+ if( !dividend_size )
+ return 0;
+
+ /* If multiplication is much faster than division, and the
+ * dividend is large, pre-invert the divisor, and use
+ * only multiplications in the inner loop.
+ *
+ * This test should be read:
+ * Does it ever help to use udiv_qrnnd_preinv?
+ * && Does what we save compensate for the inversion overhead?
+ */
+ if( UDIV_TIME > (2 * UMUL_TIME + 6)
+ && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME ) {
+ int normalization_steps;
+
+ count_leading_zeros( normalization_steps, divisor_limb );
+ if( normalization_steps ) {
+ mpi_limb_t divisor_limb_inverted;
+
+ divisor_limb <<= normalization_steps;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ */
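+	    /* A small worked example, assuming N = 4 bits: for
+	     * DIVISOR_LIMB = 1010b = 10, (2**8 - 2**4 * 10) / 10
+	     * = 96/10 = 9 = 1001b; together with the implicit leading
+	     * bit the approximation is 2**4 + 9 = 25, close to the
+	     * exact 2**8 / 10 = 25.6. */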
+ /* Special case for DIVISOR_LIMB == 100...000. */
+ if( !(divisor_limb << 1) )
+ divisor_limb_inverted = ~(mpi_limb_t)0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for( i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV( quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+ divisor_limb, divisor_limb_inverted);
+ n1 = n0;
+ }
+ UDIV_QRNND_PREINV( quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb, divisor_limb_inverted);
+ return r >> normalization_steps;
+ }
+ else {
+ mpi_limb_t divisor_limb_inverted;
+
+ /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB. The
+ * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+ * most significant bit (with weight 2**N) implicit.
+ */
+ /* Special case for DIVISOR_LIMB == 100...000. */
+ if( !(divisor_limb << 1) )
+ divisor_limb_inverted = ~(mpi_limb_t) 0;
+ else
+ udiv_qrnnd(divisor_limb_inverted, dummy,
+ -divisor_limb, 0, divisor_limb);
+
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if( r >= divisor_limb )
+ r = 0;
+ else
+ quot_ptr[i--] = 0;
+
+ for( ; i >= 0; i-- ) {
+ n0 = dividend_ptr[i];
+ UDIV_QRNND_PREINV( quot_ptr[i], r, r,
+ n0, divisor_limb, divisor_limb_inverted);
+ }
+ return r;
+ }
+ }
+ else {
+ if(UDIV_NEEDS_NORMALIZATION) {
+ int normalization_steps;
+
+ count_leading_zeros (normalization_steps, divisor_limb);
+ if( normalization_steps ) {
+ divisor_limb <<= normalization_steps;
+
+ n1 = dividend_ptr[dividend_size - 1];
+ r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+ /* Possible optimization:
+ * if (r == 0
+ * && divisor_limb > ((n1 << normalization_steps)
+ * | (dividend_ptr[dividend_size - 2] >> ...)))
+ * ...one division less...
+ */
+ for( i = dividend_size - 2; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd (quot_ptr[i + 1], r, r,
+ ((n1 << normalization_steps)
+ | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+ divisor_limb);
+ n1 = n0;
+ }
+ udiv_qrnnd (quot_ptr[0], r, r,
+ n1 << normalization_steps,
+ divisor_limb);
+ return r >> normalization_steps;
+ }
+ }
+ /* No normalization needed, either because udiv_qrnnd doesn't require
+ * it, or because DIVISOR_LIMB is already normalized. */
+ i = dividend_size - 1;
+ r = dividend_ptr[i];
+
+ if(r >= divisor_limb)
+ r = 0;
+ else
+ quot_ptr[i--] = 0;
+
+ for(; i >= 0; i--) {
+ n0 = dividend_ptr[i];
+ udiv_qrnnd( quot_ptr[i], r, r, n0, divisor_limb );
+ }
+ return r;
+ }
+}
+
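+/* Usage sketch for mpihelp_divmod_1 (illustrative only; num and quot
+ * are hypothetical caller arrays), assuming 32-bit limbs:
+ *
+ *    mpi_limb_t num[1] = { 1000000007 };
+ *    mpi_limb_t quot[1];
+ *    mpi_limb_t r = mpihelp_divmod_1( quot, num, 1, 10 );
+ *
+ * leaves quot[0] == 100000000 and r == 7. */
+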
+
diff --git a/mpi/mpih-mul.c b/mpi/mpih-mul.c
new file mode 100644
index 00000000..c579a93f
--- /dev/null
+++ b/mpi/mpih-mul.c
@@ -0,0 +1,557 @@
+/* mpihelp-mul.c - MPI helper functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+ * value which is good on most machines. */
+#ifndef KARATSUBA_THRESHOLD
+ #define KARATSUBA_THRESHOLD 32
+#endif
+
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
+#if KARATSUBA_THRESHOLD < 2
+ #undef KARATSUBA_THRESHOLD
+ #define KARATSUBA_THRESHOLD 2
+#endif
+
+
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+    do {						\
+	if( (size) < KARATSUBA_THRESHOLD )		\
+	    mul_n_basecase (prodp, up, vp, size);	\
+	else						\
+	    mul_n (prodp, up, vp, size, tspace);	\
+    } while (0)
+
+#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
+    do {					    \
+	if ((size) < KARATSUBA_THRESHOLD)	    \
+	    sqr_n_basecase (prodp, up, size);	    \
+	else					    \
+	    sqr_n (prodp, up, size, tspace);	    \
+    } while (0)
+
+
+
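+/* res_ptr[0..s1_size-1] += s1_ptr[0..s1_size-1] * s2_limb; the carry
+ * limb that would land in res_ptr[s1_size] is returned instead of
+ * being stored. */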
+mpi_limb_t
+mpihelp_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+ mpi_limb_t cy_limb;
+ mpi_size_t j;
+ mpi_limb_t prod_high, prod_low;
+ mpi_limb_t x;
+
+    /* The loop counter and index J go from -S1_SIZE to -1.  This way
+     * the termination test is a simple compare against zero, which
+     * makes the loop faster. */
+ j = -s1_size;
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do {
+ umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb?1:0) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x + prod_low;
+ cy_limb += prod_low < x?1:0;
+ res_ptr[j] = prod_low;
+ } while ( ++j );
+ return cy_limb;
+}
+
+
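+/* res_ptr[0..s1_size-1] -= s1_ptr[0..s1_size-1] * s2_limb; the borrow
+ * out of the most significant limb is returned. */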
+mpi_limb_t
+mpihelp_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+ mpi_limb_t cy_limb;
+ mpi_size_t j;
+ mpi_limb_t prod_high, prod_low;
+ mpi_limb_t x;
+
+    /* The loop counter and index J go from -S1_SIZE to -1.  This way
+     * the termination test is a simple compare against zero, which
+     * makes the loop faster. */
+ j = -s1_size;
+ res_ptr -= j;
+ s1_ptr -= j;
+
+ cy_limb = 0;
+ do {
+ umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb);
+
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb?1:0) + prod_high;
+
+ x = res_ptr[j];
+ prod_low = x - prod_low;
+ cy_limb += prod_low > x?1:0;
+ res_ptr[j] = prod_low;
+ } while( ++j );
+
+ return cy_limb;
+}
+
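+/* res_ptr[0..s1_size-1] = s1_ptr[0..s1_size-1] * s2_limb; the most
+ * significant limb of the product is returned. */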
+mpi_limb_t
+mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_limb_t s2_limb)
+{
+ mpi_limb_t cy_limb;
+ mpi_size_t j;
+ mpi_limb_t prod_high, prod_low;
+
+    /* The loop counter and index J go from -S1_SIZE to -1.  This way
+     * the termination test is a simple compare against zero, which
+     * makes the loop faster. */
+ j = -s1_size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ res_ptr -= j;
+
+ cy_limb = 0;
+ do {
+ umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
+ prod_low += cy_limb;
+ cy_limb = (prod_low < cy_limb?1:0) + prod_high;
+ res_ptr[j] = prod_low;
+ } while( ++j );
+
+ return cy_limb;
+}
+
+
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+ * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
+ * always stored. Return the most significant limb.
+ *
+ * Argument constraints:
+ * 1. PRODP != UP and PRODP != VP, i.e. the destination
+ * must be distinct from the multiplier and the multiplicand.
+ *
+ * Handle simple cases with traditional multiplication.
+ *
+ * This is the most critical code of multiplication. All multiplies rely
+ * on this, both small and huge. Small ones arrive here immediately. Huge
+ * ones arrive here as this is the base case for Karatsuba's recursive
+ * algorithm below.
+ */
+
+static mpi_limb_t
+mul_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up,
+ mpi_ptr_t vp, mpi_size_t size)
+{
+ mpi_size_t i;
+ mpi_limb_t cy;
+ mpi_limb_t v_limb;
+
+    /* Multiply by the first limb in V separately, as that result can
+     * simply be stored (rather than added) into PROD; this also avoids
+     * a separate zeroing loop. */
+ v_limb = vp[0];
+ if( v_limb <= 1 ) {
+ if( v_limb == 1 )
+ MPN_COPY( prodp, up, size );
+ else
+ MPN_ZERO( prodp, size );
+ cy = 0;
+ }
+ else
+ cy = mpihelp_mul_1( prodp, up, size, v_limb );
+
+ prodp[size] = cy;
+ prodp++;
+
+    /* For each iteration in the outer loop, multiply the whole of U
+     * by one limb from V, and add the product to PROD. */
+ for( i = 1; i < size; i++ ) {
+ v_limb = vp[i];
+ if( v_limb <= 1 ) {
+ cy = 0;
+ if( v_limb == 1 )
+ cy = mpihelp_add_n(prodp, prodp, up, size);
+ }
+ else
+ cy = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+ prodp[size] = cy;
+ prodp++;
+ }
+
+ return cy;
+}
+
+
+static void
+mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+ mpi_size_t size, mpi_ptr_t tspace )
+{
+ if( size & 1 ) {
+	/* The size is odd, the code below doesn't handle that.
+ * Multiply the least significant (size - 1) limbs with a recursive
+ * call, and handle the most significant limb of S1 and S2
+ * separately.
+ * A slightly faster way to do this would be to make the Karatsuba
+ * code below behave as if the size were even, and let it check for
+ * odd size in the end. I.e., in essence move this code to the end.
+ * Doing so would save us a recursive call, and potentially make the
+ * stack grow a lot less.
+ */
+ mpi_size_t esize = size - 1; /* even size */
+ mpi_limb_t cy_limb;
+
+ MPN_MUL_N_RECURSE( prodp, up, vp, esize, tspace );
+ cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, vp[esize] );
+ prodp[esize + esize] = cy_limb;
+ cy_limb = mpihelp_addmul_1( prodp + esize, vp, size, up[esize] );
+ prodp[esize + size] = cy_limb;
+ }
+ else {
+ /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
+ *
+ * Split U in two pieces, U1 and U0, such that
+ * U = U0 + U1*(B**n),
+ * and V in V1 and V0, such that
+ * V = V0 + V1*(B**n).
+ *
+ * UV is then computed recursively using the identity
+ *
+ * 2n n n n
+ * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V
+ * 1 1 1 0 0 1 0 0
+ *
+	 * Where B = 2**BITS_PER_MPI_LIMB.
+ */
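+	/* Written on one line, with M = (U1-U0)(V0-V1), the identity
+	 * above reads
+	 *
+	 *   U*V = (B**2n + B**n)*U1*V1 + B**n*M + (B**n + 1)*U0*V0
+	 *
+	 * so one full-size multiplication is traded for three
+	 * half-size ones, giving the O(size**lg(3)) running time. */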
+ mpi_size_t hsize = size >> 1;
+ mpi_limb_t cy;
+ int negflg;
+
+ /* Product H. ________________ ________________
+ * |_____U1 x V1____||____U0 x V0_____|
+ * Put result in upper part of PROD and pass low part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, tspace);
+
+ /* Product M. ________________
+ * |_(U1-U0)(V0-V1)_|
+ */
+ if( mpihelp_cmp(up + hsize, up, hsize) >= 0 ) {
+ mpihelp_sub_n(prodp, up + hsize, up, hsize);
+ negflg = 0;
+ }
+ else {
+ mpihelp_sub_n(prodp, up, up + hsize, hsize);
+ negflg = 1;
+ }
+ if( mpihelp_cmp(vp + hsize, vp, hsize) >= 0 ) {
+ mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
+ negflg ^= 1;
+ }
+ else {
+ mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
+ /* No change of NEGFLG. */
+ }
+ /* Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, tspace + size);
+
+ /* Add/copy product H. */
+ MPN_COPY (prodp + hsize, prodp + size, hsize);
+ cy = mpihelp_add_n( prodp + size, prodp + size,
+ prodp + size + hsize, hsize);
+
+ /* Add product M (if NEGFLG M is a negative number) */
+ if(negflg)
+ cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
+ else
+ cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+
+ /* Product L. ________________ ________________
+ * |________________||____U0 x V0_____|
+ * Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
+
+ /* Add/copy Product L (twice) */
+
+ cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+ if( cy )
+ mpihelp_add_1(prodp + hsize + size, prodp + hsize + size, hsize, cy);
+
+ MPN_COPY(prodp, tspace, hsize);
+ cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+ if( cy )
+ mpihelp_add_1(prodp + size, prodp + size, size, 1);
+ }
+}
+
+
+static void
+sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
+{
+ mpi_size_t i;
+ mpi_limb_t cy_limb;
+ mpi_limb_t v_limb;
+
+    /* Multiply by the first limb of U separately, as that result can
+     * simply be stored (rather than added) into PROD; this also avoids
+     * a separate zeroing loop. */
+ v_limb = up[0];
+ if( v_limb <= 1 ) {
+ if( v_limb == 1 )
+ MPN_COPY( prodp, up, size );
+ else
+ MPN_ZERO(prodp, size);
+ cy_limb = 0;
+ }
+ else
+ cy_limb = mpihelp_mul_1( prodp, up, size, v_limb );
+
+ prodp[size] = cy_limb;
+ prodp++;
+
+    /* For each iteration in the outer loop, multiply the whole of U
+     * by its next limb, and add the product to PROD. */
+ for( i=1; i < size; i++) {
+ v_limb = up[i];
+ if( v_limb <= 1 ) {
+ cy_limb = 0;
+ if( v_limb == 1 )
+ cy_limb = mpihelp_add_n(prodp, prodp, up, size);
+ }
+ else
+ cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
+
+ prodp[size] = cy_limb;
+ prodp++;
+ }
+}
+
+
+static void
+sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+{
+ if( size & 1 ) {
+	/* The size is odd, the code below doesn't handle that.
+ * Multiply the least significant (size - 1) limbs with a recursive
+ * call, and handle the most significant limb of S1 and S2
+ * separately.
+ * A slightly faster way to do this would be to make the Karatsuba
+ * code below behave as if the size were even, and let it check for
+ * odd size in the end. I.e., in essence move this code to the end.
+ * Doing so would save us a recursive call, and potentially make the
+ * stack grow a lot less.
+ */
+ mpi_size_t esize = size - 1; /* even size */
+ mpi_limb_t cy_limb;
+
+ MPN_SQR_N_RECURSE( prodp, up, esize, tspace );
+ cy_limb = mpihelp_addmul_1( prodp + esize, up, esize, up[esize] );
+ prodp[esize + esize] = cy_limb;
+ cy_limb = mpihelp_addmul_1( prodp + esize, up, size, up[esize] );
+ prodp[esize + size] = cy_limb;
+ }
+ else {
+ mpi_size_t hsize = size >> 1;
+ mpi_limb_t cy;
+
+ /* Product H. ________________ ________________
+ * |_____U1 x U1____||____U0 x U0_____|
+ * Put result in upper part of PROD and pass low part of TSPACE
+ * as new TSPACE.
+ */
+ MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
+
+ /* Product M. ________________
+ * |_(U1-U0)(U0-U1)_|
+ */
+ if( mpihelp_cmp( up + hsize, up, hsize) >= 0 )
+ mpihelp_sub_n( prodp, up + hsize, up, hsize);
+ else
+ mpihelp_sub_n (prodp, up, up + hsize, hsize);
+
+ /* Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE. */
+ MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
+
+ /* Add/copy product H */
+ MPN_COPY(prodp + hsize, prodp + size, hsize);
+ cy = mpihelp_add_n(prodp + size, prodp + size,
+ prodp + size + hsize, hsize);
+
+ /* Add product M (if NEGFLG M is a negative number). */
+ cy -= mpihelp_sub_n (prodp + hsize, prodp + hsize, tspace, size);
+
+ /* Product L. ________________ ________________
+ * |________________||____U0 x U0_____|
+ * Read temporary operands from low part of PROD.
+ * Put result in low part of TSPACE using upper part of TSPACE
+ * as new TSPACE. */
+ MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
+
+ /* Add/copy Product L (twice). */
+ cy += mpihelp_add_n (prodp + hsize, prodp + hsize, tspace, size);
+ if( cy )
+ mpihelp_add_1(prodp + hsize + size, prodp + hsize + size,
+ hsize, cy);
+
+ MPN_COPY(prodp, tspace, hsize);
+ cy = mpihelp_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
+ if( cy )
+ mpihelp_add_1 (prodp + size, prodp + size, size, 1);
+ }
+}
+
+
+/* This should be made into an inline function. */
+void
+mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+ if( up == vp ) {
+ if( size < KARATSUBA_THRESHOLD )
+ sqr_n_basecase( prodp, up, size );
+ else {
+ mpi_ptr_t tspace;
+ tspace = mpi_alloc_limb_space( 2 * size );
+ sqr_n( prodp, up, size, tspace );
+ mpi_free_limb_space( tspace );
+ }
+ }
+ else {
+ if( size < KARATSUBA_THRESHOLD )
+ mul_n_basecase( prodp, up, vp, size );
+ else {
+ mpi_ptr_t tspace;
+ tspace = mpi_alloc_limb_space( 2 * size );
+ mul_n (prodp, up, vp, size, tspace);
+ mpi_free_limb_space( tspace );
+ }
+ }
+}
+
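+/* Usage sketch for mpihelp_mul_n (illustrative only; up and vp are
+ * hypothetical 8-limb operands).  PRODP must provide 2*SIZE limbs and
+ * may not overlap the inputs:
+ *
+ *    mpi_ptr_t prod = mpi_alloc_limb_space( 2 * 8 );
+ *    mpihelp_mul_n( prod, up, vp, 8 );
+ *    mpi_free_limb_space( prod );
+ */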
+
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+ * and v (pointed to by VP, with VSIZE limbs), and store the result at
+ * PRODP.  USIZE + VSIZE limbs are always stored, even though the most
+ * significant limb may be zero.  Return the most significant limb of
+ * the result.
+ *
+ * NOTE: The space pointed to by PRODP is overwritten before all of U
+ * and V has been read, so it must not overlap either operand.
+ *
+ * Argument constraints:
+ * 1. USIZE >= VSIZE.
+ * 2. PRODP != UP and PRODP != VP, i.e. the destination
+ * must be distinct from the multiplier and the multiplicand.
+ */
+
+mpi_limb_t
+mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+ mpi_ptr_t vp, mpi_size_t vsize)
+{
+ mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
+ mpi_limb_t cy;
+ mpi_ptr_t tspace;
+
+ if( vsize < KARATSUBA_THRESHOLD ) {
+ mpi_size_t i;
+ mpi_limb_t v_limb;
+
+ if( !vsize )
+ return 0;
+
+	/* Multiply by the first limb in V separately, as that result can
+	 * simply be stored (rather than added) into PROD; this also
+	 * avoids a separate zeroing loop. */
+ v_limb = vp[0];
+ if( v_limb <= 1 ) {
+ if( v_limb == 1 )
+ MPN_COPY( prodp, up, usize );
+ else
+ MPN_ZERO( prodp, usize );
+ cy = 0;
+ }
+ else
+ cy = mpihelp_mul_1( prodp, up, usize, v_limb );
+
+ prodp[usize] = cy;
+ prodp++;
+
+	/* For each iteration in the outer loop, multiply the whole of U
+	 * by one limb from V, and add the product to PROD. */
+ for( i = 1; i < vsize; i++ ) {
+ v_limb = vp[i];
+ if( v_limb <= 1 ) {
+ cy = 0;
+ if( v_limb == 1 )
+ cy = mpihelp_add_n(prodp, prodp, up, usize);
+ }
+ else
+ cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
+
+ prodp[usize] = cy;
+ prodp++;
+ }
+
+ return cy;
+ }
+
+ tspace = mpi_alloc_limb_space( 2 * vsize );
+ MPN_MUL_N_RECURSE( prodp, up, vp, vsize, tspace );
+
+ prodp += vsize;
+ up += vsize;
+ usize -= vsize;
+ if( usize >= vsize ) {
+ mpi_ptr_t tp = mpi_alloc_limb_space( 2 * vsize );
+ do {
+ MPN_MUL_N_RECURSE( tp, up, vp, vsize, tspace );
+ cy = mpihelp_add_n( prodp, prodp, tp, vsize );
+ mpihelp_add_1( prodp + vsize, tp + vsize, vsize, cy );
+ prodp += vsize;
+ up += vsize;
+ usize -= vsize;
+ } while( usize >= vsize );
+ mpi_free_limb_space( tp );
+ }
+
+ if( usize ) {
+ mpihelp_mul( tspace, vp, vsize, up, usize );
+ cy = mpihelp_add_n( prodp, prodp, tspace, vsize);
+ mpihelp_add_1( prodp + vsize, tspace + vsize, usize, cy );
+ }
+
+ mpi_free_limb_space( tspace );
+ return *prod_endp;
+}
+
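+/* Usage sketch for mpihelp_mul (illustrative only; up, vp, usize and
+ * vsize are hypothetical caller data): for usize >= vsize the caller
+ * provides usize+vsize product limbs, distinct from both inputs:
+ *
+ *    mpi_ptr_t  prod = mpi_alloc_limb_space( usize + vsize );
+ *    mpi_limb_t msl  = mpihelp_mul( prod, up, usize, vp, vsize );
+ */
+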
+
diff --git a/mpi/mpih-shift.c b/mpi/mpih-shift.c
new file mode 100644
index 00000000..a8fe26aa
--- /dev/null
+++ b/mpi/mpih-shift.c
@@ -0,0 +1,94 @@
+/* mpihelp-shift.c - MPI helper functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the left
+ * and store the USIZE least significant limbs of the result at WP.
+ * Return the bits shifted out of the most significant limb.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MPI_LIMB
+ * 2. If the result is to be written over the input, WP must be >= UP.
+ */
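+/* A small example, assuming 8-bit limbs: shifting up[] = { 0xff, 0x01 }
+ * (the value 0x01ff) left by 4 yields wp[] = { 0xf0, 0x1f } and
+ * returns 0x00 as the shifted-out bits. */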
+
+mpi_limb_t
+mpihelp_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+ unsigned int cnt)
+{
+ mpi_limb_t high_limb, low_limb;
+ unsigned sh_1, sh_2;
+ mpi_size_t i;
+ mpi_limb_t retval;
+
+ sh_1 = cnt;
+ wp += 1;
+ sh_2 = BITS_PER_MPI_LIMB - sh_1;
+ i = usize - 1;
+ low_limb = up[i];
+ retval = low_limb >> sh_2;
+ high_limb = low_limb;
+ while( --i >= 0 ) {
+ low_limb = up[i];
+ wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+ high_limb = low_limb;
+ }
+ wp[i] = high_limb << sh_1;
+
+ return retval;
+}
+
+
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ * and store the USIZE least significant limbs of the result at WP.
+ * The bits shifted out to the right are returned.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MPI_LIMB
+ * 2. If the result is to be written over the input, WP must be <= UP.
+ */
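+/* A small example, assuming 8-bit limbs: shifting up[] = { 0xff, 0x01 }
+ * (the value 0x01ff) right by 4 yields wp[] = { 0x1f, 0x00 } and
+ * returns 0xf0, i.e. the shifted-out bits left-aligned in the limb. */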
+
+mpi_limb_t
+mpihelp_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
+{
+ mpi_limb_t high_limb, low_limb;
+ unsigned sh_1, sh_2;
+ mpi_size_t i;
+ mpi_limb_t retval;
+
+ sh_1 = cnt;
+ wp -= 1;
+ sh_2 = BITS_PER_MPI_LIMB - sh_1;
+ high_limb = up[0];
+ retval = high_limb << sh_2;
+ low_limb = high_limb;
+ for( i=1; i < usize; i++) {
+ high_limb = up[i];
+ wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+ low_limb = high_limb;
+ }
+ wp[i] = low_limb >> sh_1;
+
+ return retval;
+}
+
diff --git a/mpi/mpih-sub.c b/mpi/mpih-sub.c
new file mode 100644
index 00000000..3831d81c
--- /dev/null
+++ b/mpi/mpih-sub.c
@@ -0,0 +1,106 @@
+/* mpihelp-sub.c - MPI helper functions
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi-internal.h"
+
+
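+/* res_ptr[0..s1_size-1] = s1_ptr[0..s1_size-1] - s2_limb; return 1 if
+ * the subtraction borrows out of the most significant limb, else 0. */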
+/*_EXTERN_INLINE*/
+mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_size_t s1_size, mpi_limb_t s2_limb )
+{
+ mpi_limb_t x;
+
+ x = *s1_ptr++;
+ s2_limb = x - s2_limb;
+ *res_ptr++ = s2_limb;
+ if( s2_limb > x ) {
+ while( --s1_size ) {
+ x = *s1_ptr++;
+ *res_ptr++ = x - 1;
+ if( x )
+ goto leave;
+ }
+ return 1;
+ }
+
+ leave:
+ if( res_ptr != s1_ptr ) {
+ mpi_size_t i;
+ for( i=0; i < s1_size-1; i++ )
+ res_ptr[i] = s1_ptr[i];
+ }
+ return 0;
+}
+
+
+/* FIXME: this should be done in assembly */
+mpi_limb_t
+mpihelp_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+ mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+ mpi_limb_t x, y, cy;
+ mpi_size_t j;
+
+    /* The loop counter and index J go from -SIZE to -1.  This way
+       the termination test is a simple compare against zero, which
+       makes the loop faster. */
+ j = -size;
+
+ /* Offset the base pointers to compensate for the negative indices. */
+ s1_ptr -= j;
+ s2_ptr -= j;
+ res_ptr -= j;
+
+ cy = 0;
+ do {
+ y = s2_ptr[j];
+ x = s1_ptr[j];
+ y += cy; /* add previous carry to subtrahend */
+ cy = y < cy ? 1:0; /* get out carry from that addition */
+ y = x - y; /* main subtract */
+ cy += y > x? 1:0; /* get out carry from the subtract, combine */
+ res_ptr[j] = y;
+ } while( ++j );
+
+ return cy;
+}
+
+
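+/* res_ptr[] = s1_ptr[] - s2_ptr[], where S1_SIZE >= S2_SIZE; return
+ * the borrow out of the most significant limb. */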
+/*_EXTERN_INLINE*/
+mpi_limb_t
+mpihelp_sub( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+ mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+ mpi_limb_t cy = 0;
+
+ if( s2_size )
+ cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+
+ if( s1_size - s2_size )
+ cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+ s1_size - s2_size, cy);
+ return cy;
+}
+
+
diff --git a/mpi/mpiutil.c b/mpi/mpiutil.c
new file mode 100644
index 00000000..752ce7f8
--- /dev/null
+++ b/mpi/mpiutil.c
@@ -0,0 +1,326 @@
+/* mpiutil.c - Utility functions for MPI
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#include "mpi.h"
+#include "mpi-internal.h"
+#include "memory.h"
+#include "util.h"
+
+
+#ifdef M_DEBUG
+ #undef mpi_alloc
+ #undef mpi_alloc_secure
+ #undef mpi_free
+#endif
+
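+/* Freed MPI structs and limb arrays are not returned to the allocator
+ * but kept on simple singly-linked free lists for reuse: unused_mpis
+ * holds reusable MPI structs, unused_limbs reusable limb arrays, and
+ * unused_objs recycles the list nodes themselves. */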
+typedef struct unused_obj {
+ struct unused_obj *next;
+ unsigned length;
+ union {
+ MPI mpi;
+ mpi_limb_t *limb;
+ } u;
+} *unused_obj_t;
+
+static unused_obj_t unused_objs;
+static unused_obj_t unused_mpis;
+static unused_obj_t unused_limbs;
+
+
+MPI
+#ifdef M_DEBUG
+mpi_debug_alloc( unsigned nlimbs, const char *info )
+#else
+mpi_alloc( unsigned nlimbs )
+#endif
+{
+ MPI a;
+
+ if( unused_mpis ) {
+ unused_obj_t u;
+
+ if( DBG_MEMORY )
+	    log_debug("mpi_alloc(%lu) reusing\n", (ulong)nlimbs * BITS_PER_MPI_LIMB );
+ a = unused_mpis->u.mpi;
+ u = unused_mpis;
+ unused_mpis = unused_mpis->next;
+ u->next = unused_objs;
+ unused_objs = u;
+ }
+ else {
+ if( DBG_MEMORY )
+	    log_debug("mpi_alloc(%lu) new\n", (ulong)nlimbs * BITS_PER_MPI_LIMB );
+ #ifdef M_DEBUG
+ a = m_debug_alloc( sizeof *a, info );
+ #else
+ a = m_alloc( sizeof *a );
+ #endif
+ }
+ #ifdef M_DEBUG
+ a->d = mpi_debug_alloc_limb_space( nlimbs, info );
+ #else
+ a->d = mpi_alloc_limb_space( nlimbs );
+ #endif
+ a->alloced = nlimbs;
+ a->nlimbs = 0;
+ a->sign = 0;
+ return a;
+}
+
+void
+mpi_m_check( MPI a )
+{
+ m_check(a);
+ m_check(a->d);
+}
+
+MPI
+#ifdef M_DEBUG
+mpi_debug_alloc_secure( unsigned nlimbs, const char *info )
+#else
+mpi_alloc_secure( unsigned nlimbs )
+#endif
+{
+ MPI a;
+
+ a = m_alloc( sizeof *a );
+ #ifdef M_DEBUG
+ a->d = m_debug_alloc_secure( nlimbs * sizeof(mpi_limb_t), info );
+ #else
+ a->d = m_alloc_secure( nlimbs * sizeof(mpi_limb_t) );
+ #endif
+ a->alloced = nlimbs;
+ a->nlimbs = 0;
+ a->sign = 0;
+ return a;
+}
+
+
+mpi_ptr_t
+#ifdef M_DEBUG
+mpi_debug_alloc_limb_space( unsigned nlimbs, const char *info )
+#else
+mpi_alloc_limb_space( unsigned nlimbs )
+#endif
+{
+ unused_obj_t u;
+ size_t len = nlimbs * sizeof(mpi_limb_t);
+
+ for(u=unused_limbs; u; u = u->next )
+ if( u->length >= len ) {
+ u->length = 0;
+ if( DBG_MEMORY )
+		log_debug("mpi_alloc_limb_space(%lu) reusing\n", (ulong)len * 8 );
+ return u->u.limb;
+ }
+ if( DBG_MEMORY )
+	log_debug("mpi_alloc_limb_space(%lu) new\n", (ulong)len * 8 );
+ #ifdef M_DEBUG
+ return m_debug_alloc( len, info );
+ #else
+ return m_alloc( len );
+ #endif
+}
+
+void
+#ifdef M_DEBUG
+mpi_debug_free_limb_space( mpi_ptr_t a, const char *info )
+#else
+mpi_free_limb_space( mpi_ptr_t a )
+#endif
+{
+ unused_obj_t u;
+
+ if( !a )
+ return;
+ if( DBG_MEMORY )
+ log_debug("mpi_free_limb_space of size %lu\n", (ulong)m_size(a)*8 );
+ for(u=unused_limbs; u; u = u->next )
+ if( !u->length ) {
+ u->length = m_size(a);
+ u->u.limb = a;
+ return;
+ }
+
+ if( (u=unused_objs) )
+ unused_objs = unused_objs->next;
+ else
+ u = m_alloc( sizeof *u );
+ u->length = m_size(a);
+ u->u.limb = a;
+ u->next = unused_limbs;
+ unused_limbs = u;
+}
+
+
+void
+mpi_assign_limb_space( MPI a, mpi_ptr_t ap, unsigned nlimbs )
+{
+ mpi_free_limb_space(a->d);
+ a->d = ap;
+ a->alloced = nlimbs;
+}
+
+
+
+/****************
+ * Resize the limb array of A to NLIMBS limbs.  The additional space
+ * is cleared (set to 0); m_realloc() takes care of that.
+ */
+void
+#ifdef M_DEBUG
+mpi_debug_resize( MPI a, unsigned nlimbs, const char *info )
+#else
+mpi_resize( MPI a, unsigned nlimbs )
+#endif
+{
+ if( nlimbs <= a->alloced )
+ return; /* no need to do it */
+ #ifdef M_DEBUG
+ if( a->d )
+ a->d = m_debug_realloc(a->d, nlimbs * sizeof(mpi_limb_t), info );
+ else
+ a->d = m_debug_alloc_clear( nlimbs * sizeof(mpi_limb_t), info );
+ #else
+ if( a->d )
+ a->d = m_realloc(a->d, nlimbs * sizeof(mpi_limb_t) );
+ else
+ a->d = m_alloc_clear( nlimbs * sizeof(mpi_limb_t) );
+ #endif
+ a->alloced = nlimbs;
+}
+
+void
+mpi_clear( MPI a )
+{
+ a->nlimbs = 0;
+}
+
+
+void
+#ifdef M_DEBUG
+mpi_debug_free( MPI a, const char *info )
+#else
+mpi_free( MPI a )
+#endif
+{
+ unused_obj_t u;
+
+ if( !a )
+ return;
+ if( DBG_MEMORY )
+ log_debug("mpi_free\n" );
+ #ifdef M_DEBUG
+ mpi_debug_free_limb_space(a->d, info);
+ #else
+ mpi_free_limb_space(a->d);
+ #endif
+
+ if( (u=unused_objs) )
+ unused_objs = unused_objs->next;
+ else
+ u = m_alloc( sizeof *u );
+ u->u.mpi = a;
+ u->next = unused_mpis;
+ unused_mpis = u;
+}
+
+
+MPI
+#ifdef M_DEBUG
+mpi_debug_copy( MPI a, const char *info )
+#else
+mpi_copy( MPI a )
+#endif
+{
+ int i;
+ MPI b;
+
+ if( a ) {
+ #ifdef M_DEBUG
+ b = mpi_debug_alloc( a->nlimbs, info );
+ #else
+ b = mpi_alloc( a->nlimbs );
+ #endif
+ b->nlimbs = a->nlimbs;
+ for(i=0; i < b->nlimbs; i++ )
+ b->d[i] = a->d[i];
+ }
+ else
+ b = NULL;
+ return b;
+}
+
+
+void
+mpi_set( MPI w, MPI u)
+{
+ mpi_ptr_t wp, up;
+ mpi_size_t usize = u->nlimbs;
+ int usign = u->sign;
+
+ RESIZE_IF_NEEDED(w, usize);
+ wp = w->d;
+ up = u->d;
+ MPN_COPY( wp, up, usize );
+ w->nlimbs = usize;
+ w->sign = usign;
+}
+
+
+void
+mpi_set_ui( MPI w, unsigned long u)
+{
+ RESIZE_IF_NEEDED(w, 1);
+ w->d[0] = u;
+ w->nlimbs = u? 1:0;
+ w->sign = 0;
+}
+
+
+MPI
+mpi_alloc_set_ui( unsigned long u)
+{
+ #ifdef M_DEBUG
+ MPI w = mpi_debug_alloc(1,"alloc_set_ui");
+ #else
+ MPI w = mpi_alloc(1);
+ #endif
+ w->d[0] = u;
+ w->nlimbs = u? 1:0;
+ w->sign = 0;
+ return w;
+}
+
+
+void
+mpi_swap( MPI a, MPI b)
+{
+    /* Swap the contents; exchanging only the local pointer copies
+     * would have no effect on the caller. */
+    mpi_ptr_t tmp_d;
+    unsigned  tmp_n;
+    int       tmp_s;
+
+    tmp_d = a->d;	a->d = b->d;		 b->d = tmp_d;
+    tmp_n = a->alloced; a->alloced = b->alloced; b->alloced = tmp_n;
+    tmp_n = a->nlimbs;	a->nlimbs = b->nlimbs;	 b->nlimbs = tmp_n;
+    tmp_s = a->sign;	a->sign = b->sign;	 b->sign = tmp_s;
+}
+
+