diff options
author | Werner Koch <wk@gnupg.org> | 2007-03-23 19:55:14 +0000 |
---|---|---|
committer | Werner Koch <wk@gnupg.org> | 2007-03-23 19:55:14 +0000 |
commit | 5c8ee46baeed3d72945729e5792213cc6850782d (patch) | |
tree | e1965e98d6789c5fee6740038d83d41c539dd6ae | |
parent | a2070cf05cffd66ee71a7d1af5084865d43a77cb (diff) | |
download | libgcrypt-5c8ee46baeed3d72945729e5792213cc6850782d.tar.gz |
Did some performance experiments and added code for Barrett reduction.
-rw-r--r-- | cipher/ChangeLog | 12 | ||||
-rw-r--r-- | cipher/ecc.c | 430 | ||||
-rw-r--r-- | mpi/ChangeLog | 10 | ||||
-rw-r--r-- | mpi/Makefile.am | 1 | ||||
-rw-r--r-- | mpi/mpi-bit.c | 3 | ||||
-rw-r--r-- | mpi/mpi-div.c | 13 | ||||
-rw-r--r-- | mpi/mpi-internal.h | 3 | ||||
-rw-r--r-- | mpi/mpi-mod.c | 194 | ||||
-rw-r--r-- | src/mpi.h | 22 | ||||
-rw-r--r-- | tests/benchmark.c | 2 |
10 files changed, 496 insertions, 194 deletions
diff --git a/cipher/ChangeLog b/cipher/ChangeLog index 0bd32b66..3fe17e07 100644 --- a/cipher/ChangeLog +++ b/cipher/ChangeLog @@ -1,3 +1,12 @@ +2007-03-23 Werner Koch <wk@g10code.com> + + * ecc.c (ecc_ctx_init, ecc_ctx_free, ecc_mod, ecc_mulm): New. + (duplicate_point, sum_points, escalar_mult): Don't use a + copy of base->p. Replaced all mpi_mulm by ecc_mulm so that we can + experiment with different algorithms. + (generate_key, check_secret_key, sign, verify): Initialize a + computation context for use by ecc_mulm. + 2007-03-22 Werner Koch <wk@g10code.com> * pubkey.c (pubkey_table): Initialize ECC. @@ -8,6 +17,9 @@ functions. (duplicate_point): Ditto (sum_points): Ditto + (sign, verify): Remove unneeded copy operations. + (sum_points): Removed memory leaks and optimized some compares. + (verify): Simplified input check. 2007-03-14 Werner Koch <wk@g10code.com> diff --git a/cipher/ecc.c b/cipher/ecc.c index 6f650708..44f18024 100644 --- a/cipher/ecc.c +++ b/cipher/ecc.c @@ -23,6 +23,33 @@ - Check whether we can LGPL the code. + - Use affine coordinates for points with the public API and format + them in the way described by BSI's TR-03111, 5.1.3. + + - If we support point compression we need to decide how to compute + the keygrip it should not change due to compression. + + - mpi_mulm is quite slow which has never been a problem for the + other algorithms. However here we are using it several times in a + row and thus it is an important performance factor. + + - We use mpi_powm for x^2 mod p: Either implement a special case in + mpi_powm or check whether mpi_mulm is faster. + + + +Algorithm generate 100*sign 100*verify +---------------------------------------------- +Orginal version: +ECDSA 192 bit 100ms 2630ms 5040ms +ECDSA 256 bit 120ms 2960ms 6070ms +ECDSA 384 bit 370ms 9570ms 18900ms + +Using Barrett: +ECDSA 192 bit 130ms 3050ms 5620ms +ECDSA 256 bit 140ms 3410ms 6950ms +ECDSA 384 bit 400ms 10500ms 21670ms + */ @@ -50,9 +77,9 @@ * mmr: found too many mistakes in the mathematical background transcription. * mmr: improve the mathematical performance. * mmr: solve ECElGamal IFP weakness. - * more polite gen_k() and its calls. + * more polite gen_k() and its calls. * mmr: extend the check_secret_key() - * In process: + * In progress: * gen_big_point(): Randomize the point generation. * improve te memory uses. * Separation between sign & encrypt keys to facility the subkeys creation. @@ -71,6 +98,7 @@ #include "mpi.h" #include "cipher.h" + /* ECC over F_p; E(F_p) T=(p,a,b,G,n,h) @@ -86,6 +114,15 @@ */ +/* An object to hold a computation context. */ +struct ecc_ctx_s +{ + gcry_mpi_t m; /* The modulus - may not be modified. */ + int m_copied; /* If true, M needs to be released. */ +}; +typedef struct ecc_ctx_s *ecc_ctx_t; + + /* Point representation in projective coordinates. */ typedef struct { @@ -238,6 +275,61 @@ progress (int c) fputc (c, stderr); } + +/* + + M P I W R A P P E R + + */ + +static ecc_ctx_t +ecc_ctx_init (gcry_mpi_t m, int copy) +{ + ecc_ctx_t ctx; + + ctx = gcry_xcalloc (1, sizeof *ctx); + + if (copy) + { + ctx->m = mpi_copy (m); + ctx->m_copied = 1; + } + else + ctx->m = m; + + return ctx; +} + +static void +ecc_ctx_free (ecc_ctx_t ctx) +{ + if (!ctx) + return; + if (ctx->m_copied) + mpi_free (ctx->m); + gcry_free (ctx); +} + +/* Our version of mpi_mod which uses a reduction algorithm depending + on the the context. The modulus is part of the context. */ +static void +ecc_mod (gcry_mpi_t r, gcry_mpi_t x, ecc_ctx_t ctx) +{ + mpi_mod (r, x, ctx->m); +} + +/* Our version of mpi_mulm which uses a reduction algorithm depending + on the the context. The modulus is part of the context. */ +static void +ecc_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, ecc_ctx_t ctx) +{ + /* Barrett is actually slower, so we don't do any switching here. + Howerver, we should take advantage of fast reduction for the NIST + curves. */ + mpi_mulm (w, u, v, ctx->m); +} + + /* @@ -589,10 +681,11 @@ invert_point (point_t *P, elliptic_curve_t *base) * R = 2P */ static void -duplicate_point (point_t *R, point_t *P, elliptic_curve_t * base) +duplicate_point (point_t *R, point_t *P, elliptic_curve_t *base, + ecc_ctx_t pctx) { gcry_mpi_t one, two, three, four, eight; - gcry_mpi_t p, p_3, a; + gcry_mpi_t p_3, a; gcry_mpi_t t1, t2, t3, t4, t5, t6, t7; gcry_mpi_t aux; @@ -601,18 +694,19 @@ duplicate_point (point_t *R, point_t *P, elliptic_curve_t * base) three = mpi_alloc_set_ui (3); four = mpi_alloc_set_ui (4); eight = mpi_alloc_set_ui (8); - p = mpi_copy (base->p_); - p_3 = mpi_alloc (mpi_get_nlimbs (p)); - mpi_sub_ui (p_3, p, 3); - a = mpi_copy (base->a_); - t1 = mpi_alloc (mpi_get_nlimbs (p)); - t2 = mpi_alloc (mpi_get_nlimbs (p)); - t3 = mpi_alloc (mpi_get_nlimbs (p)); - t4 = mpi_alloc (mpi_get_nlimbs (p)); - t5 = mpi_alloc (mpi_get_nlimbs (p)); - t6 = mpi_alloc (mpi_get_nlimbs (p)); - t7 = mpi_alloc (mpi_get_nlimbs (p)); - aux = mpi_alloc (mpi_get_nlimbs (p)); + + p_3 = mpi_alloc_like (base->p_); + mpi_sub_ui (p_3, base->p_, 3); /* p_3 = p - 3 */ + + a = mpi_copy (base->a_); + t1 = mpi_alloc_like (base->p_); + t2 = mpi_alloc_like (base->p_); + t3 = mpi_alloc_like (base->p_); + t4 = mpi_alloc_like (base->p_); + t5 = mpi_alloc_like (base->p_); + t6 = mpi_alloc_like (base->p_); + t7 = mpi_alloc_like (base->p_); + aux = mpi_alloc_like (base->p_); t1 = mpi_copy (P->x_); /* t1=x1 */ t2 = mpi_copy (P->y_); /* t2=y1 */ @@ -626,40 +720,40 @@ duplicate_point (point_t *R, point_t *P, elliptic_curve_t * base) } else { - mpi_mod (a, a, p); /* a mod p */ + ecc_mod (a, a, pctx); /* a mod p FIXME: really needed? */ if (!mpi_cmp (a, p_3)) { /* a==p-3 */ - mpi_powm (t4, t3, two, p); /* t4=t3^2 mod p */ - mpi_subm (t5, t1, t4, p); /* t5=t1-t4 mod p */ - mpi_addm (t4, t1, t4, p); /* t4=t1+t4 mod p */ - mpi_mulm (t5, t4, t5, p); /* t5=t4*t5 mod p */ - mpi_mulm (t4, three, t5, p); /* t4=3*t5 mod p */ + mpi_powm (t4, t3, two, base->p_); /* t4=t3^2 mod p */ + mpi_subm (t5, t1, t4, base->p_); /* t5=t1-t4 mod p */ + mpi_addm (t4, t1, t4, base->p_); /* t4=t1+t4 mod p */ + ecc_mulm (t5, t4, t5, pctx); /* t5=t4*t5 mod p */ + ecc_mulm (t4, three, t5, pctx); /* t4=3*t5 mod p */ } else { - t4 = mpi_copy (a); /* t4=a */ - mpi_powm (t5, t3, two, p); /* t5=t3^2 mod p */ - mpi_powm (t5, t5, two, p); /* t5=t5^2 mod p */ - mpi_mulm (t5, t4, t5, p); /* t5=t4*t5 mod p */ - mpi_powm (t4, t1, two, p); /* t4=t1^2 mod p */ - mpi_mulm (t4, three, t4, p); /* t4=3*t4 mod p */ - mpi_addm (t4, t4, t5, p); /* t4=t4+t5 mod p */ + mpi_set (t4, a); /* t4=a */ + mpi_powm (t5, t3, two, base->p_); /* t5=t3^2 mod p */ + mpi_powm (t5, t5, two, base->p_); /* t5=t5^2 mod p */ + ecc_mulm (t5, t4, t5, pctx); /* t5=t4*t5 mod p */ + mpi_powm (t4, t1, two, base->p_); /* t4=t1^2 mod p */ + ecc_mulm (t4, three, t4, pctx); /* t4=3*t4 mod p */ + mpi_addm (t4, t4, t5, base->p_); /* t4=t4+t5 mod p */ } - mpi_mulm (t3, t2, t3, p); /* t3=t2*t3 mod p */ - mpi_mulm (t3, two, t3, p); /* t3=2*t3 mod p */ - mpi_powm (aux, t2, two, p); /* t2=t2^2 mod p */ - t2 = mpi_copy (aux); - mpi_mulm (t5, t1, t2, p); /* t5=t1*t2 mod p */ - mpi_mulm (t5, four, t5, p); /* t5=4*t5 mod p */ - mpi_powm (t1, t4, two, p); /* t1=t4^2 mod p */ - mpi_mulm (aux, two, t5, p); - mpi_subm (t1, t1, aux, p); /* t1=t1-2*t5 mod p */ - mpi_powm (aux, t2, two, p); /* t2=t2^2 mod p */ - t2 = mpi_copy (aux); - mpi_mulm (t2, eight, t2, p); /* t2=8*t2 mod p */ - mpi_subm (t5, t5, t1, p); /* t5=t5-t1 mod p */ - mpi_mulm (t5, t4, t5, p); /* t5=t4*t5 mod p */ - mpi_subm (t2, t5, t2, p); /* t2=t5-t2 mod p */ + ecc_mulm (t3, t2, t3, pctx); /* t3=t2*t3 mod p */ + ecc_mulm (t3, two, t3, pctx); /* t3=2*t3 mod p */ + mpi_powm (aux, t2, two, base->p_); /* t2=t2^2 mod p */ + mpi_set (t2, aux); + ecc_mulm (t5, t1, t2, pctx); /* t5=t1*t2 mod p */ + ecc_mulm (t5, four, t5, pctx); /* t5=4*t5 mod p */ + mpi_powm (t1, t4, two, base->p_); /* t1=t4^2 mod p */ + ecc_mulm (aux, two, t5, pctx); + mpi_subm (t1, t1, aux, base->p_); /* t1=t1-2*t5 mod p */ + mpi_powm (aux, t2, two, base->p_); /* t2=t2^2 mod p */ + mpi_set (t2, aux); + ecc_mulm (t2, eight, t2, pctx); /* t2=8*t2 mod p */ + mpi_subm (t5, t5, t1, base->p_); /* t5=t5-t1 mod p */ + ecc_mulm (t5, t4, t5, pctx); /* t5=t4*t5 mod p */ + mpi_subm (t2, t5, t2, base->p_); /* t2=t5-t2 mod p */ mpi_set (R->x_, t1); mpi_set (R->y_, t2); @@ -674,7 +768,6 @@ duplicate_point (point_t *R, point_t *P, elliptic_curve_t * base) mpi_free (t3); mpi_free (t2); mpi_free (t1); - mpi_free (p); mpi_free (p_3); mpi_free (a); mpi_free (eight); @@ -691,67 +784,57 @@ duplicate_point (point_t *R, point_t *P, elliptic_curve_t * base) R = P0 + P1 */ static void -sum_points (point_t *R, point_t *P0, point_t *P1, elliptic_curve_t * base) +sum_points (point_t *R, point_t *P0, point_t *P1, elliptic_curve_t *base, + ecc_ctx_t pctx) { - gcry_mpi_t one, two; - gcry_mpi_t p; - gcry_mpi_t t1, t2, t3, t4, t5, t6, t7; - unsigned int nbits; - - one = mpi_alloc_set_ui (1); - two = mpi_alloc_set_ui (2); - p = mpi_copy (base->p_); - nbits = mpi_get_nbits (p); - t1 = mpi_new (nbits); - t2 = mpi_new (nbits); - t3 = mpi_new (nbits); - t4 = mpi_new (nbits); - t5 = mpi_new (nbits); - t6 = mpi_new (nbits); - t7 = mpi_new (nbits); if ( (!mpi_cmp (P1->x_, P0->x_)) && (!mpi_cmp (P1->y_, P0->y_)) && (!mpi_cmp (P1->z_, P0->z_)) ) /* P1 == P0 */ { - duplicate_point (R, P0, base); + duplicate_point (R, P0, base, pctx); } else if (point_at_infinity (*P0)) /* R == 0 && P1 == P1 */ { - /* (!mpi_cmp_ui(P0->y_,0) || !mpi_cmp_ui(P0->z_,0))*/ mpi_set (R->x_, P1->x_); mpi_set (R->y_, P1->y_); mpi_set (R->z_, P1->z_); } else if (point_at_infinity (*P1)) /* R == P0 && P0 == 0 */ { - /* (!mpi_cmp_ui(P1->y_,0) || !mpi_cmp_ui(P1->z_,0)) */ mpi_set (R->x_, P0->x_); mpi_set (R->y_, P0->y_); mpi_set (R->z_, P0->z_); } else { + gcry_mpi_t two; + gcry_mpi_t t1, t2, t3, t4, t5, t6, t7; + + two = mpi_alloc_set_ui (2); + t1 = mpi_copy (P0->x_); /* t1=x0 */ t2 = mpi_copy (P0->y_); /* t2=y0 */ t3 = mpi_copy (P0->z_); /* t3=z0 */ t4 = mpi_copy (P1->x_); /* t4=x1 */ t5 = mpi_copy (P1->y_); /* t5=y2 */ - if (mpi_cmp (P1->z_, one)) /* z1 != 1 */ + t6 = mpi_new (0); + t7 = mpi_new (0); + + if (mpi_cmp_ui (P1->z_, 1)) /* z1 != 1 */ { - /* fixme: Release old t6 or just set it. */ - t6 = mpi_copy (P1->z_); /* t6=z1 */ - mpi_powm (t7, t6, two, p); /* t7=t6^2 mod p */ - mpi_mulm (t1, t1, t7, p); /* t1=t1*t7 mod p */ - mpi_mulm (t7, t6, t7, p); /* t7=t6*t7 mod p */ - mpi_mulm (t2, t2, t7, p); /* t2=t2*t7 mod p */ + mpi_set (t6, P1->z_); /* t6=z1 */ + mpi_powm (t7, t6, two, base->p_); /* t7=t6^2 mod p */ + ecc_mulm (t1, t1, t7, pctx); /* t1=t1*t7 mod p */ + ecc_mulm (t7, t6, t7, pctx); /* t7=t6*t7 mod p */ + ecc_mulm (t2, t2, t7, pctx); /* t2=t2*t7 mod p */ } - mpi_powm (t7, t3, two, p);/* t7=t3^2 mod p */ - mpi_mulm (t4, t4, t7, p); /* t4=t4*t7 mod p */ - mpi_mulm (t7, t3, t7, p); /* t7=t3*t7 mod p */ - mpi_mulm (t5, t5, t7, p); /* t5=t5*t7 mod p */ - mpi_subm (t4, t1, t4, p); /* t4=t1-t4 mod p */ - mpi_subm (t5, t2, t5, p); /* t5=t2-t5 mod p */ + mpi_powm (t7, t3, two, base->p_);/* t7=t3^2 mod p */ + ecc_mulm (t4, t4, t7, pctx); /* t4=t4*t7 mod p */ + ecc_mulm (t7, t3, t7, pctx); /* t7=t3*t7 mod p */ + ecc_mulm (t5, t5, t7, pctx); /* t5=t5*t7 mod p */ + mpi_subm (t4, t1, t4, base->p_); /* t4=t1-t4 mod p */ + mpi_subm (t5, t2, t5, base->p_); /* t5=t2-t5 mod p */ if (!mpi_cmp_ui (t4, 0)) /* t4==0 */ { @@ -775,44 +858,41 @@ sum_points (point_t *R, point_t *P0, point_t *P1, elliptic_curve_t * base) } else { - mpi_mulm (t1, two, t1, p); - mpi_subm (t1, t1, t4, p); /* t1=2*t1-t4 mod p */ - mpi_mulm (t2, two, t2, p); - mpi_subm (t2, t2, t5, p); /* t2=2*t2-t5 mod p */ - if (mpi_cmp (P1->z_, one)) /* z1 != 1 */ + ecc_mulm (t1, two, t1, pctx); + mpi_subm (t1, t1, t4, base->p_); /* t1=2*t1-t4 mod p */ + ecc_mulm (t2, two, t2, pctx); + mpi_subm (t2, t2, t5, base->p_); /* t2=2*t2-t5 mod p */ + if (mpi_cmp_ui (P1->z_, 1)) /* z1 != 1 */ { - mpi_mulm (t3, t3, t6, p); /* t3=t3*t6 */ + ecc_mulm (t3, t3, t6, pctx); /* t3=t3*t6 */ } - mpi_mulm (t3, t3, t4, p); /* t3=t3*t4 mod p */ - mpi_powm (t7, t4, two, p); /* t7=t4^2 mod p */ - mpi_mulm (t4, t4, t7, p); /* t4=t4*t7 mod p */ - mpi_mulm (t7, t1, t7, p); /* t7=t1*t7 mod p */ - mpi_powm (t1, t5, two, p); /* t1=t5^2 mod p */ - mpi_subm (t1, t1, t7, p); /* t1=t1-t7 mod p */ - mpi_mulm (t6, two, t1, p); - mpi_subm (t7, t7, t6, p); /* t7=t7-2*t1 mod p */ - mpi_mulm (t5, t5, t7, p); /* t5=t5*t7 mod p */ - mpi_mulm (t4, t2, t4, p); /* t4=t2*t4 mod p */ - mpi_subm (t2, t5, t4, p); /* t2=t5-t4 mod p */ - mpi_invm (t6, two, p); - mpi_mulm (t2, t2, t6, p); /* t2 = t2/2 */ + ecc_mulm (t3, t3, t4, pctx); /* t3=t3*t4 mod p */ + mpi_powm (t7, t4, two, base->p_); /* t7=t4^2 mod p */ + ecc_mulm (t4, t4, t7, pctx); /* t4=t4*t7 mod p */ + ecc_mulm (t7, t1, t7, pctx); /* t7=t1*t7 mod p */ + mpi_powm (t1, t5, two, base->p_); /* t1=t5^2 mod p */ + mpi_subm (t1, t1, t7, base->p_); /* t1=t1-t7 mod p */ + ecc_mulm (t6, two, t1, pctx); + mpi_subm (t7, t7, t6, base->p_); /* t7=t7-2*t1 mod p */ + ecc_mulm (t5, t5, t7, pctx); /* t5=t5*t7 mod p */ + ecc_mulm (t4, t2, t4, pctx); /* t4=t2*t4 mod p */ + mpi_subm (t2, t5, t4, base->p_); /* t2=t5-t4 mod p */ + mpi_invm (t6, two, base->p_); + ecc_mulm (t2, t2, t6, pctx); /* t2 = t2/2 */ mpi_set (R->x_, t1); mpi_set (R->y_, t2); mpi_set (R->z_, t3); } + mpi_free (t7); + mpi_free (t6); + mpi_free (t5); + mpi_free (t4); + mpi_free (t3); + mpi_free (t2); + mpi_free (t1); + mpi_free (two); } - - mpi_free (t7); - mpi_free (t6); - mpi_free (t5); - mpi_free (t4); - mpi_free (t3); - mpi_free (t2); - mpi_free (t1); - mpi_free (p); - mpi_free (two); - mpi_free (one); } /**************** @@ -828,11 +908,9 @@ sum_points (point_t *R, point_t *P0, point_t *P1, elliptic_curve_t * base) */ static void escalar_mult (point_t *R, gcry_mpi_t escalar, point_t *P, - elliptic_curve_t *base) + elliptic_curve_t *base, ecc_ctx_t pctx) { - gcry_mpi_t one, two, three; - gcry_mpi_t p; gcry_mpi_t x1, y1, z1, z2, z3, k, h; gcry_mpi_t xx, yy, zz; unsigned int i, loops; @@ -845,8 +923,6 @@ escalar_mult (point_t *R, gcry_mpi_t escalar, point_t *P, two = mpi_alloc_set_ui (2); three = mpi_alloc_set_ui (3); - p = mpi_copy (base->p_); - x1 = mpi_alloc_like (P->x_); y1 = mpi_alloc_like (P->y_); /* z1 is not yet intialized. */ @@ -871,7 +947,7 @@ escalar_mult (point_t *R, gcry_mpi_t escalar, point_t *P, escalar->sign = 0; /* +n */ k = mpi_copy (escalar); yy = mpi_copy (P->y_); /* -P */ - mpi_invm (yy, yy, p); + mpi_invm (yy, yy, base->p_); } else { @@ -885,12 +961,12 @@ escalar_mult (point_t *R, gcry_mpi_t escalar, point_t *P, } else { - mpi_mulm (z2, zz, zz, p); /* z^2 */ - mpi_mulm (z3, zz, z2, p); /* z^3 */ - mpi_invm (z2, z2, p); /* 1/Z^2 */ - mpi_mulm (x1, xx, z2, p); /* xx/z^2 */ - mpi_invm (z3, z3, p); /* 1/z^3 */ - mpi_mulm (y1, yy, z3, p); /* yy/z^3 */ + ecc_mulm (z2, zz, zz, pctx); /* z^2 */ + ecc_mulm (z3, zz, z2, pctx); /* z^3 */ + mpi_invm (z2, z2, base->p_); /* 1/Z^2 */ + ecc_mulm (x1, xx, z2, pctx); /* xx/z^2 */ + mpi_invm (z3, z3, base->p_); /* 1/z^3 */ + ecc_mulm (y1, yy, z3, pctx); /* yy/z^3 */ } mpi_mul (h, three, k); /* h=3k */ loops = mpi_get_nbits (h); @@ -903,18 +979,18 @@ escalar_mult (point_t *R, gcry_mpi_t escalar, point_t *P, P1.z_ = mpi_copy (z1); while (i > 0) { /* A.10.9. step 11 i from l-1 downto 1 */ - duplicate_point (R, R, base); + duplicate_point (R, R, base, pctx); if (mpi_test_bit (h, i) == 1 && mpi_test_bit (k, i) == 0) { /* h_i=1 & k_i=0 */ P2 = point_copy (*R); - sum_points (R, &P2, &P1, base); /* R=P2+P1 over the base elliptic curve */ + sum_points (R, &P2, &P1, base, pctx); /* R=P2+P1 over the base elliptic curve */ } if (mpi_test_bit (h, i) == 0 && mpi_test_bit (k, i) == 1) { /* h_i=0 & k_i=1 */ P2 = point_copy (*R); P1_ = point_copy (P1); invert_point (&P1_, base); - sum_points (R, &P2, &P1_, base); /* R=P2+P1_ over the base elliptic curve */ + sum_points (R, &P2, &P1_, base, pctx); /* R=P2+P1_ over the base elliptic curve */ } i--; } @@ -935,7 +1011,6 @@ escalar_mult (point_t *R, gcry_mpi_t escalar, point_t *P, mpi_free (zz); mpi_free (yy); mpi_free (xx); - mpi_free (p); mpi_free (three); mpi_free (two); mpi_free (one); @@ -1124,6 +1199,7 @@ generate_key (ECC_secret_key *sk, unsigned int nbits) elliptic_curve_t E; gcry_mpi_t d; point_t Q, G; + ecc_ctx_t pctx; err = generate_curve (nbits, &E); if (err) @@ -1137,7 +1213,9 @@ generate_key (ECC_secret_key *sk, unsigned int nbits) /* Compute Q. */ point_init (&Q); - escalar_mult (&Q, d, &E.G, &E); + pctx = ecc_ctx_init (E.p_, 0); + escalar_mult (&Q, d, &E.G, &E, pctx); + ecc_ctx_free (pctx); /* Copy the stuff to the key structures. */ sk->E.p_ = mpi_copy (E.p_); @@ -1228,6 +1306,7 @@ check_secret_key (ECC_secret_key * sk) { point_t Q; gcry_mpi_t y_2, y2 = mpi_alloc (0); + ecc_ctx_t pctx; /* ?primarity test of 'p' */ /* (...) //!! */ @@ -1255,12 +1334,14 @@ check_secret_key (ECC_secret_key * sk) /* Q=[n]G over E = PaI */ point_init (&Q); - escalar_mult (&Q, sk->E.n_, &sk->E.G, &sk->E); + pctx = ecc_ctx_init (sk->E.p_, 0); + escalar_mult (&Q, sk->E.n_, &sk->E.G, &sk->E, pctx); if (!point_at_infinity (Q)) { if (DBG_CIPHER) log_debug ("check_secret_key: E is not a curve of order n\n"); point_free (&Q); + ecc_ctx_free (pctx); return 1; } /* pubkey cannot be PaI */ @@ -1268,17 +1349,20 @@ check_secret_key (ECC_secret_key * sk) { if (DBG_CIPHER) log_debug ("Bad check: Q can not be a Point at Infinity!\n"); + ecc_ctx_free (pctx); return (1); } /* pubkey = [d]G over E */ - escalar_mult (&Q, sk->d, &sk->E.G, &sk->E); + escalar_mult (&Q, sk->d, &sk->E.G, &sk->E, pctx); if ((Q.x_ == sk->Q.x_) && (Q.y_ == sk->Q.y_) && (Q.z_ == sk->Q.z_)) { if (DBG_CIPHER) log_debug ("Bad check: There is NO correspondence between 'd' and 'Q'!\n"); + ecc_ctx_free (pctx); return (1); } + ecc_ctx_free (pctx); point_free (&Q); return 0; } @@ -1380,36 +1464,35 @@ static gpg_err_code_t sign (gcry_mpi_t input, ECC_secret_key *skey, gcry_mpi_t r, gcry_mpi_t s) { gpg_err_code_t err = 0; - gcry_mpi_t k, i, dr, sum, k_1, x, y; - point_t G, I; - elliptic_curve_t E; - + gcry_mpi_t k, dr, sum, k_1, x, y; + point_t I; + ecc_ctx_t pctx; + k = NULL; - i = mpi_alloc (0); /* Fixme: we could do trivially without it. */ dr = mpi_alloc (0); sum = mpi_alloc (0); k_1 = mpi_alloc (0); x = mpi_alloc (0); y = mpi_alloc (0); - G = point_copy (skey->E.G); - E = curve_copy (skey->E); point_init (&I); mpi_set_ui (s, 0); mpi_set_ui (r, 0); + pctx = ecc_ctx_init (skey->E.p_, 0); + while (!mpi_cmp_ui (s, 0)) /* s == 0 */ { while (!mpi_cmp_ui (r, 0)) /* r == 0 */ { /* Note, that we are guaranteed to enter this loop at least - once because r has been intialized to 0. We casn use a - do_while because we want to keep the value of R value - even if S has to be recomputed. */ + once because r has been intialized to 0. We can't use a + do_while because we want to keep the value of R even if S + has to be recomputed. */ mpi_free (k); - k = gen_k (E.p_, 1); - escalar_mult (&I, k, &G, &E); /* I = [k]G */ - if (point_affine (&I, x, y, &E)) + k = gen_k (skey->E.p_, 1); /* fixme: shouldn't that be E.n ? */ + escalar_mult (&I, k, &skey->E.G, &skey->E, pctx); /* I = [k]G */ + if (point_affine (&I, x, y, &skey->E)) { if (DBG_CIPHER) log_debug ("ecc sign: Cannot turn to affine. " @@ -1417,24 +1500,22 @@ sign (gcry_mpi_t input, ECC_secret_key *skey, gcry_mpi_t r, gcry_mpi_t s) err = GPG_ERR_BAD_SIGNATURE; goto leave; } - mpi_set (i, x); /* i = I_x */ - mpi_mod (r, i, E.n_); /* r = i mod n */ + mpi_mod (r, x, skey->E.n_); /* r = x mod n */ } - mpi_mulm (dr, skey->d, r, E.n_); /* dr = d*r mod n */ - mpi_addm (sum, input, dr, E.n_); /* sum = hash + (d*r) mod n */ - mpi_invm (k_1, k, E.n_); /* k_1 = k^(-1) mod n */ - mpi_mulm (s, k_1, sum, E.n_); /* s = k^(-1)*(hash+(d*r)) mod n */ + mpi_mulm (dr, skey->d, r, skey->E.n_);/* dr = d*r mod n */ + mpi_addm (sum, input, dr, skey->E.n_);/* sum = hash + (d*r) mod n */ + mpi_invm (k_1, k, skey->E.n_); /* k_1 = k^(-1) mod n */ + mpi_mulm (s, k_1, sum, skey->E.n_); /* s = k^(-1)*(hash+(d*r)) mod n */ } - /* Fixme: What about releasing G and E? Why do we need copies at all? */ leave: + ecc_ctx_free (pctx); point_free (&I); mpi_free (y); mpi_free (x); mpi_free (k_1); mpi_free (sum); mpi_free (dr); - mpi_free (i); mpi_free (k); return err; @@ -1448,25 +1529,13 @@ verify (gcry_mpi_t input, ECC_public_key *pkey, gcry_mpi_t r, gcry_mpi_t s) { gpg_err_code_t err = 0; gcry_mpi_t h, h1, h2, x, y; - point_t Q, Q1, Q2, G; - elliptic_curve_t E; + point_t Q, Q1, Q2; + ecc_ctx_t pctx; - /* Check that the input parameters are valid. */ - { - gcry_mpi_t r_ = mpi_alloc_like (r); - gcry_mpi_t s_ = mpi_alloc_like (s); - mpi_mod (r_, r, pkey->E.n_); /* r = r mod E_n */ - mpi_mod (s_, s, pkey->E.n_); /* s = s mod E_n */ - err = (mpi_cmp (r_, r) || mpi_cmp (s_, s)); - mpi_free (r_); - mpi_free (s_); - if (err) - { - if (DBG_CIPHER) - log_debug ("ecc verification: No valid values.\n"); - return GPG_ERR_BAD_SIGNATURE; - } - } + if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, pkey->E.n_) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. */ + if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, pkey->E.n_) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */ h = mpi_alloc (0); h1 = mpi_alloc (0); @@ -1476,15 +1545,21 @@ verify (gcry_mpi_t input, ECC_public_key *pkey, gcry_mpi_t r, gcry_mpi_t s) point_init (&Q); point_init (&Q1); point_init (&Q2); - G = point_copy (pkey->E.G); /* Fixme: We don't need the copy. */ - E = curve_copy (pkey->E); /* Fixme: We don't need the copy. */ - mpi_invm (h, s, E.n_); /* h = s^(-1) (mod n) */ - mpi_mulm (h1, input, h, E.n_); /* h1 = hash * s^(-1) (mod n) */ - escalar_mult (&Q1, h1, &G, &E); /* Q1 = [ hash * s^(-1) ]G */ - mpi_mulm (h2, r, h, E.n_); /* h2 = r * s^(-1) (mod n) */ - escalar_mult (&Q2, h2, &pkey->Q, &E); /* Q2 = [ r * s^(-1) ]Q */ - sum_points (&Q, &Q1, &Q2, &E);/* Q = ([hash * s^(-1)]G) + ([r * s^(-1)]Q) */ + pctx = ecc_ctx_init (pkey->E.p_, 0); + + /* h = s^(-1) (mod n) */ + mpi_invm (h, s, pkey->E.n_); + /* h1 = hash * s^(-1) (mod n) */ + mpi_mulm (h1, input, h, pkey->E.n_); + /* Q1 = [ hash * s^(-1) ]G */ + escalar_mult (&Q1, h1, &pkey->E.G, &pkey->E, pctx ); + /* h2 = r * s^(-1) (mod n) */ + mpi_mulm (h2, r, h, pkey->E.n_); + /* Q2 = [ r * s^(-1) ]Q */ + escalar_mult (&Q2, h2, &pkey->Q, &pkey->E, pctx); + /* Q = ([hash * s^(-1)]G) + ([r * s^(-1)]Q) */ + sum_points (&Q, &Q1, &Q2, &pkey->E, pctx); if (point_at_infinity (Q)) { @@ -1493,14 +1568,14 @@ verify (gcry_mpi_t input, ECC_public_key *pkey, gcry_mpi_t r, gcry_mpi_t s) err = GPG_ERR_BAD_SIGNATURE; goto leave; } - if (point_affine (&Q, x, y, &E)) + if (point_affine (&Q, x, y, &pkey->E)) { if (DBG_CIPHER) log_debug ("ecc verification: Cannot turn to affine. Rejected.\n"); err = GPG_ERR_BAD_SIGNATURE; goto leave; } - mpi_mod (x, x, E.n_); /* x = x mod E_n */ + mpi_mod (x, x, pkey->E.n_); /* x = x mod E_n */ if (mpi_cmp (x, r)) /* x != r */ { if (DBG_CIPHER) @@ -1512,8 +1587,7 @@ verify (gcry_mpi_t input, ECC_public_key *pkey, gcry_mpi_t r, gcry_mpi_t s) log_debug ("ecc verification: Accepted.\n"); leave: - curve_free (&E); - point_free (&G); + ecc_ctx_free (pctx); point_free (&Q2); point_free (&Q1); point_free (&Q); diff --git a/mpi/ChangeLog b/mpi/ChangeLog index 1bb6cd1c..fac7e0ce 100644 --- a/mpi/ChangeLog +++ b/mpi/ChangeLog @@ -1,3 +1,13 @@ +2007-03-23 Werner Koch <wk@g10code.com> + + * mpi-bit.c (_gcry_mpi_lshift_limbs): Assign AP after the resize. + + * mpi-div.c (gcry_mpi_mod, _gcry_mpi_mod): Moved to .. + * mpi-mod.c: .. new file. + (_gcry_mpi_barrett_init, _gcry_mpi_barrett_free): New. + (_gcry_mpi_mod_barrett): New. + (_gcry_mpi_mul_barrett): New. + 2007-03-22 Werner Koch <wk@g10code.com> * mpi-div.c (_gcry_mpi_mod): New. diff --git a/mpi/Makefile.am b/mpi/Makefile.am index b6213341..8ee15b78 100644 --- a/mpi/Makefile.am +++ b/mpi/Makefile.am @@ -168,6 +168,7 @@ libmpi_la_SOURCES = longlong.h \ mpi-inline.c \ mpi-inv.c \ mpi-mul.c \ + mpi-mod.c \ mpi-pow.c \ mpi-mpow.c \ mpi-scan.c \ diff --git a/mpi/mpi-bit.c b/mpi/mpi-bit.c index fe4895dc..b60e2bfb 100644 --- a/mpi/mpi-bit.c +++ b/mpi/mpi-bit.c @@ -279,7 +279,7 @@ gcry_mpi_rshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n ) void _gcry_mpi_lshift_limbs( gcry_mpi_t a, unsigned int count ) { - mpi_ptr_t ap = a->d; + mpi_ptr_t ap; int n = a->nlimbs; int i; @@ -288,6 +288,7 @@ _gcry_mpi_lshift_limbs( gcry_mpi_t a, unsigned int count ) RESIZE_IF_NEEDED( a, n+count ); + ap = a->d; for( i = n-1; i >= 0; i-- ) ap[i+count] = ap[i]; for(i=0; i < count; i++ ) diff --git a/mpi/mpi-div.c b/mpi/mpi-div.c index 022cde8d..0d8a2d16 100644 --- a/mpi/mpi-div.c +++ b/mpi/mpi-div.c @@ -355,17 +355,4 @@ gcry_mpi_div (gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t d } -void -gcry_mpi_mod (gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor) -{ - _gcry_mpi_fdiv_r (rem, dividend, divisor); - rem->sign = 0; -} - -void -_gcry_mpi_mod (gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor) -{ - _gcry_mpi_fdiv_r (rem, dividend, divisor); - rem->sign = 0; -} diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h index d78c1809..f9c1f9d4 100644 --- a/mpi/mpi-internal.h +++ b/mpi/mpi-internal.h @@ -175,6 +175,9 @@ void _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs ); void _gcry_mpi_assign_limb_space( gcry_mpi_t a, mpi_ptr_t ap, unsigned nlimbs ); /*-- mpi-bit.c --*/ +#define mpi_rshift_limbs(a,n) _gcry_mpi_rshift_limbs ((a), (n)) +#define mpi_lshift_limbs(a,n) _gcry_mpi_lshift_limbs ((a), (n)) + void _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count ); void _gcry_mpi_lshift_limbs( gcry_mpi_t a, unsigned int count ); diff --git a/mpi/mpi-mod.c b/mpi/mpi-mod.c new file mode 100644 index 00000000..72eeea04 --- /dev/null +++ b/mpi/mpi-mod.c @@ -0,0 +1,194 @@ +/* mpi-mod.c - Modular reduction + Copyright (C) 1998, 1999, 2001, 2002, 2003, + 2007 Free Software Foundation, Inc. + + This file is part of Libgcrypt. + + Libgcrypt is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of + the License, or (at your option) any later version. + + Libgcrypt is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + USA. */ + + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> + +#include "mpi-internal.h" +#include "longlong.h" +#include "g10lib.h" + + +/* Context used with Barrett reduction. */ +struct barrett_ctx_s +{ + gcry_mpi_t m; /* The modulus - may not be modified. */ + int m_copied; /* If true, M needs to be released. */ + int k; + gcry_mpi_t y; + gcry_mpi_t r1; /* Helper MPI. */ + gcry_mpi_t r2; /* Helper MPI. */ + gcry_mpi_t r3; /* Helper MPI allocated on demand. */ +}; + + + +void +_gcry_mpi_mod (gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor) +{ + _gcry_mpi_fdiv_r (rem, dividend, divisor); + rem->sign = 0; +} + +void +gcry_mpi_mod (gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor) +{ + _gcry_mpi_fdiv_r (rem, dividend, divisor); + rem->sign = 0; +} + + + +/* This function returns a new context for Barrett based operations on + the modulus M. This context needs to be released using + _gcry_mpi_barrett_free. If COPY is true M will be transferred to + the context and the user may change M. If COPY is false, M may not + be changed until gcry_mpi_barrett_free has been called. */ +mpi_barrett_t +_gcry_mpi_barrett_init (gcry_mpi_t m, int copy) +{ + mpi_barrett_t ctx; + gcry_mpi_t tmp; + + mpi_normalize (m); + ctx = gcry_xcalloc (1, sizeof *ctx); + + if (copy) + { + ctx->m = mpi_copy (m); + ctx->m_copied = 1; + } + else + ctx->m = m; + + ctx->k = mpi_get_nlimbs (m); + tmp = mpi_alloc (ctx->k + 1); + + /* Barrett precalculation: y = floor(b^(2k) / m). */ + mpi_set_ui (tmp, 1); + mpi_lshift_limbs (tmp, 2 * ctx->k); + mpi_fdiv_q (tmp, tmp, m); + + ctx->y = tmp; + ctx->r1 = mpi_alloc ( 2 * ctx->k + 1 ); + ctx->r2 = mpi_alloc ( 2 * ctx->k + 1 ); + + return ctx; +} + +void +_gcry_mpi_barrett_free (mpi_barrett_t ctx) +{ + if (ctx) + { + mpi_free (ctx->y); + mpi_free (ctx->r1); + mpi_free (ctx->r2); + if (ctx->r3) + mpi_free (ctx->r3); + if (ctx->m_copied) + mpi_free (ctx->m); + gcry_free (ctx); + } +} + + +/* R = X mod M + + Using Barrett reduction. Before using this function + _gcry_mpi_barrett_init must have been called to do the + precalculations. CTX is the context created by this precalculation + and also conveys M. If the Barret reduction could no be done a + starightforward reduction method is used. + + We assume that these conditions are met: + Input: x =(x_2k-1 ...x_0)_b + m =(m_k-1 ....m_0)_b with m_k-1 != 0 + Output: r = x mod m + */ +void +_gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, mpi_barrett_t ctx) +{ + gcry_mpi_t m = ctx->m; + int k = ctx->k; + gcry_mpi_t y = ctx->y; + gcry_mpi_t r1 = ctx->r1; + gcry_mpi_t r2 = ctx->r2; + + mpi_normalize (x); + if (mpi_get_nlimbs (x) > 2*k ) + { + mpi_mod (r, x, m); + return; + } + + /* 1. q1 = floor( x / b^k-1) + * q2 = q1 * y + * q3 = floor( q2 / b^k+1 ) + * Actually, we don't need qx, we can work direct on r2 + */ + mpi_set ( r2, x ); + mpi_rshift_limbs ( r2, k-1 ); + mpi_mul ( r2, r2, y ); + mpi_rshift_limbs ( r2, k+1 ); + + /* 2. r1 = x mod b^k+1 + * r2 = q3 * m mod b^k+1 + * r = r1 - r2 + * 3. if r < 0 then r = r + b^k+1 + */ + mpi_set ( r1, x ); + if ( r1->nlimbs > k+1 ) /* Quick modulo operation. */ + r1->nlimbs = k+1; + mpi_mul ( r2, r2, m ); + if ( r2->nlimbs > k+1 ) /* Quick modulo operation. */ + r2->nlimbs = k+1; + mpi_sub ( r, r1, r2 ); + + if ( mpi_is_neg( r ) ) + { + if (!ctx->r3) + { + ctx->r3 = mpi_alloc ( k + 2 ); + mpi_set_ui (ctx->r3, 1); + mpi_lshift_limbs (ctx->r3, k + 1 ); + } + mpi_add ( r, r, ctx->r3 ); + } + + /* 4. while r >= m do r = r - m */ + while ( mpi_cmp( r, m ) >= 0 ) + mpi_sub ( r, r, m ); + +} + + +void +_gcry_mpi_mul_barrett (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, + mpi_barrett_t ctx) +{ + gcry_mpi_mul (w, u, v); + mpi_mod_barrett (w, w, ctx); +} + @@ -162,7 +162,6 @@ void _gcry_mpi_set_buffer ( gcry_mpi_t a, const void *buffer, #define mpi_tdiv_qr(a,b,c,d) _gcry_mpi_tdiv_qr((a),(b),(c),(d)) #define mpi_tdiv_q_2exp(a,b,c) _gcry_mpi_tdiv_q_2exp((a),(b),(c)) #define mpi_divisible_ui(a,b) _gcry_mpi_divisible_ui((a),(b)) -#define mpi_mod(r,a,m) _gcry_mpi_mod ((r), (a), (m)) ulong _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor ); void _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor ); @@ -172,8 +171,29 @@ void _gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den); void _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned count ); int _gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor ); + + +/*-- mpi-mod.c --*/ +#define mpi_mod(r,a,m) _gcry_mpi_mod ((r), (a), (m)) +#define mpi_barrett_init(m,f) _gcry_mpi_barrett_init ((m),(f)) +#define mpi_barrett_free(c) _gcry_mpi_barrett_free ((c)) +#define mpi_mod_barrett(r,a,c) _gcry_mpi_mod_barrett ((r), (a), (c)) +#define mpi_mul_barrett(r,u,v,c) _gcry_mpi_mul_barrett ((r), (u), (v), (c)) + void _gcry_mpi_mod (gcry_mpi_t r, gcry_mpi_t dividend, gcry_mpi_t divisor); +/* Context used with Barrett reduction. */ +struct barrett_ctx_s; +typedef struct barrett_ctx_s *mpi_barrett_t; + +mpi_barrett_t _gcry_mpi_barrett_init (gcry_mpi_t m, int copy); +void _gcry_mpi_barrett_free (mpi_barrett_t ctx); +void _gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, mpi_barrett_t ctx); +void _gcry_mpi_mul_barrett (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, + mpi_barrett_t ctx); + + + /*-- mpi-gcd.c --*/ /*-- mpi-mpow.c --*/ diff --git a/tests/benchmark.c b/tests/benchmark.c index 9a5dd9fc..5cdf4f0e 100644 --- a/tests/benchmark.c +++ b/tests/benchmark.c @@ -640,7 +640,7 @@ ecc_bench (void) { #if USE_ECC gpg_error_t err; - int p_sizes[] = { 192, 256, 521 }; + int p_sizes[] = { 192, 256, 384 /*, 521*/ }; int testno; |