diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-08-31 12:48:30 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-08-31 13:31:34 +0300 |
commit | 03da7f8ba3ec24d4639a2bcebbc0d9d831734c08 (patch) | |
tree | a540051b35fe502c4dde4a9adc2e3e7b45950921 /cipher/sha512.c | |
parent | 9c95be105f518d18407115c2c06893857c24b116 (diff) | |
download | libgcrypt-03da7f8ba3ec24d4639a2bcebbc0d9d831734c08.tar.gz |
sha512: reduce stack use in transform function by 512 bytes
* cipher/sha512.c (transform): Change 'u64 w[80]' to 'u64 w[16]' and
inline the input expansion into the first 64 rounds.
(sha512_write, sha512_final): Reduce burn_stack depth by 512 bytes.
--
The input expansion into the w[] array can be inlined into the rounds, and the size of the array
reduced from u64[80] to u64[16]. On Cortex-A8, this change gives a small boost,
possibly thanks to the reduced burn_stack depth.
New vs old (tests/benchmark md sha512 sha384):
SHA512 1.09x 1.11x 1.06x 1.09x 1.08x
SHA384 1.09x 1.11x 1.06x 1.09x 1.09x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha512.c')
-rw-r--r-- | cipher/sha512.c | 191 |
1 files changed, 173 insertions, 18 deletions
diff --git a/cipher/sha512.c b/cipher/sha512.c index 2163e600..1bbcd111 100644 --- a/cipher/sha512.c +++ b/cipher/sha512.c @@ -135,7 +135,7 @@ static void transform (SHA512_CONTEXT *hd, const unsigned char *data) { u64 a, b, c, d, e, f, g, h; - u64 w[80]; + u64 w[16]; int t; static const u64 k[] = { @@ -215,11 +215,8 @@ transform (SHA512_CONTEXT *hd, const unsigned char *data) #define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) #define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) - for (t = 16; t < 80; t++) - w[t] = S1 (w[t - 2]) + w[t - 7] + S0 (w[t - 15]) + w[t - 16]; - - for (t = 0; t < 80; ) + for (t = 0; t < 80 - 16; ) { u64 t1, t2; @@ -232,7 +229,125 @@ transform (SHA512_CONTEXT *hd, const unsigned char *data) Unrolled with inline: 330ms */ #if 0 /* Not unrolled. */ - t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16]; + w[t%16] += S1 (w[(t - 2)%16]) + w[(t - 7)%16] + S0 (w[(t - 15)%16]); + t2 = Sum0 (a) + Maj (a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + t++; +#else /* Unrolled to interweave the chain variables. 
*/ + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0]; + w[0] += S1 (w[14]) + w[9] + S0 (w[1]); + t2 = Sum0 (a) + Maj (a, b, c); + d += t1; + h = t1 + t2; + + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1]; + w[1] += S1 (w[15]) + w[10] + S0 (w[2]); + t2 = Sum0 (h) + Maj (h, a, b); + c += t1; + g = t1 + t2; + + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2]; + w[2] += S1 (w[0]) + w[11] + S0 (w[3]); + t2 = Sum0 (g) + Maj (g, h, a); + b += t1; + f = t1 + t2; + + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3]; + w[3] += S1 (w[1]) + w[12] + S0 (w[4]); + t2 = Sum0 (f) + Maj (f, g, h); + a += t1; + e = t1 + t2; + + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4]; + w[4] += S1 (w[2]) + w[13] + S0 (w[5]); + t2 = Sum0 (e) + Maj (e, f, g); + h += t1; + d = t1 + t2; + + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5]; + w[5] += S1 (w[3]) + w[14] + S0 (w[6]); + t2 = Sum0 (d) + Maj (d, e, f); + g += t1; + c = t1 + t2; + + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6]; + w[6] += S1 (w[4]) + w[15] + S0 (w[7]); + t2 = Sum0 (c) + Maj (c, d, e); + f += t1; + b = t1 + t2; + + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7]; + w[7] += S1 (w[5]) + w[0] + S0 (w[8]); + t2 = Sum0 (b) + Maj (b, c, d); + e += t1; + a = t1 + t2; + + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8]; + w[8] += S1 (w[6]) + w[1] + S0 (w[9]); + t2 = Sum0 (a) + Maj (a, b, c); + d += t1; + h = t1 + t2; + + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9]; + w[9] += S1 (w[7]) + w[2] + S0 (w[10]); + t2 = Sum0 (h) + Maj (h, a, b); + c += t1; + g = t1 + t2; + + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10]; + w[10] += S1 (w[8]) + w[3] + S0 (w[11]); + t2 = Sum0 (g) + Maj (g, h, a); + b += t1; + f = t1 + t2; + + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11]; + w[11] += S1 (w[9]) + w[4] + S0 (w[12]); + t2 = Sum0 (f) + Maj (f, g, h); + a += t1; + e = t1 + t2; + + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12]; + w[12] += S1 (w[10]) + w[5] + S0 (w[13]); + t2 = Sum0 (e) + Maj (e, f, g); + h += t1; + d 
= t1 + t2; + + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13]; + w[13] += S1 (w[11]) + w[6] + S0 (w[14]); + t2 = Sum0 (d) + Maj (d, e, f); + g += t1; + c = t1 + t2; + + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14]; + w[14] += S1 (w[12]) + w[7] + S0 (w[15]); + t2 = Sum0 (c) + Maj (c, d, e); + f += t1; + b = t1 + t2; + + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15]; + w[15] += S1 (w[13]) + w[8] + S0 (w[0]); + t2 = Sum0 (b) + Maj (b, c, d); + e += t1; + a = t1 + t2; + + t += 16; +#endif + } + + for (; t < 80; ) + { + u64 t1, t2; + +#if 0 /* Not unrolled. */ + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t%16]; t2 = Sum0 (a) + Maj (a, b, c); h = g; g = f; @@ -244,47 +359,87 @@ transform (SHA512_CONTEXT *hd, const unsigned char *data) a = t1 + t2; t++; #else /* Unrolled to interweave the chain variables. */ - t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0]; + t2 = Sum0 (a) + Maj (a, b, c); + d += t1; + h = t1 + t2; + + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1]; + t2 = Sum0 (h) + Maj (h, a, b); + c += t1; + g = t1 + t2; + + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2]; + t2 = Sum0 (g) + Maj (g, h, a); + b += t1; + f = t1 + t2; + + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3]; + t2 = Sum0 (f) + Maj (f, g, h); + a += t1; + e = t1 + t2; + + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4]; + t2 = Sum0 (e) + Maj (e, f, g); + h += t1; + d = t1 + t2; + + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5]; + t2 = Sum0 (d) + Maj (d, e, f); + g += t1; + c = t1 + t2; + + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6]; + t2 = Sum0 (c) + Maj (c, d, e); + f += t1; + b = t1 + t2; + + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7]; + t2 = Sum0 (b) + Maj (b, c, d); + e += t1; + a = t1 + t2; + + t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8]; t2 = Sum0 (a) + Maj (a, b, c); d += t1; h = t1 + t2; - t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[t+1]; + t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9]; t2 = 
Sum0 (h) + Maj (h, a, b); c += t1; g = t1 + t2; - t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[t+2]; + t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10]; t2 = Sum0 (g) + Maj (g, h, a); b += t1; f = t1 + t2; - t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[t+3]; + t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11]; t2 = Sum0 (f) + Maj (f, g, h); a += t1; e = t1 + t2; - t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[t+4]; + t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12]; t2 = Sum0 (e) + Maj (e, f, g); h += t1; d = t1 + t2; - t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[t+5]; + t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13]; t2 = Sum0 (d) + Maj (d, e, f); g += t1; c = t1 + t2; - t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[t+6]; + t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14]; t2 = Sum0 (c) + Maj (c, d, e); f += t1; b = t1 + t2; - t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[t+7]; + t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15]; t2 = Sum0 (b) + Maj (b, c, d); e += t1; a = t1 + t2; - t += 8; + t += 16; #endif } @@ -312,7 +467,7 @@ sha512_write (void *context, const void *inbuf_arg, size_t inlen) if (hd->count == 128) { /* flush the buffer */ transform (hd, hd->buf); - _gcry_burn_stack (768); + _gcry_burn_stack (256); hd->count = 0; hd->nblocks++; } @@ -335,7 +490,7 @@ sha512_write (void *context, const void *inbuf_arg, size_t inlen) inlen -= 128; inbuf += 128; } - _gcry_burn_stack (768); + _gcry_burn_stack (256); for (; inlen && hd->count < 128; inlen--) hd->buf[hd->count++] = *inbuf++; } @@ -405,7 +560,7 @@ sha512_final (void *context) hd->buf[126] = lsb >> 8; hd->buf[127] = lsb; transform (hd, hd->buf); - _gcry_burn_stack (768); + _gcry_burn_stack (256); p = hd->buf; #ifdef WORDS_BIGENDIAN |