diff options
author | Andrei Scherer <andsch@inbox.com> | 2014-08-28 09:45:35 -0800 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2014-10-04 14:49:57 +0300 |
commit | 30bd759f398f45b04d0a783b875f59ce9bd1e51d (patch) | |
tree | 19c2f40d3cf8b8efc7e8f412f7cd84e4b14b8513 /cipher/rmd160.c | |
parent | 0ecd136a6ca02252f63ad229fa5240897bfe6544 (diff) | |
download | libgcrypt-30bd759f398f45b04d0a783b875f59ce9bd1e51d.tar.gz |
Improved ripemd160 performance
* cipher/rmd160.c (transform): Interleave the left and right lane
rounds to introduce more instruction level parallelism.
--
The benchmarks on different systems:
Intel(R) Atom(TM) CPU N570 @ 1.66GHz
before:
Hash:
| nanosecs/byte mebibytes/sec cycles/byte
RIPEMD160 | 13.07 ns/B 72.97 MiB/s - c/B
after:
Hash:
| nanosecs/byte mebibytes/sec cycles/byte
RIPEMD160 | 11.37 ns/B 83.84 MiB/s - c/B
Intel(R) Core(TM) i5-4670 CPU @ 3.40GHz
before:
Hash:
| nanosecs/byte mebibytes/sec cycles/byte
RIPEMD160 | 3.31 ns/B 288.0 MiB/s - c/B
after:
Hash:
| nanosecs/byte mebibytes/sec cycles/byte
RIPEMD160 | 2.08 ns/B 458.5 MiB/s - c/B
Signed-off-by: Andrei Scherer <andsch@inbox.com>
Diffstat (limited to 'cipher/rmd160.c')
-rw-r--r-- | cipher/rmd160.c | 367 |
1 files changed, 178 insertions, 189 deletions
diff --git a/cipher/rmd160.c b/cipher/rmd160.c index 2aba0feb..e6d02f54 100644 --- a/cipher/rmd160.c +++ b/cipher/rmd160.c @@ -178,8 +178,7 @@ static unsigned int transform_blk ( void *ctx, const unsigned char *data ) { RMD160_CONTEXT *hd = ctx; - register u32 a,b,c,d,e; - u32 aa,bb,cc,dd,ee,t; + register u32 al, ar, bl, br, cl, cr, dl, dr, el, er; u32 x[16]; int i; @@ -201,196 +200,186 @@ transform_blk ( void *ctx, const unsigned char *data ) #define F2(x,y,z) ( ((x) | ~(y)) ^ (z) ) #define F3(x,y,z) ( ((x) & (z)) | ((y) & ~(z)) ) #define F4(x,y,z) ( (x) ^ ((y) | ~(z)) ) -#define R(a,b,c,d,e,f,k,r,s) do { t = a + f(b,c,d) + k + x[r]; \ - a = rol(t,s) + e; \ +#define R(a,b,c,d,e,f,k,r,s) do { a += f(b,c,d) + k + x[r]; \ + a = rol(a,s) + e; \ c = rol(c,10); \ } while(0) - /* left lane */ - a = hd->h0; - b = hd->h1; - c = hd->h2; - d = hd->h3; - e = hd->h4; - R( a, b, c, d, e, F0, K0, 0, 11 ); - R( e, a, b, c, d, F0, K0, 1, 14 ); - R( d, e, a, b, c, F0, K0, 2, 15 ); - R( c, d, e, a, b, F0, K0, 3, 12 ); - R( b, c, d, e, a, F0, K0, 4, 5 ); - R( a, b, c, d, e, F0, K0, 5, 8 ); - R( e, a, b, c, d, F0, K0, 6, 7 ); - R( d, e, a, b, c, F0, K0, 7, 9 ); - R( c, d, e, a, b, F0, K0, 8, 11 ); - R( b, c, d, e, a, F0, K0, 9, 13 ); - R( a, b, c, d, e, F0, K0, 10, 14 ); - R( e, a, b, c, d, F0, K0, 11, 15 ); - R( d, e, a, b, c, F0, K0, 12, 6 ); - R( c, d, e, a, b, F0, K0, 13, 7 ); - R( b, c, d, e, a, F0, K0, 14, 9 ); - R( a, b, c, d, e, F0, K0, 15, 8 ); - R( e, a, b, c, d, F1, K1, 7, 7 ); - R( d, e, a, b, c, F1, K1, 4, 6 ); - R( c, d, e, a, b, F1, K1, 13, 8 ); - R( b, c, d, e, a, F1, K1, 1, 13 ); - R( a, b, c, d, e, F1, K1, 10, 11 ); - R( e, a, b, c, d, F1, K1, 6, 9 ); - R( d, e, a, b, c, F1, K1, 15, 7 ); - R( c, d, e, a, b, F1, K1, 3, 15 ); - R( b, c, d, e, a, F1, K1, 12, 7 ); - R( a, b, c, d, e, F1, K1, 0, 12 ); - R( e, a, b, c, d, F1, K1, 9, 15 ); - R( d, e, a, b, c, F1, K1, 5, 9 ); - R( c, d, e, a, b, F1, K1, 2, 11 ); - R( b, c, d, e, a, F1, K1, 14, 7 ); - R( a, b, c, d, e, F1, K1, 11, 13 ); - R( e, a, b, c, d, F1, K1, 8, 12 ); - R( d, e, a, b, c, F2, K2, 3, 11 ); - R( c, d, e, a, b, F2, K2, 10, 13 ); - R( b, c, d, e, a, F2, K2, 14, 6 ); - R( a, b, c, d, e, F2, K2, 4, 7 ); - R( e, a, b, c, d, F2, K2, 9, 14 ); - R( d, e, a, b, c, F2, K2, 15, 9 ); - R( c, d, e, a, b, F2, K2, 8, 13 ); - R( b, c, d, e, a, F2, K2, 1, 15 ); - R( a, b, c, d, e, F2, K2, 2, 14 ); - R( e, a, b, c, d, F2, K2, 7, 8 ); - R( d, e, a, b, c, F2, K2, 0, 13 ); - R( c, d, e, a, b, F2, K2, 6, 6 ); - R( b, c, d, e, a, F2, K2, 13, 5 ); - R( a, b, c, d, e, F2, K2, 11, 12 ); - R( e, a, b, c, d, F2, K2, 5, 7 ); - R( d, e, a, b, c, F2, K2, 12, 5 ); - R( c, d, e, a, b, F3, K3, 1, 11 ); - R( b, c, d, e, a, F3, K3, 9, 12 ); - R( a, b, c, d, e, F3, K3, 11, 14 ); - R( e, a, b, c, d, F3, K3, 10, 15 ); - R( d, e, a, b, c, F3, K3, 0, 14 ); - R( c, d, e, a, b, F3, K3, 8, 15 ); - R( b, c, d, e, a, F3, K3, 12, 9 ); - R( a, b, c, d, e, F3, K3, 4, 8 ); - R( e, a, b, c, d, F3, K3, 13, 9 ); - R( d, e, a, b, c, F3, K3, 3, 14 ); - R( c, d, e, a, b, F3, K3, 7, 5 ); - R( b, c, d, e, a, F3, K3, 15, 6 ); - R( a, b, c, d, e, F3, K3, 14, 8 ); - R( e, a, b, c, d, F3, K3, 5, 6 ); - R( d, e, a, b, c, F3, K3, 6, 5 ); - R( c, d, e, a, b, F3, K3, 2, 12 ); - R( b, c, d, e, a, F4, K4, 4, 9 ); - R( a, b, c, d, e, F4, K4, 0, 15 ); - R( e, a, b, c, d, F4, K4, 5, 5 ); - R( d, e, a, b, c, F4, K4, 9, 11 ); - R( c, d, e, a, b, F4, K4, 7, 6 ); - R( b, c, d, e, a, F4, K4, 12, 8 ); - R( a, b, c, d, e, F4, K4, 2, 13 ); - R( e, a, b, c, d, F4, K4, 10, 12 ); - R( d, e, a, b, c, F4, K4, 14, 5 ); - R( c, d, e, a, b, F4, K4, 1, 12 ); - R( b, c, d, e, a, F4, K4, 3, 13 ); - R( a, b, c, d, e, F4, K4, 8, 14 ); - R( e, a, b, c, d, F4, K4, 11, 11 ); - R( d, e, a, b, c, F4, K4, 6, 8 ); - R( c, d, e, a, b, F4, K4, 15, 5 ); - R( b, c, d, e, a, F4, K4, 13, 6 ); - - aa = a; bb = b; cc = c; dd = d; ee = e; - - /* right lane */ - a = hd->h0; - b = hd->h1; - c = hd->h2; - d = hd->h3; - e = hd->h4; - R( a, b, c, d, e, F4, KK0, 5, 8); - R( e, a, b, c, d, F4, KK0, 14, 9); - R( d, e, a, b, c, F4, KK0, 7, 9); - R( c, d, e, a, b, F4, KK0, 0, 11); - R( b, c, d, e, a, F4, KK0, 9, 13); - R( a, b, c, d, e, F4, KK0, 2, 15); - R( e, a, b, c, d, F4, KK0, 11, 15); - R( d, e, a, b, c, F4, KK0, 4, 5); - R( c, d, e, a, b, F4, KK0, 13, 7); - R( b, c, d, e, a, F4, KK0, 6, 7); - R( a, b, c, d, e, F4, KK0, 15, 8); - R( e, a, b, c, d, F4, KK0, 8, 11); - R( d, e, a, b, c, F4, KK0, 1, 14); - R( c, d, e, a, b, F4, KK0, 10, 14); - R( b, c, d, e, a, F4, KK0, 3, 12); - R( a, b, c, d, e, F4, KK0, 12, 6); - R( e, a, b, c, d, F3, KK1, 6, 9); - R( d, e, a, b, c, F3, KK1, 11, 13); - R( c, d, e, a, b, F3, KK1, 3, 15); - R( b, c, d, e, a, F3, KK1, 7, 7); - R( a, b, c, d, e, F3, KK1, 0, 12); - R( e, a, b, c, d, F3, KK1, 13, 8); - R( d, e, a, b, c, F3, KK1, 5, 9); - R( c, d, e, a, b, F3, KK1, 10, 11); - R( b, c, d, e, a, F3, KK1, 14, 7); - R( a, b, c, d, e, F3, KK1, 15, 7); - R( e, a, b, c, d, F3, KK1, 8, 12); - R( d, e, a, b, c, F3, KK1, 12, 7); - R( c, d, e, a, b, F3, KK1, 4, 6); - R( b, c, d, e, a, F3, KK1, 9, 15); - R( a, b, c, d, e, F3, KK1, 1, 13); - R( e, a, b, c, d, F3, KK1, 2, 11); - R( d, e, a, b, c, F2, KK2, 15, 9); - R( c, d, e, a, b, F2, KK2, 5, 7); - R( b, c, d, e, a, F2, KK2, 1, 15); - R( a, b, c, d, e, F2, KK2, 3, 11); - R( e, a, b, c, d, F2, KK2, 7, 8); - R( d, e, a, b, c, F2, KK2, 14, 6); - R( c, d, e, a, b, F2, KK2, 6, 6); - R( b, c, d, e, a, F2, KK2, 9, 14); - R( a, b, c, d, e, F2, KK2, 11, 12); - R( e, a, b, c, d, F2, KK2, 8, 13); - R( d, e, a, b, c, F2, KK2, 12, 5); - R( c, d, e, a, b, F2, KK2, 2, 14); - R( b, c, d, e, a, F2, KK2, 10, 13); - R( a, b, c, d, e, F2, KK2, 0, 13); - R( e, a, b, c, d, F2, KK2, 4, 7); - R( d, e, a, b, c, F2, KK2, 13, 5); - R( c, d, e, a, b, F1, KK3, 8, 15); - R( b, c, d, e, a, F1, KK3, 6, 5); - R( a, b, c, d, e, F1, KK3, 4, 8); - R( e, a, b, c, d, F1, KK3, 1, 11); - R( d, e, a, b, c, F1, KK3, 3, 14); - R( c, d, e, a, b, F1, KK3, 11, 14); - R( b, c, d, e, a, F1, KK3, 15, 6); - R( a, b, c, d, e, F1, KK3, 0, 14); - R( e, a, b, c, d, F1, KK3, 5, 6); - R( d, e, a, b, c, F1, KK3, 12, 9); - R( c, d, e, a, b, F1, KK3, 2, 12); - R( b, c, d, e, a, F1, KK3, 13, 9); - R( a, b, c, d, e, F1, KK3, 9, 12); - R( e, a, b, c, d, F1, KK3, 7, 5); - R( d, e, a, b, c, F1, KK3, 10, 15); - R( c, d, e, a, b, F1, KK3, 14, 8); - R( b, c, d, e, a, F0, KK4, 12, 8); - R( a, b, c, d, e, F0, KK4, 15, 5); - R( e, a, b, c, d, F0, KK4, 10, 12); - R( d, e, a, b, c, F0, KK4, 4, 9); - R( c, d, e, a, b, F0, KK4, 1, 12); - R( b, c, d, e, a, F0, KK4, 5, 5); - R( a, b, c, d, e, F0, KK4, 8, 14); - R( e, a, b, c, d, F0, KK4, 7, 6); - R( d, e, a, b, c, F0, KK4, 6, 8); - R( c, d, e, a, b, F0, KK4, 2, 13); - R( b, c, d, e, a, F0, KK4, 13, 6); - R( a, b, c, d, e, F0, KK4, 14, 5); - R( e, a, b, c, d, F0, KK4, 0, 15); - R( d, e, a, b, c, F0, KK4, 3, 13); - R( c, d, e, a, b, F0, KK4, 9, 11); - R( b, c, d, e, a, F0, KK4, 11, 11); - - - t = hd->h1 + d + cc; - hd->h1 = hd->h2 + e + dd; - hd->h2 = hd->h3 + a + ee; - hd->h3 = hd->h4 + b + aa; - hd->h4 = hd->h0 + c + bb; - hd->h0 = t; - - return /*burn_stack*/ 108+5*sizeof(void*); + /* left lane and right lanes interleaved */ + al = ar = hd->h0; + bl = br = hd->h1; + cl = cr = hd->h2; + dl = dr = hd->h3; + el = er = hd->h4; + R( al, bl, cl, dl, el, F0, K0, 0, 11 ); + R( ar, br, cr, dr, er, F4, KK0, 5, 8); + R( el, al, bl, cl, dl, F0, K0, 1, 14 ); + R( er, ar, br, cr, dr, F4, KK0, 14, 9); + R( dl, el, al, bl, cl, F0, K0, 2, 15 ); + R( dr, er, ar, br, cr, F4, KK0, 7, 9); + R( cl, dl, el, al, bl, F0, K0, 3, 12 ); + R( cr, dr, er, ar, br, F4, KK0, 0, 11); + R( bl, cl, dl, el, al, F0, K0, 4, 5 ); + R( br, cr, dr, er, ar, F4, KK0, 9, 13); + R( al, bl, cl, dl, el, F0, K0, 5, 8 ); + R( ar, br, cr, dr, er, F4, KK0, 2, 15); + R( el, al, bl, cl, dl, F0, K0, 6, 7 ); + R( er, ar, br, cr, dr, F4, KK0, 11, 15); + R( dl, el, al, bl, cl, F0, K0, 7, 9 ); + R( dr, er, ar, br, cr, F4, KK0, 4, 5); + R( cl, dl, el, al, bl, F0, K0, 8, 11 ); + R( cr, dr, er, ar, br, F4, KK0, 13, 7); + R( bl, cl, dl, el, al, F0, K0, 9, 13 ); + R( br, cr, dr, er, ar, F4, KK0, 6, 7); + R( al, bl, cl, dl, el, F0, K0, 10, 14 ); + R( ar, br, cr, dr, er, F4, KK0, 15, 8); + R( el, al, bl, cl, dl, F0, K0, 11, 15 ); + R( er, ar, br, cr, dr, F4, KK0, 8, 11); + R( dl, el, al, bl, cl, F0, K0, 12, 6 ); + R( dr, er, ar, br, cr, F4, KK0, 1, 14); + R( cl, dl, el, al, bl, F0, K0, 13, 7 ); + R( cr, dr, er, ar, br, F4, KK0, 10, 14); + R( bl, cl, dl, el, al, F0, K0, 14, 9 ); + R( br, cr, dr, er, ar, F4, KK0, 3, 12); + R( al, bl, cl, dl, el, F0, K0, 15, 8 ); + R( ar, br, cr, dr, er, F4, KK0, 12, 6); + R( el, al, bl, cl, dl, F1, K1, 7, 7 ); + R( er, ar, br, cr, dr, F3, KK1, 6, 9); + R( dl, el, al, bl, cl, F1, K1, 4, 6 ); + R( dr, er, ar, br, cr, F3, KK1, 11, 13); + R( cl, dl, el, al, bl, F1, K1, 13, 8 ); + R( cr, dr, er, ar, br, F3, KK1, 3, 15); + R( bl, cl, dl, el, al, F1, K1, 1, 13 ); + R( br, cr, dr, er, ar, F3, KK1, 7, 7); + R( al, bl, cl, dl, el, F1, K1, 10, 11 ); + R( ar, br, cr, dr, er, F3, KK1, 0, 12); + R( el, al, bl, cl, dl, F1, K1, 6, 9 ); + R( er, ar, br, cr, dr, F3, KK1, 13, 8); + R( dl, el, al, bl, cl, F1, K1, 15, 7 ); + R( dr, er, ar, br, cr, F3, KK1, 5, 9); + R( cl, dl, el, al, bl, F1, K1, 3, 15 ); + R( cr, dr, er, ar, br, F3, KK1, 10, 11); + R( bl, cl, dl, el, al, F1, K1, 12, 7 ); + R( br, cr, dr, er, ar, F3, KK1, 14, 7); + R( al, bl, cl, dl, el, F1, K1, 0, 12 ); + R( ar, br, cr, dr, er, F3, KK1, 15, 7); + R( el, al, bl, cl, dl, F1, K1, 9, 15 ); + R( er, ar, br, cr, dr, F3, KK1, 8, 12); + R( dl, el, al, bl, cl, F1, K1, 5, 9 ); + R( dr, er, ar, br, cr, F3, KK1, 12, 7); + R( cl, dl, el, al, bl, F1, K1, 2, 11 ); + R( cr, dr, er, ar, br, F3, KK1, 4, 6); + R( bl, cl, dl, el, al, F1, K1, 14, 7 ); + R( br, cr, dr, er, ar, F3, KK1, 9, 15); + R( al, bl, cl, dl, el, F1, K1, 11, 13 ); + R( ar, br, cr, dr, er, F3, KK1, 1, 13); + R( el, al, bl, cl, dl, F1, K1, 8, 12 ); + R( er, ar, br, cr, dr, F3, KK1, 2, 11); + R( dl, el, al, bl, cl, F2, K2, 3, 11 ); + R( dr, er, ar, br, cr, F2, KK2, 15, 9); + R( cl, dl, el, al, bl, F2, K2, 10, 13 ); + R( cr, dr, er, ar, br, F2, KK2, 5, 7); + R( bl, cl, dl, el, al, F2, K2, 14, 6 ); + R( br, cr, dr, er, ar, F2, KK2, 1, 15); + R( al, bl, cl, dl, el, F2, K2, 4, 7 ); + R( ar, br, cr, dr, er, F2, KK2, 3, 11); + R( el, al, bl, cl, dl, F2, K2, 9, 14 ); + R( er, ar, br, cr, dr, F2, KK2, 7, 8); + R( dl, el, al, bl, cl, F2, K2, 15, 9 ); + R( dr, er, ar, br, cr, F2, KK2, 14, 6); + R( cl, dl, el, al, bl, F2, K2, 8, 13 ); + R( cr, dr, er, ar, br, F2, KK2, 6, 6); + R( bl, cl, dl, el, al, F2, K2, 1, 15 ); + R( br, cr, dr, er, ar, F2, KK2, 9, 14); + R( al, bl, cl, dl, el, F2, K2, 2, 14 ); + R( ar, br, cr, dr, er, F2, KK2, 11, 12); + R( el, al, bl, cl, dl, F2, K2, 7, 8 ); + R( er, ar, br, cr, dr, F2, KK2, 8, 13); + R( dl, el, al, bl, cl, F2, K2, 0, 13 ); + R( dr, er, ar, br, cr, F2, KK2, 12, 5); + R( cl, dl, el, al, bl, F2, K2, 6, 6 ); + R( cr, dr, er, ar, br, F2, KK2, 2, 14); + R( bl, cl, dl, el, al, F2, K2, 13, 5 ); + R( br, cr, dr, er, ar, F2, KK2, 10, 13); + R( al, bl, cl, dl, el, F2, K2, 11, 12 ); + R( ar, br, cr, dr, er, F2, KK2, 0, 13); + R( el, al, bl, cl, dl, F2, K2, 5, 7 ); + R( er, ar, br, cr, dr, F2, KK2, 4, 7); + R( dl, el, al, bl, cl, F2, K2, 12, 5 ); + R( dr, er, ar, br, cr, F2, KK2, 13, 5); + R( cl, dl, el, al, bl, F3, K3, 1, 11 ); + R( cr, dr, er, ar, br, F1, KK3, 8, 15); + R( bl, cl, dl, el, al, F3, K3, 9, 12 ); + R( br, cr, dr, er, ar, F1, KK3, 6, 5); + R( al, bl, cl, dl, el, F3, K3, 11, 14 ); + R( ar, br, cr, dr, er, F1, KK3, 4, 8); + R( el, al, bl, cl, dl, F3, K3, 10, 15 ); + R( er, ar, br, cr, dr, F1, KK3, 1, 11); + R( dl, el, al, bl, cl, F3, K3, 0, 14 ); + R( dr, er, ar, br, cr, F1, KK3, 3, 14); + R( cl, dl, el, al, bl, F3, K3, 8, 15 ); + R( cr, dr, er, ar, br, F1, KK3, 11, 14); + R( bl, cl, dl, el, al, F3, K3, 12, 9 ); + R( br, cr, dr, er, ar, F1, KK3, 15, 6); + R( al, bl, cl, dl, el, F3, K3, 4, 8 ); + R( ar, br, cr, dr, er, F1, KK3, 0, 14); + R( el, al, bl, cl, dl, F3, K3, 13, 9 ); + R( er, ar, br, cr, dr, F1, KK3, 5, 6); + R( dl, el, al, bl, cl, F3, K3, 3, 14 ); + R( dr, er, ar, br, cr, F1, KK3, 12, 9); + R( cl, dl, el, al, bl, F3, K3, 7, 5 ); + R( cr, dr, er, ar, br, F1, KK3, 2, 12); + R( bl, cl, dl, el, al, F3, K3, 15, 6 ); + R( br, cr, dr, er, ar, F1, KK3, 13, 9); + R( al, bl, cl, dl, el, F3, K3, 14, 8 ); + R( ar, br, cr, dr, er, F1, KK3, 9, 12); + R( el, al, bl, cl, dl, F3, K3, 5, 6 ); + R( er, ar, br, cr, dr, F1, KK3, 7, 5); + R( dl, el, al, bl, cl, F3, K3, 6, 5 ); + R( dr, er, ar, br, cr, F1, KK3, 10, 15); + R( cl, dl, el, al, bl, F3, K3, 2, 12 ); + R( cr, dr, er, ar, br, F1, KK3, 14, 8); + R( bl, cl, dl, el, al, F4, K4, 4, 9 ); + R( br, cr, dr, er, ar, F0, KK4, 12, 8); + R( al, bl, cl, dl, el, F4, K4, 0, 15 ); + R( ar, br, cr, dr, er, F0, KK4, 15, 5); + R( el, al, bl, cl, dl, F4, K4, 5, 5 ); + R( er, ar, br, cr, dr, F0, KK4, 10, 12); + R( dl, el, al, bl, cl, F4, K4, 9, 11 ); + R( dr, er, ar, br, cr, F0, KK4, 4, 9); + R( cl, dl, el, al, bl, F4, K4, 7, 6 ); + R( cr, dr, er, ar, br, F0, KK4, 1, 12); + R( bl, cl, dl, el, al, F4, K4, 12, 8 ); + R( br, cr, dr, er, ar, F0, KK4, 5, 5); + R( al, bl, cl, dl, el, F4, K4, 2, 13 ); + R( ar, br, cr, dr, er, F0, KK4, 8, 14); + R( el, al, bl, cl, dl, F4, K4, 10, 12 ); + R( er, ar, br, cr, dr, F0, KK4, 7, 6); + R( dl, el, al, bl, cl, F4, K4, 14, 5 ); + R( dr, er, ar, br, cr, F0, KK4, 6, 8); + R( cl, dl, el, al, bl, F4, K4, 1, 12 ); + R( cr, dr, er, ar, br, F0, KK4, 2, 13); + R( bl, cl, dl, el, al, F4, K4, 3, 13 ); + R( br, cr, dr, er, ar, F0, KK4, 13, 6); + R( al, bl, cl, dl, el, F4, K4, 8, 14 ); + R( ar, br, cr, dr, er, F0, KK4, 14, 5); + R( el, al, bl, cl, dl, F4, K4, 11, 11 ); + R( er, ar, br, cr, dr, F0, KK4, 0, 15); + R( dl, el, al, bl, cl, F4, K4, 6, 8 ); + R( dr, er, ar, br, cr, F0, KK4, 3, 13); + R( cl, dl, el, al, bl, F4, K4, 15, 5 ); + R( cr, dr, er, ar, br, F0, KK4, 9, 11); + R( bl, cl, dl, el, al, F4, K4, 13, 6 ); + R( br, cr, dr, er, ar, F0, KK4, 11, 11); + + dr += cl + hd->h1; + hd->h1 = hd->h2 + dl + er; + hd->h2 = hd->h3 + el + ar; + hd->h3 = hd->h4 + al + br; + hd->h4 = hd->h0 + bl + cr; + hd->h0 = dr; + + return /*burn_stack*/ 104+5*sizeof(void*); } |