diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-12-11 19:32:08 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-12-12 23:41:37 +0200 |
commit | e1a3931263e67aacec3c0bfcaa86c7d1441d5c6a (patch) | |
tree | e38a42a3222c07e83bf1e438e4d02092392fa221 /cipher/sha256.c | |
parent | 5e1239b1e2948211ff2675f45cce2b28c3379cfb (diff) | |
download | libgcrypt-e1a3931263e67aacec3c0bfcaa86c7d1441d5c6a.tar.gz |
SHA-256: Add SSSE3 implementation for x86-64
* cipher/Makefile.am: Add 'sha256-ssse3-amd64.S'.
* cipher/sha256-ssse3-amd64.S: New.
* cipher/sha256.c (USE_SSSE3): New.
(SHA256_CONTEXT) [USE_SSSE3]: Add 'use_ssse3'.
(sha256_init, sha224_init) [USE_SSSE3]: Initialize 'use_ssse3'.
(transform): Rename to...
(_transform): This.
[USE_SSSE3] (_gcry_sha256_transform_amd64_ssse3): New.
(transform): New.
* configure.ac (HAVE_INTEL_SYNTAX_PLATFORM_AS): New check.
(sha256): Add 'sha256-ssse3-amd64.lo'.
* doc/gcrypt.texi: Document 'intel-ssse3'.
* src/g10lib.h (HWF_INTEL_SSSE3): New.
* src/hwfeatures.c (hwflist): Add "intel-ssse3".
* src/hwf-x86.c (detect_x86_gnuc): Test for SSSE3.
--
Patch adds fast SSSE3 implementation of SHA-256 by Intel Corporation. The
assembly source is licensed under 3-clause BSD license, thus compatible
with LGPL2.1+. Original source can be accessed at:
http://www.intel.com/p/en_US/embedded/hwsw/technology/packet-processing#docs
Implementation is described in white paper
"Fast SHA - 256 Implementations on IntelĀ® Architecture Processors"
http://www.intel.com/content/www/us/en/intelligent-systems/intel-technology/sha-256-implementations-paper.html
Benchmarks:
cpu Old New Diff
Intel i5-4570 13.99 c/B 10.66 c/B 1.31x
Intel i5-2450M 21.53 c/B 15.79 c/B 1.36x
Intel Core2 T8100 20.84 c/B 15.07 c/B 1.38x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha256.c')
-rw-r--r-- | cipher/sha256.c | 45 |
1 files changed, 44 insertions, 1 deletions
diff --git a/cipher/sha256.c b/cipher/sha256.c index bd5a4128..f3c1d62c 100644 --- a/cipher/sha256.c +++ b/cipher/sha256.c @@ -46,11 +46,25 @@ #include "cipher.h" #include "hash-common.h" + +/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ +#undef USE_SSSE3 +#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) +# define USE_SSSE3 1 +#endif + + typedef struct { gcry_md_block_ctx_t bctx; u32 h0,h1,h2,h3,h4,h5,h6,h7; +#ifdef USE_SSSE3 + unsigned int use_ssse3:1; +#endif } SHA256_CONTEXT; + static unsigned int transform (void *c, const unsigned char *data); @@ -74,6 +88,10 @@ sha256_init (void *context) hd->bctx.count = 0; hd->bctx.blocksize = 64; hd->bctx.bwrite = transform; + +#ifdef USE_SSSE3 + hd->use_ssse3 = (_gcry_get_hw_features () & HWF_INTEL_SSSE3) != 0; +#endif } @@ -96,6 +114,10 @@ sha224_init (void *context) hd->bctx.count = 0; hd->bctx.blocksize = 64; hd->bctx.bwrite = transform; + +#ifdef USE_SSSE3 + hd->use_ssse3 = (_gcry_get_hw_features () & HWF_INTEL_SSSE3) != 0; +#endif } @@ -148,7 +170,7 @@ Sum1 (u32 x) static unsigned int -transform (void *ctx, const unsigned char *data) +_transform (void *ctx, const unsigned char *data) { SHA256_CONTEXT *hd = ctx; static const u32 K[64] = { @@ -254,6 +276,27 @@ transform (void *ctx, const unsigned char *data) #undef R +#ifdef USE_SSSE3 +unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data, + u32 state[8], size_t num_blks); +#endif + + +static unsigned int +transform (void *ctx, const unsigned char *data) +{ + SHA256_CONTEXT *hd = ctx; + +#ifdef USE_SSSE3 + if (hd->use_ssse3) + return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, 1) + + 4 * sizeof(void*); +#endif + + return _transform (hd, data); +} + + /* The routine finally terminates the computation and returns the digest. The handle is prepared for a new cycle, but adding bytes |