diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2016-03-12 17:07:21 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2016-03-12 17:16:41 +0200 |
commit | 5d601dd57fcb41aa2015ab655fd6fc51537da667 (patch) | |
tree | f607471051b8cec46a8421e1d59a66c48aa72d4c /cipher/crc.c | |
parent | a8b803d9e4bea2b779385ec9e9a579acc64431e9 (diff) | |
download | libgcrypt-5d601dd57fcb41aa2015ab655fd6fc51537da667.tar.gz |
Add Intel PCLMUL implementations of CRC algorithms
* cipher/Makefile.am: Add 'crc-intel-pclmul.c'.
* cipher/crc-intel-pclmul.c: New.
* cipher/crc.c (USE_INTEL_PCLMUL): New macro.
(CRC_CONTEXT) [USE_INTEL_PCLMUL]: Add 'use_pclmul'.
[USE_INTEL_PCLMUL] (_gcry_crc32_intel_pclmul)
(_gcry_crc24rfc2440_intel_pclmul): New.
(crc32_init, crc32rfc1510_init, crc24rfc2440_init)
[USE_INTEL_PCLMUL]: Select PCLMUL implementation if SSE4.1 and PCLMUL
HW features detected.
(crc32_write, crc24rfc2440_write) [USE_INTEL_PCLMUL]: Use PCLMUL
implementation if enabled.
(crc24_init): Document storage format of 24-bit CRC.
(crc24_next4): Use only 'data' for last table look-up.
* configure.ac: Add 'crc-intel-pclmul.lo'.
* src/g10lib.h (HWF_*, HWF_INTEL_SSE4_1): Update HWF flags to include
Intel SSE4.1.
* src/hwf-x86.c (detect_x86_gnuc): Add SSE4.1 detection.
* src/hwfeatures.c (hwflist): Add 'intel-sse4.1'.
* tests/basic.c (fillbuf_count): New.
(check_one_md): Add "?" check (million byte data-set with byte pattern
0x00,0x01,0x02,...); Test all buffer sizes 1 to 1000, for "!" and "?"
checks.
(check_one_md_multi): Skip "?".
(check_digests): Add "?" test-vectors for MD5, SHA1, SHA224, SHA256,
SHA384, SHA512, SHA3_224, SHA3_256, SHA3_384, SHA3_512, RIPEMD160,
CRC32, CRC32_RFC1510, CRC24_RFC2440, TIGER1 and WHIRLPOOL; Add "!"
test-vectors for CRC32_RFC1510 and CRC24_RFC2440.
--
Add Intel PCLMUL accelerated implementations of CRC algorithms.
CRC performance is improved ~11x on x86_64 and i386 on Intel
Haswell, and ~2.7x on Intel Sandy-bridge.
Benchmark on Intel Core i5-4570 (x86_64, 3.2 GHz):
Before:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.865 ns/B 1103.0 MiB/s 2.77 c/B
CRC32RFC1510 | 0.865 ns/B 1102.7 MiB/s 2.77 c/B
CRC24RFC2440 | 0.865 ns/B 1103.0 MiB/s 2.77 c/B
After:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.079 ns/B 12051.7 MiB/s 0.253 c/B
CRC32RFC1510 | 0.079 ns/B 12050.6 MiB/s 0.253 c/B
CRC24RFC2440 | 0.079 ns/B 12100.0 MiB/s 0.252 c/B
Benchmark on Intel Core i5-4570 (i386, 3.2 GHz):
Before:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.860 ns/B 1109.0 MiB/s 2.75 c/B
CRC32RFC1510 | 0.861 ns/B 1108.3 MiB/s 2.75 c/B
CRC24RFC2440 | 0.860 ns/B 1108.6 MiB/s 2.75 c/B
After:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.078 ns/B 12207.0 MiB/s 0.250 c/B
CRC32RFC1510 | 0.078 ns/B 12207.0 MiB/s 0.250 c/B
CRC24RFC2440 | 0.080 ns/B 11925.6 MiB/s 0.256 c/B
Benchmark on Intel Core i5-2450M (x86_64, 2.5 GHz):
Before:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 1.25 ns/B 762.3 MiB/s 3.13 c/B
CRC32RFC1510 | 1.26 ns/B 759.1 MiB/s 3.14 c/B
CRC24RFC2440 | 1.25 ns/B 764.9 MiB/s 3.12 c/B
After:
| nanosecs/byte mebibytes/sec cycles/byte
CRC32 | 0.451 ns/B 2114.3 MiB/s 1.13 c/B
CRC32RFC1510 | 0.451 ns/B 2114.6 MiB/s 1.13 c/B
CRC24RFC2440 | 0.457 ns/B 2085.0 MiB/s 1.14 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/crc.c')
-rw-r--r-- | cipher/crc.c | 59 |
1 files changed, 57 insertions, 2 deletions
diff --git a/cipher/crc.c b/cipher/crc.c index 46a185a8..ee0e4e2e 100644 --- a/cipher/crc.c +++ b/cipher/crc.c @@ -31,14 +31,37 @@ #include "bufhelp.h" +/* USE_INTEL_PCLMUL indicates whether to compile CRC with Intel PCLMUL + * code. */ +#undef USE_INTEL_PCLMUL +#ifdef ENABLE_PCLMUL_SUPPORT +# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) +# if __GNUC__ >= 4 +# define USE_INTEL_PCLMUL 1 +# endif +# endif +#endif /* USE_INTEL_PCLMUL */ + + typedef struct { u32 CRC; +#ifdef USE_INTEL_PCLMUL + unsigned int use_pclmul:1; /* Intel PCLMUL shall be used. */ +#endif byte buf[4]; } CRC_CONTEXT; +#ifdef USE_INTEL_PCLMUL +/*-- crc-intel-pclmul.c --*/ +void _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen); +void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, + size_t inlen); +#endif + + /* * Code generated by universal_crc by Danjel McGougan * @@ -338,6 +361,11 @@ static void crc32_init (void *context, unsigned int flags) { CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; +#ifdef USE_INTEL_PCLMUL + u32 hwf = _gcry_get_hw_features (); + + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif (void)flags; @@ -351,6 +379,14 @@ crc32_write (void *context, const void *inbuf_arg, size_t inlen) const byte *inbuf = inbuf_arg; u32 crc; +#ifdef USE_INTEL_PCLMUL + if (ctx->use_pclmul) + { + _gcry_crc32_intel_pclmul(&ctx->CRC, inbuf, inlen); + return; + } +#endif + if (!inbuf || !inlen) return; @@ -403,6 +439,11 @@ static void crc32rfc1510_init (void *context, unsigned int flags) { CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; +#ifdef USE_INTEL_PCLMUL + u32 hwf = _gcry_get_hw_features (); + + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif (void)flags; @@ -694,7 +735,8 @@ static const u32 crc24_table[1024] = static inline u32 crc24_init (void) { - return 0xce04b7; + /* Transformed to 32-bit CRC by multiplied by x⁸ and then byte swapped. 
*/ + return 0xce04b7; /* _gcry_bswap(0xb704ce << 8) */ } static inline @@ -713,7 +755,7 @@ u32 crc24_next4 (u32 crc, u32 data) crc = crc24_table[(crc & 0xff) + 0x300] ^ crc24_table[((crc >> 8) & 0xff) + 0x200] ^ crc24_table[((crc >> 16) & 0xff) + 0x100] ^ - crc24_table[(crc >> 24) & 0xff]; + crc24_table[(data >> 24) & 0xff]; return crc; } @@ -727,6 +769,11 @@ static void crc24rfc2440_init (void *context, unsigned int flags) { CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; +#ifdef USE_INTEL_PCLMUL + u32 hwf = _gcry_get_hw_features (); + + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif (void)flags; @@ -740,6 +787,14 @@ crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen) CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; u32 crc; +#ifdef USE_INTEL_PCLMUL + if (ctx->use_pclmul) + { + _gcry_crc24rfc2440_intel_pclmul(&ctx->CRC, inbuf, inlen); + return; + } +#endif + if (!inbuf || !inlen) return; |