summaryrefslogtreecommitdiff
path: root/cipher/crc.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2016-03-12 17:07:21 +0200
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2016-03-12 17:16:41 +0200
commit5d601dd57fcb41aa2015ab655fd6fc51537da667 (patch)
treef607471051b8cec46a8421e1d59a66c48aa72d4c /cipher/crc.c
parenta8b803d9e4bea2b779385ec9e9a579acc64431e9 (diff)
downloadlibgcrypt-5d601dd57fcb41aa2015ab655fd6fc51537da667.tar.gz
Add Intel PCLMUL implementations of CRC algorithms
* cipher/Makefile.am: Add 'crc-intel-pclmul.c'. * cipher/crc-intel-pclmul.c: New. * cipher/crc.c (USE_INTEL_PCLMUL): New macro. (CRC_CONTEXT) [USE_INTEL_PCLMUL]: Add 'use_pclmul'. [USE_INTEL_PCLMUL] (_gcry_crc32_intel_pclmul) (_gcry_crc24rfc2440_intel_pclmul): New. (crc32_init, crc32rfc1510_init, crc24rfc2440_init) [USE_INTEL_PCLMUL]: Select PCLMUL implementation if SSE4.1 and PCLMUL HW features detected. (crc32_write, crc24rfc2440_write) [USE_INTEL_PCLMUL]: Use PCLMUL implementation if enabled. (crc24_init): Document storage format of 24-bit CRC. (crc24_next4): Use only 'data' for last table look-up. * configure.ac: Add 'crc-intel-pclmul.lo'. * src/g10lib.h (HWF_*, HWF_INTEL_SSE4_1): Update HWF flags to include Intel SSE4.1. * src/hwf-x86.c (detect_x86_gnuc): Add SSE4.1 detection. * src/hwfeatures.c (hwflist): Add 'intel-sse4.1'. * tests/basic.c (fillbuf_count): New. (check_one_md): Add "?" check (million byte data-set with byte pattern 0x00,0x01,0x02,...); Test all buffer sizes 1 to 1000, for "!" and "?" checks. (check_one_md_multi): Skip "?". (check_digests): Add "?" test-vectors for MD5, SHA1, SHA224, SHA256, SHA384, SHA512, SHA3_224, SHA3_256, SHA3_384, SHA3_512, RIPEMD160, CRC32, CRC32_RFC1510, CRC24_RFC2440, TIGER1 and WHIRLPOOL; Add "!" test-vectors for CRC32_RFC1510 and CRC24_RFC2440. -- Add Intel PCLMUL accelerated implementations of CRC algorithms. CRC performance is improved ~11x on x86_64 and i386 on Intel Haswell, and ~2.7x on Intel Sandy-bridge. 
Benchmark on Intel Core i5-4570 (x86_64, 3.2 Ghz): Before: | nanosecs/byte mebibytes/sec cycles/byte CRC32 | 0.865 ns/B 1103.0 MiB/s 2.77 c/B CRC32RFC1510 | 0.865 ns/B 1102.7 MiB/s 2.77 c/B CRC24RFC2440 | 0.865 ns/B 1103.0 MiB/s 2.77 c/B After: | nanosecs/byte mebibytes/sec cycles/byte CRC32 | 0.079 ns/B 12051.7 MiB/s 0.253 c/B CRC32RFC1510 | 0.079 ns/B 12050.6 MiB/s 0.253 c/B CRC24RFC2440 | 0.079 ns/B 12100.0 MiB/s 0.252 c/B Benchmark on Intel Core i5-4570 (i386, 3.2 Ghz): Before: | nanosecs/byte mebibytes/sec cycles/byte CRC32 | 0.860 ns/B 1109.0 MiB/s 2.75 c/B CRC32RFC1510 | 0.861 ns/B 1108.3 MiB/s 2.75 c/B CRC24RFC2440 | 0.860 ns/B 1108.6 MiB/s 2.75 c/B After: | nanosecs/byte mebibytes/sec cycles/byte CRC32 | 0.078 ns/B 12207.0 MiB/s 0.250 c/B CRC32RFC1510 | 0.078 ns/B 12207.0 MiB/s 0.250 c/B CRC24RFC2440 | 0.080 ns/B 11925.6 MiB/s 0.256 c/B Benchmark on Intel Core i5-2450M (x86_64, 2.5 Ghz): Before: | nanosecs/byte mebibytes/sec cycles/byte CRC32 | 1.25 ns/B 762.3 MiB/s 3.13 c/B CRC32RFC1510 | 1.26 ns/B 759.1 MiB/s 3.14 c/B CRC24RFC2440 | 1.25 ns/B 764.9 MiB/s 3.12 c/B After: | nanosecs/byte mebibytes/sec cycles/byte CRC32 | 0.451 ns/B 2114.3 MiB/s 1.13 c/B CRC32RFC1510 | 0.451 ns/B 2114.6 MiB/s 1.13 c/B CRC24RFC2440 | 0.457 ns/B 2085.0 MiB/s 1.14 c/B Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/crc.c')
-rw-r--r--cipher/crc.c59
1 files changed, 57 insertions, 2 deletions
diff --git a/cipher/crc.c b/cipher/crc.c
index 46a185a8..ee0e4e2e 100644
--- a/cipher/crc.c
+++ b/cipher/crc.c
@@ -31,14 +31,37 @@
#include "bufhelp.h"
+/* USE_INTEL_PCLMUL indicates whether to compile CRC with Intel PCLMUL
+ * code. */
+#undef USE_INTEL_PCLMUL
+#ifdef ENABLE_PCLMUL_SUPPORT
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+# if __GNUC__ >= 4
+# define USE_INTEL_PCLMUL 1
+# endif
+# endif
+#endif /* USE_INTEL_PCLMUL */
+
+
typedef struct
{
u32 CRC;
+#ifdef USE_INTEL_PCLMUL
+ unsigned int use_pclmul:1; /* Intel PCLMUL shall be used. */
+#endif
byte buf[4];
}
CRC_CONTEXT;
+#ifdef USE_INTEL_PCLMUL
+/*-- crc-intel-pclmul.c --*/
+void _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf,
+ size_t inlen);
+#endif
+
+
/*
* Code generated by universal_crc by Danjel McGougan
*
@@ -338,6 +361,11 @@ static void
crc32_init (void *context, unsigned int flags)
{
CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+#ifdef USE_INTEL_PCLMUL
+ u32 hwf = _gcry_get_hw_features ();
+
+ ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
(void)flags;
@@ -351,6 +379,14 @@ crc32_write (void *context, const void *inbuf_arg, size_t inlen)
const byte *inbuf = inbuf_arg;
u32 crc;
+#ifdef USE_INTEL_PCLMUL
+ if (ctx->use_pclmul)
+ {
+ _gcry_crc32_intel_pclmul(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+
if (!inbuf || !inlen)
return;
@@ -403,6 +439,11 @@ static void
crc32rfc1510_init (void *context, unsigned int flags)
{
CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+#ifdef USE_INTEL_PCLMUL
+ u32 hwf = _gcry_get_hw_features ();
+
+ ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
(void)flags;
@@ -694,7 +735,8 @@ static const u32 crc24_table[1024] =
static inline
u32 crc24_init (void)
{
- return 0xce04b7;
+ /* Transformed to a 32-bit CRC by multiplying by x⁸ and then byte swapping. */
+ return 0xce04b7; /* _gcry_bswap(0xb704ce << 8) */
}
static inline
@@ -713,7 +755,7 @@ u32 crc24_next4 (u32 crc, u32 data)
crc = crc24_table[(crc & 0xff) + 0x300] ^
crc24_table[((crc >> 8) & 0xff) + 0x200] ^
crc24_table[((crc >> 16) & 0xff) + 0x100] ^
- crc24_table[(crc >> 24) & 0xff];
+ crc24_table[(data >> 24) & 0xff];
return crc;
}
@@ -727,6 +769,11 @@ static void
crc24rfc2440_init (void *context, unsigned int flags)
{
CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+#ifdef USE_INTEL_PCLMUL
+ u32 hwf = _gcry_get_hw_features ();
+
+ ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
(void)flags;
@@ -740,6 +787,14 @@ crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen)
CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
u32 crc;
+#ifdef USE_INTEL_PCLMUL
+ if (ctx->use_pclmul)
+ {
+ _gcry_crc24rfc2440_intel_pclmul(&ctx->CRC, inbuf, inlen);
+ return;
+ }
+#endif
+
if (!inbuf || !inlen)
return;