summaryrefslogtreecommitdiff
path: root/src/hwf-x86.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2015-10-23 22:39:47 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2015-10-28 20:08:54 +0200
commit909644ef5883927262366c356eed530e55aba478 (patch)
tree71c03dfeae98a5a7ab1118663a877cd3941f1dba /src/hwf-x86.c
parent16fd540f4d01eb6dc23d9509ae549353617c7a67 (diff)
downloadlibgcrypt-909644ef5883927262366c356eed530e55aba478.tar.gz
hwf-x86: add detection for Intel CPUs with fast SHLD instruction
* cipher/sha1.c (sha1_init): Use HWF_INTEL_FAST_SHLD instead of HWF_INTEL_CPU. * cipher/sha256.c (sha256_init, sha224_init): Ditto. * cipher/sha512.c (sha512_init, sha384_init): Ditto. * src/g10lib.h (HWF_INTEL_FAST_SHLD): New. (HWF_INTEL_BMI2, HWF_INTEL_SSSE3, HWF_INTEL_PCLMUL, HWF_INTEL_AESNI) (HWF_INTEL_RDRAND, HWF_INTEL_AVX, HWF_INTEL_AVX2) (HWF_ARM_NEON): Update. * src/hwf-x86.c (detect_x86_gnuc): Add detection of Intel Core CPUs with fast SHLD/SHRD instruction. * src/hwfeatures.c (hwflist): Add "intel-fast-shld". -- Intel Core CPUs since codename sandy-bridge have been able to execute SHLD/SHRD instructions faster than rotate instructions ROL/ROR. Since SHLD/SHRD can be used to do rotation, some optimized implementations (SHA1/SHA256/SHA512) use SHLD/SHRD instructions in place of ROL/ROR. This patch provides more accurate detection of CPUs with fast SHLD implementation. Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'src/hwf-x86.c')
-rw-r--r--src/hwf-x86.c34
1 files changed, 32 insertions, 2 deletions
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index 399952c4..fbd63315 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -174,6 +174,7 @@ detect_x86_gnuc (void)
unsigned int features;
unsigned int os_supports_avx_avx2_registers = 0;
unsigned int max_cpuid_level;
+ unsigned int fms, family, model;
unsigned int result = 0;
(void)os_supports_avx_avx2_registers;
@@ -236,8 +237,37 @@ detect_x86_gnuc (void)
/* Detect Intel features, that might also be supported by other
vendors. */
- /* Get CPU info and Intel feature flags (ECX). */
- get_cpuid(1, NULL, NULL, &features, NULL);
+ /* Get CPU family/model/stepping (EAX) and Intel feature flags (ECX). */
+ get_cpuid(1, &fms, NULL, &features, NULL);
+
+ family = ((fms & 0xf00) >> 8) + ((fms & 0xff00000) >> 20);
+ model = ((fms & 0xf0) >> 4) + ((fms & 0xf0000) >> 12);
+
+ if ((result & HWF_INTEL_CPU) && family == 6)
+ {
+ /* These Intel Core processor models have SHLD/SHRD instructions that
+ * can do integer rotation faster than the actual ROL/ROR instructions. */
+ switch (model)
+ {
+ case 0x2A:
+ case 0x2D:
+ case 0x3A:
+ case 0x3C:
+ case 0x3F:
+ case 0x45:
+ case 0x46:
+ case 0x3D:
+ case 0x4F:
+ case 0x56:
+ case 0x47:
+ case 0x4E:
+ case 0x5E:
+ case 0x55:
+ case 0x66:
+ result |= HWF_INTEL_FAST_SHLD;
+ break;
+ }
+ }
#ifdef ENABLE_PCLMUL_SUPPORT
/* Test bit 1 for PCLMUL. */