summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cipher/sha1.c2
-rw-r--r--cipher/sha256.c4
-rw-r--r--cipher/sha512.c4
-rw-r--r--src/g10lib.h21
-rw-r--r--src/hwf-x86.c34
-rw-r--r--src/hwfeatures.c27
6 files changed, 62 insertions, 30 deletions
diff --git a/cipher/sha1.c b/cipher/sha1.c
index eb428835..554d55ce 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -136,7 +136,7 @@ sha1_init (void *context, unsigned int flags)
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+ hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
#endif
#ifdef USE_BMI2
hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
diff --git a/cipher/sha256.c b/cipher/sha256.c
index 59ffa434..63869d54 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -124,7 +124,7 @@ sha256_init (void *context, unsigned int flags)
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+ hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
#endif
#ifdef USE_AVX2
hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
@@ -162,7 +162,7 @@ sha224_init (void *context, unsigned int flags)
#ifdef USE_AVX
/* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
* Therefore use this implementation on Intel CPUs only. */
- hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+ hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
#endif
#ifdef USE_AVX2
hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 029f8f02..4be1cab2 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -154,7 +154,7 @@ sha512_init (void *context, unsigned int flags)
ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+ ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
#endif
#ifdef USE_AVX2
ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
@@ -194,7 +194,7 @@ sha384_init (void *context, unsigned int flags)
ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
#endif
#ifdef USE_AVX
- ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+ ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
#endif
#ifdef USE_AVX2
ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
diff --git a/src/g10lib.h b/src/g10lib.h
index d1f94268..a579e945 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -197,16 +197,17 @@ int _gcry_log_verbosity( int level );
#define HWF_PADLOCK_SHA 4
#define HWF_PADLOCK_MMUL 8
-#define HWF_INTEL_CPU 16
-#define HWF_INTEL_BMI2 32
-#define HWF_INTEL_SSSE3 64
-#define HWF_INTEL_PCLMUL 128
-#define HWF_INTEL_AESNI 256
-#define HWF_INTEL_RDRAND 512
-#define HWF_INTEL_AVX 1024
-#define HWF_INTEL_AVX2 2048
-
-#define HWF_ARM_NEON 4096
+#define HWF_INTEL_CPU 16
+#define HWF_INTEL_FAST_SHLD 32
+#define HWF_INTEL_BMI2 64
+#define HWF_INTEL_SSSE3 128
+#define HWF_INTEL_PCLMUL 256
+#define HWF_INTEL_AESNI 512
+#define HWF_INTEL_RDRAND 1024
+#define HWF_INTEL_AVX 2048
+#define HWF_INTEL_AVX2 4096
+
+#define HWF_ARM_NEON 8192
gpg_err_code_t _gcry_disable_hw_feature (const char *name);
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index 399952c4..fbd63315 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -174,6 +174,7 @@ detect_x86_gnuc (void)
unsigned int features;
unsigned int os_supports_avx_avx2_registers = 0;
unsigned int max_cpuid_level;
+ unsigned int fms, family, model;
unsigned int result = 0;
(void)os_supports_avx_avx2_registers;
@@ -236,8 +237,37 @@ detect_x86_gnuc (void)
/* Detect Intel features, that might also be supported by other
vendors. */
- /* Get CPU info and Intel feature flags (ECX). */
- get_cpuid(1, NULL, NULL, &features, NULL);
+ /* Get CPU family/model/stepping (EAX) and Intel feature flags (ECX). */
+ get_cpuid(1, &fms, NULL, &features, NULL);
+
+ family = ((fms & 0xf00) >> 8) + ((fms & 0xff00000) >> 20);
+ model = ((fms & 0xf0) >> 4) + ((fms & 0xf0000) >> 12);
+
+ if ((result & HWF_INTEL_CPU) && family == 6)
+ {
/* These Intel Core processor models have SHLD/SHRD instructions that
* can do integer rotation faster than actual ROL/ROR instructions. */
+ switch (model)
+ {
+ case 0x2A:
+ case 0x2D:
+ case 0x3A:
+ case 0x3C:
+ case 0x3F:
+ case 0x45:
+ case 0x46:
+ case 0x3D:
+ case 0x4F:
+ case 0x56:
+ case 0x47:
+ case 0x4E:
+ case 0x5E:
+ case 0x55:
+ case 0x66:
+ result |= HWF_INTEL_FAST_SHLD;
+ break;
+ }
+ }
#ifdef ENABLE_PCLMUL_SUPPORT
/* Test bit 1 for PCLMUL. */
diff --git a/src/hwfeatures.c b/src/hwfeatures.c
index 58099c49..e7c55cc3 100644
--- a/src/hwfeatures.c
+++ b/src/hwfeatures.c
@@ -42,19 +42,20 @@ static struct
const char *desc;
} hwflist[] =
{
- { HWF_PADLOCK_RNG, "padlock-rng" },
- { HWF_PADLOCK_AES, "padlock-aes" },
- { HWF_PADLOCK_SHA, "padlock-sha" },
- { HWF_PADLOCK_MMUL,"padlock-mmul"},
- { HWF_INTEL_CPU, "intel-cpu" },
- { HWF_INTEL_BMI2, "intel-bmi2" },
- { HWF_INTEL_SSSE3, "intel-ssse3" },
- { HWF_INTEL_PCLMUL,"intel-pclmul" },
- { HWF_INTEL_AESNI, "intel-aesni" },
- { HWF_INTEL_RDRAND,"intel-rdrand" },
- { HWF_INTEL_AVX, "intel-avx" },
- { HWF_INTEL_AVX2, "intel-avx2" },
- { HWF_ARM_NEON, "arm-neon" }
+ { HWF_PADLOCK_RNG, "padlock-rng" },
+ { HWF_PADLOCK_AES, "padlock-aes" },
+ { HWF_PADLOCK_SHA, "padlock-sha" },
+ { HWF_PADLOCK_MMUL, "padlock-mmul"},
+ { HWF_INTEL_CPU, "intel-cpu" },
+ { HWF_INTEL_FAST_SHLD, "intel-fast-shld" },
+ { HWF_INTEL_BMI2, "intel-bmi2" },
+ { HWF_INTEL_SSSE3, "intel-ssse3" },
+ { HWF_INTEL_PCLMUL, "intel-pclmul" },
+ { HWF_INTEL_AESNI, "intel-aesni" },
+ { HWF_INTEL_RDRAND, "intel-rdrand" },
+ { HWF_INTEL_AVX, "intel-avx" },
+ { HWF_INTEL_AVX2, "intel-avx2" },
+ { HWF_ARM_NEON, "arm-neon" }
};
/* A bit vector with the hardware features which shall not be used.