From 909644ef5883927262366c356eed530e55aba478 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna
Date: Fri, 23 Oct 2015 22:39:47 +0300
Subject: hwf-x86: add detection for Intel CPUs with fast SHLD instruction

* cipher/sha1.c (sha1_init): Use HWF_INTEL_FAST_SHLD instead of
HWF_INTEL_CPU.
* cipher/sha256.c (sha256_init, sha224_init): Ditto.
* cipher/sha512.c (sha512_init, sha384_init): Ditto.
* src/g10lib.h (HWF_INTEL_FAST_SHLD): New.
(HWF_INTEL_BMI2, HWF_INTEL_SSSE3, HWF_INTEL_PCLMUL, HWF_INTEL_AESNI)
(HWF_INTEL_RDRAND, HWF_INTEL_AVX, HWF_INTEL_AVX2)
(HWF_ARM_NEON): Update.
* src/hwf-x86.c (detect_x86_gnuc): Add detection of Intel Core CPUs
with fast SHLD/SHRD instruction.
* src/hwfeatures.c (hwflist): Add "intel-fast-shld".
--

Intel Core CPUs since codename Sandy Bridge have been able to execute
SHLD/SHRD instructions faster than the rotate instructions ROL/ROR.
Since SHLD/SHRD can be used to perform rotation, some optimized
implementations (SHA1/SHA256/SHA512) use SHLD/SHRD instructions in
place of ROL/ROR. This patch provides more accurate detection of CPUs
with a fast SHLD implementation.

Signed-off-by: Jussi Kivilinna
---
 cipher/sha1.c   | 2 +-
 cipher/sha256.c | 4 ++--
 cipher/sha512.c | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'cipher')

diff --git a/cipher/sha1.c b/cipher/sha1.c
index eb428835..554d55ce 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -136,7 +136,7 @@ sha1_init (void *context, unsigned int flags)
 #ifdef USE_AVX
   /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
    * Therefore use this implementation on Intel CPUs only. */
-  hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+  hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
 #endif
 #ifdef USE_BMI2
   hd->use_bmi2 = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2);
diff --git a/cipher/sha256.c b/cipher/sha256.c
index 59ffa434..63869d54 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -124,7 +124,7 @@ sha256_init (void *context, unsigned int flags)
 #ifdef USE_AVX
   /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
    * Therefore use this implementation on Intel CPUs only. */
-  hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+  hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
 #endif
 #ifdef USE_AVX2
   hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
@@ -162,7 +162,7 @@ sha224_init (void *context, unsigned int flags)
 #ifdef USE_AVX
   /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
    * Therefore use this implementation on Intel CPUs only. */
-  hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+  hd->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
 #endif
 #ifdef USE_AVX2
   hd->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 029f8f02..4be1cab2 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -154,7 +154,7 @@ sha512_init (void *context, unsigned int flags)
   ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
 #endif
 #ifdef USE_AVX
-  ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+  ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
 #endif
 #ifdef USE_AVX2
   ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
@@ -194,7 +194,7 @@ sha384_init (void *context, unsigned int flags)
   ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
 #endif
 #ifdef USE_AVX
-  ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_CPU);
+  ctx->use_avx = (features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD);
 #endif
 #ifdef USE_AVX2
  ctx->use_avx2 = (features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2);
--
cgit v1.2.1
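The commit message relies on the fact that SHLD with identical source and
destination registers performs a left rotation: the destination is shifted
left while the vacated low bits are filled from the high bits of the source,
which for the same register is exactly ROL. Below is a minimal sketch of
that equivalence in C with GCC-style inline assembly, assuming an x86-64
target; the function names are illustrative only and are not part of
libgcrypt (the optimized SHA implementations the patch gates are
hand-written assembly, not this code).

    #include <stdint.h>
    #include <stdio.h>

    /* Portable rotate-left by 5; compilers typically emit ROL for this. */
    static uint32_t
    rol32_plain (uint32_t x)
    {
      return (x << 5) | (x >> 27);
    }

    /* The same rotation via SHLD with source == destination.  Per the
     * commit message, this form is faster than ROL/ROR on Intel Core
     * CPUs since Sandy Bridge but slow on non-Intel CPUs, which is why
     * the new HWF_INTEL_FAST_SHLD flag gates its use. */
    static uint32_t
    rol32_shld (uint32_t x)
    {
      __asm__ ("shldl $5, %0, %0" : "+r" (x) : : "cc");
      return x;
    }

    int
    main (void)
    {
      uint32_t v = 0x80000001u;
      /* Both lines print 00000030: the two forms agree. */
      printf ("rol:  %08x\n", rol32_plain (v));
      printf ("shld: %08x\n", rol32_shld (v));
      return 0;
    }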