summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-12-30 11:57:57 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2013-12-30 11:57:57 +0200
commit bbcb12187afb1756cb27296166b57fa19ee45d4d (patch)
tree f45a1f9c932bbd4cf1620cf31f241ff5ec78f402
parent b7e814f93ee40fcfe17a187a8989c07fde2ba0cd (diff)
download libgcrypt-bbcb12187afb1756cb27296166b57fa19ee45d4d.tar.gz
Fix buggy/incomplete detection of AVX/AVX2 support
* configure.ac: Also check for 'xgetbv' instruction in AVX and AVX2 inline assembly checks.
* src/hwf-x86.c [__i386__] (get_xgetbv): New function.
[__x86_64__] (get_xgetbv): New function.
[HAS_X86_CPUID] (detect_x86_gnuc): Check for OSXSAVE and OS support for XMM&YMM registers and enable AVX/AVX2 only if XMM&YMM registers are supported by OS.
--

This patch is based on the original patch and bug report by Panagiotis Christopoulos:

Adding better detection of AVX/AVX2 support

After upgrading libgcrypt from 1.5.3 to 1.6.0 on a remote XEN system (linode) my gpg2 stopped working properly, throwing SIGILL signals when doing sha512 operations etc. I managed to debug this with the help of Douglas Freed (dwfreed at mtu.edu) and it seems that the current AVX detection just checks for bit 28 on cpuid but the check still works on systems that have disabled the avx/avx2 instructions for some reason (e.g. performance/instability) resulting in SIGILLs (e.g. when trying _gcry_sha512_transform_amd64_avx() ).

From Intel resources[1][2], I found additional checks for better AVX detection and applied them in the following patch. Please review/change accordingly and commit some better AVX detection mechanism. The AVX part is tested but could not test the AVX2 one, because I lack proper hardware. I can provide additional information upon request. Use the patch only as a guideline, as it's not thoroughly tested.

[1] http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
[2] http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf (sections 14.3 and 14.7.1)

Reported-by: Panagiotis Christopoulos (pchrist) <pchrist@gentoo.org>
Cc: Douglas Freed <dwfreed@mtu.edu>
Cc: Tim Harder <radhermit@gentoo.org>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- configure.ac 4
-rw-r--r-- src/hwf-x86.c 46
2 files changed, 47 insertions, 3 deletions
diff --git a/configure.ac b/configure.ac
index 27de850e..8b43d9a8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1033,7 +1033,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions],
[gcry_cv_gcc_inline_asm_avx=no
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[[void a(void) {
- __asm__("vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):);
+ __asm__("xgetbv; vaesdeclast (%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):);
}]])],
[gcry_cv_gcc_inline_asm_avx=yes])])
if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then
@@ -1050,7 +1050,7 @@ AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions],
[gcry_cv_gcc_inline_asm_avx2=no
AC_COMPILE_IFELSE([AC_LANG_SOURCE(
[[void a(void) {
- __asm__("vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc");
+ __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc");
}]])],
[gcry_cv_gcc_inline_asm_avx2=yes])])
if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index 4e825588..0591b4fd 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -95,6 +95,21 @@ get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
if (edx)
*edx = regs[3];
}
+
+static unsigned int
+get_xgetbv(void)
+{
+  unsigned int t_eax, t_edx;
+  /* Read XCR0 (selected by ECX = 0); xgetbv delivers it in EDX:EAX.  */
+  asm volatile
+    ("xgetbv\n\t"
+     : "=a" (t_eax), "=d" (t_edx)
+     : "c" (0)
+    );
+  /* Only the low half is returned; EDX is an output so GCC knows it is overwritten.  */
+  return t_eax;
+}
+
#endif /* i386 && GNUC */
@@ -129,6 +144,21 @@ get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
if (edx)
*edx = regs[3];
}
+
+static unsigned int
+get_xgetbv(void)
+{
+  unsigned int t_eax, t_edx;
+  /* Read XCR0 (selected by ECX = 0); xgetbv delivers it in EDX:EAX.  */
+  asm volatile
+    ("xgetbv\n\t"
+     : "=a" (t_eax), "=d" (t_edx)
+     : "c" (0)
+    );
+  /* Only the low half is returned; EDX is an output so GCC knows it is overwritten.  */
+  return t_eax;
+}
+
#endif /* x86-64 && GNUC */
@@ -138,9 +168,12 @@ detect_x86_gnuc (void)
{
char vendor_id[12+1];
unsigned int features;
+ unsigned int os_supports_avx_avx2_registers = 0;
unsigned int max_cpuid_level;
unsigned int result = 0;
+ (void)os_supports_avx_avx2_registers;
+
if (!is_cpuid_available())
return 0;
@@ -215,10 +248,20 @@ detect_x86_gnuc (void)
if (features & 0x02000000)
result |= HWF_INTEL_AESNI;
#endif /*ENABLE_AESNI_SUPPORT*/
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+ /* Test bit 27 for OSXSAVE (required for AVX/AVX2). */
+ if (features & 0x08000000)
+ {
+ /* Check that OS has enabled both XMM and YMM state support. */
+ if ((get_xgetbv() & 0x6) == 0x6)
+ os_supports_avx_avx2_registers = 1;
+ }
+#endif
#ifdef ENABLE_AVX_SUPPORT
/* Test bit 28 for AVX. */
if (features & 0x10000000)
- result |= HWF_INTEL_AVX;
+ if (os_supports_avx_avx2_registers)
+ result |= HWF_INTEL_AVX;
#endif /*ENABLE_AVX_SUPPORT*/
#ifdef ENABLE_DRNG_SUPPORT
/* Test bit 30 for RDRAND. */
@@ -242,6 +285,7 @@ detect_x86_gnuc (void)
#ifdef ENABLE_AVX2_SUPPORT
/* Test bit 5 for AVX2. */
if (features & 0x00000020)
+ if (os_supports_avx_avx2_registers)
result |= HWF_INTEL_AVX2;
#endif /*ENABLE_AVX_SUPPORT*/
}