From 4e09aaa36d151c3312019724a77fc09aa345b82f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Wed, 29 Apr 2015 18:18:07 +0300 Subject: Enable AES/AES-NI, AES/SSSE3 and GCM/PCLMUL implementations on WIN64 * cipher/cipher-gcm-intel-pclmul.c (_gcry_ghash_setup_intel_pclmul) (_gcry_ghash_intel_pclmul) [__WIN64__]: Store non-volatile vector registers before use and restore after. * cipher/cipher-internal.h (GCM_USE_INTEL_PCLMUL): Remove dependency on !defined(__WIN64__). * cipher/rijndael-aesni.c [__WIN64__] (aesni_prepare_2_6_variable, aesni_prepare, aesni_prepare_2_6, aesni_cleanup) (aesni_cleanup_2_6): New. [!__WIN64__] (aesni_prepare_2_6_variable, aesni_prepare_2_6): New. (_gcry_aes_aesni_do_setkey, _gcry_aes_aesni_cbc_enc) (_gcry_aes_aesni_ctr_enc, _gcry_aes_aesni_cfb_dec, _gcry_aes_aesni_cbc_dec) (aesni_ocb_enc, aesni_ocb_dec, _gcry_aes_aesni_ocb_auth): Use 'aesni_prepare_2_6'. * cipher/rijndael-internal.h (USE_SSSE3): Enable if HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS or HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS. (USE_AESNI): Remove dependency on !defined(__WIN64__). * cipher/rijndael-ssse3-amd64.c [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (vpaes_ssse3_prepare, vpaes_ssse3_cleanup): New. [!HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (vpaes_ssse3_prepare): New. (vpaes_ssse3_prepare_enc, vpaes_ssse3_prepare_dec): Use 'vpaes_ssse3_prepare'. (_gcry_aes_ssse3_do_setkey, _gcry_aes_ssse3_prepare_decryption): Use 'vpaes_ssse3_prepare' and 'vpaes_ssse3_cleanup'. [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (X): Add masking macro to exclude '.type' and '.size' markers from assembly code, as they are not supported on WIN64/COFF objects. * configure.ac (gcry_cv_gcc_attribute_ms_abi) (gcry_cv_gcc_attribute_sysv_abi, gcry_cv_gcc_default_abi_is_ms_abi) (gcry_cv_gcc_default_abi_is_sysv_abi) (gcry_cv_gcc_win64_platform_as_ok): New checks. 
-- Signed-off-by: Jussi Kivilinna --- cipher/cipher-gcm-intel-pclmul.c | 72 ++++++++++++++++++++++++++ cipher/cipher-internal.h | 4 +- cipher/rijndael-aesni.c | 73 +++++++++++++++++++++----- cipher/rijndael-internal.h | 9 ++-- cipher/rijndael-ssse3-amd64.c | 94 +++++++++++++++++++++++++++------- configure.ac | 108 +++++++++++++++++++++++++++++++++++++-- 6 files changed, 317 insertions(+), 43 deletions(-) diff --git a/cipher/cipher-gcm-intel-pclmul.c b/cipher/cipher-gcm-intel-pclmul.c index 79648ce9..a3272497 100644 --- a/cipher/cipher-gcm-intel-pclmul.c +++ b/cipher/cipher-gcm-intel-pclmul.c @@ -249,6 +249,17 @@ void _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) { u64 tmp[2]; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[3 * 16]; + + /* XMM6-XMM8 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + "movdqu %%xmm8, 2*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif /* Swap endianness of hsub. */ tmp[0] = buf_get_be64(c->u_mode.gcm.u_ghash_key.key + 8); @@ -285,6 +296,21 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) : [h_234] "r" (c->u_mode.gcm.gcm_table) : "memory"); +#ifdef __WIN64__ + /* Clear/restore used registers. */ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + "movdqu 2*16(%0), %%xmm8\n\t" + : + : "r" (win64tmp) + : "memory"); +#else /* Clear used registers. 
*/ asm volatile( "pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm1, %%xmm1\n\t" @@ -296,6 +322,7 @@ _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) "pxor %%xmm7, %%xmm7\n\t" "pxor %%xmm8, %%xmm8\n\t" ::: "cc" ); +#endif #endif wipememory (tmp, sizeof(tmp)); @@ -309,10 +336,30 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; +#ifdef __WIN64__ + char win64tmp[10 * 16]; +#endif if (nblocks == 0) return 0; +#ifdef __WIN64__ + /* XMM8-XMM15 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + "movdqu %%xmm8, 2*16(%0)\n\t" + "movdqu %%xmm9, 3*16(%0)\n\t" + "movdqu %%xmm10, 4*16(%0)\n\t" + "movdqu %%xmm11, 5*16(%0)\n\t" + "movdqu %%xmm12, 6*16(%0)\n\t" + "movdqu %%xmm13, 7*16(%0)\n\t" + "movdqu %%xmm14, 8*16(%0)\n\t" + "movdqu %%xmm15, 9*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory" ); +#endif + /* Preload hash and H1. */ asm volatile ("movdqu %[hash], %%xmm1\n\t" "movdqa %[hsub], %%xmm0\n\t" @@ -353,6 +400,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, } while (nblocks >= 4); +#ifndef __WIN64__ /* Clear used x86-64/XMM registers. */ asm volatile( "pxor %%xmm8, %%xmm8\n\t" "pxor %%xmm9, %%xmm9\n\t" @@ -363,6 +411,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, "pxor %%xmm14, %%xmm14\n\t" "pxor %%xmm15, %%xmm15\n\t" ::: "cc" ); +#endif } #endif @@ -385,6 +434,28 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, : [hash] "=m" (*result) : [be_mask] "m" (*be_mask)); +#ifdef __WIN64__ + /* Clear/restore used registers. 
*/ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + "movdqu 2*16(%0), %%xmm8\n\t" + "movdqu 3*16(%0), %%xmm9\n\t" + "movdqu 4*16(%0), %%xmm10\n\t" + "movdqu 5*16(%0), %%xmm11\n\t" + "movdqu 6*16(%0), %%xmm12\n\t" + "movdqu 7*16(%0), %%xmm13\n\t" + "movdqu 8*16(%0), %%xmm14\n\t" + "movdqu 9*16(%0), %%xmm15\n\t" + : + : "r" (win64tmp) + : "memory" ); +#else /* Clear used registers. */ asm volatile( "pxor %%xmm0, %%xmm0\n\t" "pxor %%xmm1, %%xmm1\n\t" @@ -395,6 +466,7 @@ _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, "pxor %%xmm6, %%xmm6\n\t" "pxor %%xmm7, %%xmm7\n\t" ::: "cc" ); +#endif return 0; } diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h index 693f2189..e20ea562 100644 --- a/cipher/cipher-internal.h +++ b/cipher/cipher-internal.h @@ -67,9 +67,7 @@ #if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES) # if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) # if __GNUC__ >= 4 -# ifndef __WIN64__ -# define GCM_USE_INTEL_PCLMUL 1 -# endif +# define GCM_USE_INTEL_PCLMUL 1 # endif # endif #endif /* GCM_USE_INTEL_PCLMUL */ diff --git a/cipher/rijndael-aesni.c b/cipher/rijndael-aesni.c index 147679f7..910bc681 100644 --- a/cipher/rijndael-aesni.c +++ b/cipher/rijndael-aesni.c @@ -49,24 +49,54 @@ typedef struct u128_s { u32 a, b, c, d; } u128_t; the use of these macros. There purpose is to make sure that the SSE regsiters are cleared and won't reveal any information about the key or the data. 
*/ -#define aesni_prepare() do { } while (0) -#define aesni_cleanup() \ - do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ - "pxor %%xmm1, %%xmm1\n" :: ); \ - } while (0) -#define aesni_cleanup_2_6() \ - do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ - "pxor %%xmm3, %%xmm3\n" \ - "pxor %%xmm4, %%xmm4\n" \ - "pxor %%xmm5, %%xmm5\n" \ - "pxor %%xmm6, %%xmm6\n":: ); \ - } while (0) - +#ifdef __WIN64__ +/* XMM6-XMM15 are callee-saved registers on WIN64. */ +# define aesni_prepare_2_6_variable char win64tmp[16] +# define aesni_prepare() do { } while (0) +# define aesni_prepare_2_6() \ + do { asm volatile ("movdqu %%xmm6, %0\n\t" \ + : "=m" (*win64tmp) \ + : \ + : "memory"); \ + } while (0) +# define aesni_cleanup() \ + do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ + "pxor %%xmm1, %%xmm1\n" :: ); \ + } while (0) +# define aesni_cleanup_2_6() \ + do { asm volatile ("movdqu %0, %%xmm6\n\t" \ + "pxor %%xmm2, %%xmm2\n" \ + "pxor %%xmm3, %%xmm3\n" \ + "pxor %%xmm4, %%xmm4\n" \ + "pxor %%xmm5, %%xmm5\n" \ + : \ + : "m" (*win64tmp) \ + : "memory"); \ + } while (0) +#else +# define aesni_prepare_2_6_variable +# define aesni_prepare() do { } while (0) +# define aesni_prepare_2_6() do { } while (0) +# define aesni_cleanup() \ + do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ + "pxor %%xmm1, %%xmm1\n" :: ); \ + } while (0) +# define aesni_cleanup_2_6() \ + do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ + "pxor %%xmm3, %%xmm3\n" \ + "pxor %%xmm4, %%xmm4\n" \ + "pxor %%xmm5, %%xmm5\n" \ + "pxor %%xmm6, %%xmm6\n":: ); \ + } while (0) +#endif void _gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key) { + aesni_prepare_2_6_variable; + aesni_prepare(); + aesni_prepare_2_6(); if (ctx->rounds < 12) { @@ -999,7 +1029,10 @@ _gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks, int cbc_mac) { + aesni_prepare_2_6_variable; + aesni_prepare (); + aesni_prepare_2_6(); asm volatile ("movdqu %[iv], 
%%xmm5\n\t" : /* No output */ @@ -1044,8 +1077,10 @@ _gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *outbuf, { static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + aesni_prepare_2_6_variable; aesni_prepare (); + aesni_prepare_2_6(); asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */ "movdqa %[ctr], %%xmm5\n\t" /* Preload CTR */ @@ -1095,7 +1130,10 @@ _gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks) { + aesni_prepare_2_6_variable; + aesni_prepare (); + aesni_prepare_2_6(); asm volatile ("movdqu %[iv], %%xmm6\n\t" : /* No output */ @@ -1177,7 +1215,10 @@ _gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, const unsigned char *inbuf, unsigned char *iv, size_t nblocks) { + aesni_prepare_2_6_variable; + aesni_prepare (); + aesni_prepare_2_6(); asm volatile ("movdqu %[iv], %%xmm5\n\t" /* use xmm5 as fast IV storage */ @@ -1331,8 +1372,10 @@ aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg, unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; + aesni_prepare_2_6_variable; aesni_prepare (); + aesni_prepare_2_6 (); /* Preload Offset and Checksum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" @@ -1473,8 +1516,10 @@ aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg, unsigned char *outbuf = outbuf_arg; const unsigned char *inbuf = inbuf_arg; u64 n = c->u_mode.ocb.data_nblocks; + aesni_prepare_2_6_variable; aesni_prepare (); + aesni_prepare_2_6 (); /* Preload Offset and Checksum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" @@ -1625,8 +1670,10 @@ _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, RIJNDAEL_context *ctx = (void *)&c->context.c; const unsigned char *abuf = abuf_arg; u64 n = c->u_mode.ocb.aad_nblocks; + aesni_prepare_2_6_variable; aesni_prepare (); + aesni_prepare_2_6 (); /* Preload 
Offset and Sum */ asm volatile ("movdqu %[iv], %%xmm5\n\t" diff --git a/cipher/rijndael-internal.h b/cipher/rijndael-internal.h index bd247a95..33ca53f6 100644 --- a/cipher/rijndael-internal.h +++ b/cipher/rijndael-internal.h @@ -44,8 +44,9 @@ #endif /* USE_SSSE3 indicates whether to use SSSE3 code. */ -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_SSSE3) +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSSE3 1 #endif @@ -75,9 +76,7 @@ #ifdef ENABLE_AESNI_SUPPORT # if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) # if __GNUC__ >= 4 -# ifndef __WIN64__ -# define USE_AESNI 1 -# endif +# define USE_AESNI 1 # endif # endif #endif /* ENABLE_AESNI_SUPPORT */ diff --git a/cipher/rijndael-ssse3-amd64.c b/cipher/rijndael-ssse3-amd64.c index 3f1b352c..21438dc9 100644 --- a/cipher/rijndael-ssse3-amd64.c +++ b/cipher/rijndael-ssse3-amd64.c @@ -61,7 +61,60 @@ the use of these macros. There purpose is to make sure that the SSE registers are cleared and won't reveal any information about the key or the data. */ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +/* XMM6-XMM15 are callee-saved registers on WIN64. 
*/ +# define vpaes_ssse3_prepare() \ + char win64tmp[16 * 10]; \ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" \ + "movdqu %%xmm7, 1*16(%0)\n\t" \ + "movdqu %%xmm8, 2*16(%0)\n\t" \ + "movdqu %%xmm9, 3*16(%0)\n\t" \ + "movdqu %%xmm10, 4*16(%0)\n\t" \ + "movdqu %%xmm11, 5*16(%0)\n\t" \ + "movdqu %%xmm12, 6*16(%0)\n\t" \ + "movdqu %%xmm13, 7*16(%0)\n\t" \ + "movdqu %%xmm14, 8*16(%0)\n\t" \ + "movdqu %%xmm15, 9*16(%0)\n\t" \ + : \ + : "r" (win64tmp) \ + : "memory" ) +# define vpaes_ssse3_cleanup() \ + asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \ + "pxor %%xmm1, %%xmm1 \n\t" \ + "pxor %%xmm2, %%xmm2 \n\t" \ + "pxor %%xmm3, %%xmm3 \n\t" \ + "pxor %%xmm4, %%xmm4 \n\t" \ + "pxor %%xmm5, %%xmm5 \n\t" \ + "movdqu 0*16(%0), %%xmm6 \n\t" \ + "movdqu 1*16(%0), %%xmm7 \n\t" \ + "movdqu 2*16(%0), %%xmm8 \n\t" \ + "movdqu 3*16(%0), %%xmm9 \n\t" \ + "movdqu 4*16(%0), %%xmm10 \n\t" \ + "movdqu 5*16(%0), %%xmm11 \n\t" \ + "movdqu 6*16(%0), %%xmm12 \n\t" \ + "movdqu 7*16(%0), %%xmm13 \n\t" \ + "movdqu 8*16(%0), %%xmm14 \n\t" \ + "movdqu 9*16(%0), %%xmm15 \n\t" \ + : \ + : "r" (win64tmp) \ + : "memory" ) +#else +# define vpaes_ssse3_prepare() /*_*/ +# define vpaes_ssse3_cleanup() \ + asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \ + "pxor %%xmm1, %%xmm1 \n\t" \ + "pxor %%xmm2, %%xmm2 \n\t" \ + "pxor %%xmm3, %%xmm3 \n\t" \ + "pxor %%xmm4, %%xmm4 \n\t" \ + "pxor %%xmm5, %%xmm5 \n\t" \ + "pxor %%xmm6, %%xmm6 \n\t" \ + "pxor %%xmm7, %%xmm7 \n\t" \ + "pxor %%xmm8, %%xmm8 \n\t" \ + ::: "memory" ) +#endif + #define vpaes_ssse3_prepare_enc(const_ptr) \ + vpaes_ssse3_prepare(); \ asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \ "movdqa (%q0), %%xmm9 # 0F \n\t" \ "movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ @@ -75,6 +128,7 @@ : "memory" ) #define vpaes_ssse3_prepare_dec(const_ptr) \ + vpaes_ssse3_prepare(); \ asm volatile ("lea .Laes_consts(%%rip), %q0 \n\t" \ "movdqa (%q0), %%xmm9 # 0F \n\t" \ "movdqa .Lk_inv (%q0), %%xmm10 # inv \n\t" \ @@ -88,17 +142,6 @@ : \ : "memory" ) -#define 
vpaes_ssse3_cleanup() \ - asm volatile ("pxor %%xmm0, %%xmm0 \n\t" \ - "pxor %%xmm1, %%xmm1 \n\t" \ - "pxor %%xmm2, %%xmm2 \n\t" \ - "pxor %%xmm3, %%xmm3 \n\t" \ - "pxor %%xmm4, %%xmm4 \n\t" \ - "pxor %%xmm5, %%xmm5 \n\t" \ - "pxor %%xmm6, %%xmm6 \n\t" \ - "pxor %%xmm7, %%xmm7 \n\t" \ - "pxor %%xmm8, %%xmm8 \n\t" \ - ::: "memory" ) void @@ -106,6 +149,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; + vpaes_ssse3_prepare(); + asm volatile ("leaq %q[key], %%rdi" "\n\t" "movl %[bits], %%esi" "\n\t" "leaq %[buf], %%rdx" "\n\t" @@ -121,6 +166,8 @@ _gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key) : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", "cc", "memory"); + vpaes_ssse3_cleanup(); + /* Save key for setting up decryption. */ memcpy(&ctx->keyschdec32[0][0], key, keybits / 8); } @@ -132,6 +179,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) { unsigned int keybits = (ctx->rounds - 10) * 32 + 128; + vpaes_ssse3_prepare(); + asm volatile ("leaq %q[key], %%rdi" "\n\t" "movl %[bits], %%esi" "\n\t" "leaq %[buf], %%rdx" "\n\t" @@ -146,6 +195,8 @@ _gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx) [rotoffs] "g" ((keybits == 192) ? 0 : 32) : "r8", "r9", "r10", "r11", "rax", "rcx", "rdx", "rdi", "rsi", "cc", "memory"); + + vpaes_ssse3_cleanup(); } @@ -465,6 +516,11 @@ _gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *outbuf, } +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define X(...) +#else +# define X(...) 
__VA_ARGS__ +#endif asm ( "\n\t" "##" @@ -494,7 +550,7 @@ asm ( "\n\t" "##" "\n\t" "##" "\n\t" ".align 16" - "\n\t" ".type _aes_encrypt_core,@function" +X("\n\t" ".type _aes_encrypt_core,@function") "\n\t" "_aes_encrypt_core:" "\n\t" " leaq .Lk_mc_backward(%rcx), %rdi" "\n\t" " mov $16, %rsi" @@ -570,7 +626,7 @@ asm ( "\n\t" " pxor %xmm4, %xmm0 # 0 = A" "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0" "\n\t" " ret" - "\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core" +X("\n\t" ".size _aes_encrypt_core,.-_aes_encrypt_core") "\n\t" "##" "\n\t" "## Decryption core" @@ -578,7 +634,7 @@ asm ( "\n\t" "## Same API as encryption core." "\n\t" "##" "\n\t" ".align 16" - "\n\t" ".type _aes_decrypt_core,@function" +X("\n\t" ".type _aes_decrypt_core,@function") "\n\t" "_aes_decrypt_core:" "\n\t" " movl %eax, %esi" "\n\t" " shll $4, %esi" @@ -670,7 +726,7 @@ asm ( "\n\t" " pxor %xmm4, %xmm0 # 0 = A" "\n\t" " pshufb .Lk_sr(%rsi,%rcx), %xmm0" "\n\t" " ret" - "\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core" +X("\n\t" ".size _aes_decrypt_core,.-_aes_decrypt_core") "\n\t" "########################################################" "\n\t" "## ##" @@ -679,7 +735,7 @@ asm ( "\n\t" "########################################################" "\n\t" ".align 16" - "\n\t" ".type _aes_schedule_core,@function" +X("\n\t" ".type _aes_schedule_core,@function") "\n\t" "_aes_schedule_core:" "\n\t" " # rdi = key" "\n\t" " # rsi = size in bits" @@ -1039,7 +1095,7 @@ asm ( "\n\t" " pxor %xmm7, %xmm7" "\n\t" " pxor %xmm8, %xmm8" "\n\t" " ret" - "\n\t" ".size _aes_schedule_core,.-_aes_schedule_core" +X("\n\t" ".size _aes_schedule_core,.-_aes_schedule_core") "\n\t" "########################################################" "\n\t" "## ##" @@ -1048,7 +1104,7 @@ asm ( "\n\t" "########################################################" "\n\t" ".align 16" - "\n\t" ".type _aes_consts,@object" +X("\n\t" ".type _aes_consts,@object") "\n\t" ".Laes_consts:" "\n\t" "_aes_consts:" "\n\t" " # s0F" @@ -1226,7 +1282,7 @@ 
asm ( "\n\t" " .quad 0xC7AA6DB9D4943E2D" "\n\t" " .quad 0x12D7560F93441D00" "\n\t" " .quad 0xCA4B8159D8C58E9C" - "\n\t" ".size _aes_consts,.-_aes_consts" +X("\n\t" ".size _aes_consts,.-_aes_consts") ); #endif /* USE_SSSE3 */ diff --git a/configure.ac b/configure.ac index 594209ff..0f16175b 100644 --- a/configure.ac +++ b/configure.ac @@ -1127,6 +1127,93 @@ fi #### #### ############################################# + +# Following tests depend on warnings to cause compile to fail, so set -Werror +# temporarily. +_gcc_cflags_save=$CFLAGS +CFLAGS="$CFLAGS -Werror" + + +# +# Check whether compiler supports 'ms_abi' function attribute. +# +AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute], + [gcry_cv_gcc_attribute_ms_abi], + [gcry_cv_gcc_attribute_ms_abi=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[int __attribute__ ((ms_abi)) proto(int);]])], + [gcry_cv_gcc_attribute_ms_abi=yes])]) +if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then + AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1, + [Defined if compiler supports "__attribute__ ((ms_abi))" function attribute]) +fi + + +# +# Check whether compiler supports 'sysv_abi' function attribute. +# +AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute], + [gcry_cv_gcc_attribute_sysv_abi], + [gcry_cv_gcc_attribute_sysv_abi=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[int __attribute__ ((sysv_abi)) proto(int);]])], + [gcry_cv_gcc_attribute_sysv_abi=yes])]) +if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then + AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1, + [Defined if compiler supports "__attribute__ ((sysv_abi))" function attribute]) +fi + + +# +# Check whether default calling convention is 'ms_abi'. 
+# +if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then + AC_CACHE_CHECK([whether default calling convention is 'ms_abi'], + [gcry_cv_gcc_default_abi_is_ms_abi], + [gcry_cv_gcc_default_abi_is_ms_abi=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[void *test(void) { + void *(*def_func)(void) = test; + void *__attribute__((ms_abi))(*msabi_func)(void); + /* warning on SysV abi targets, passes on Windows based targets */ + msabi_func = def_func; + return msabi_func; + }]])], + [gcry_cv_gcc_default_abi_is_ms_abi=yes])]) + if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then + AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1, + [Defined if default calling convention is 'ms_abi']) + fi +fi + + +# +# Check whether default calling convention is 'sysv_abi'. +# +if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then + AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'], + [gcry_cv_gcc_default_abi_is_sysv_abi], + [gcry_cv_gcc_default_abi_is_sysv_abi=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[void *test(void) { + void *(*def_func)(void) = test; + void *__attribute__((sysv_abi))(*sysvabi_func)(void); + /* warning on MS ABI targets, passes on SysV ABI targets */ + sysvabi_func = def_func; + return sysvabi_func; + }]])], + [gcry_cv_gcc_default_abi_is_sysv_abi=yes])]) + if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then + AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1, + [Defined if default calling convention is 'sysv_abi']) + fi +fi + + +# Restore flags. +CFLAGS=$_gcc_cflags_save; + + # # Check whether GCC inline assembler supports SSSE3 instructions # This is required for the AES-NI instructions. @@ -1281,9 +1368,6 @@ if test $amd64_as_feature_detection = yes; then [[__asm__( /* Test if '.type' and '.size' are supported. */ /* These work only on ELF targets. */ - /* TODO: add COFF (mingw64, cygwin64) support to assembly - * implementations. Mingw64/cygwin64 also require additional - * work because they use different calling convention. 
*/ "asmfunc:\n\t" ".size asmfunc,.-asmfunc;\n\t" ".type asmfunc,@function;\n\t" @@ -1299,6 +1383,24 @@ if test $amd64_as_feature_detection = yes; then AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1, [Defined if underlying assembler is compatible with amd64 assembly implementations]) fi + if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" && + test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" && + test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then + AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly implementations], + [gcry_cv_gcc_win64_platform_as_ok], + [gcry_cv_gcc_win64_platform_as_ok=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[__asm__( + ".globl asmfunc\n\t" + "asmfunc:\n\t" + "xorq \$(1234), %rbp;\n\t" + );]])], + [gcry_cv_gcc_win64_platform_as_ok=yes])]) + if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then + AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1, + [Defined if underlying assembler is compatible with WIN64 assembly implementations]) + fi + fi fi -- cgit v1.2.1