diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2015-05-02 13:05:02 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2015-05-02 14:39:03 +0300 |
commit | 022959099644f64df5f2a83ade21159864f64837 (patch) | |
tree | ac25166a0d67a63f0a53fdced194c5b77fae644f | |
parent | e433676a899fa0d274d40547166b03c7c8bd8e78 (diff) | |
download | libgcrypt-022959099644f64df5f2a83ade21159864f64837.tar.gz |
Enable AMD64 SHA256 implementations for WIN64
* cipher/sha256-avx-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/sha256-avx2-bmi2-amd64.S: Ditto.
* cipher/sha256-ssse3-amd64.S: Ditto.
* cipher/sha256.c (USE_SSSE3, USE_AVX, USE_AVX2): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_SSSE3 || USE_AVX || USE_AVX2] (ASM_FUNC_ABI)
(ASM_EXTRA_STACK): New.
(_gcry_sha256_transform_amd64_ssse3, _gcry_sha256_transform_amd64_avx)
(_gcry_sha256_transform_amd64_avx2): Add ASM_FUNC_ABI to prototypes.
(transform): Add ASM_EXTRA_STACK to stack burn value.
--
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | cipher/sha256-avx-amd64.S | 11 | ||||
-rw-r--r-- | cipher/sha256-avx2-bmi2-amd64.S | 11 | ||||
-rw-r--r-- | cipher/sha256-ssse3-amd64.S | 11 | ||||
-rw-r--r-- | cipher/sha256.c | 60 |
4 files changed, 72 insertions, 21 deletions
```diff
diff --git a/cipher/sha256-avx-amd64.S b/cipher/sha256-avx-amd64.S
index 3912db7d..8bf26bd7 100644
--- a/cipher/sha256-avx-amd64.S
+++ b/cipher/sha256-avx-amd64.S
@@ -54,7 +54,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA256)
 
@@ -64,6 +65,12 @@
 # define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 #define VMOVDQ vmovdqu /* assume buffers not aligned */
@@ -370,7 +377,7 @@ rotate_Xs
 */
 .text
 .globl _gcry_sha256_transform_amd64_avx
-.type _gcry_sha256_transform_amd64_avx,@function;
+ELF(.type _gcry_sha256_transform_amd64_avx,@function;)
 .align 16
 _gcry_sha256_transform_amd64_avx:
         vzeroupper
diff --git a/cipher/sha256-avx2-bmi2-amd64.S b/cipher/sha256-avx2-bmi2-amd64.S
index 09df711f..74b60631 100644
--- a/cipher/sha256-avx2-bmi2-amd64.S
+++ b/cipher/sha256-avx2-bmi2-amd64.S
@@ -54,7 +54,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
     defined(USE_SHA256)
@@ -65,6 +66,12 @@
 # define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 #define VMOVDQ vmovdqu /* ; assume buffers not aligned */
@@ -555,7 +562,7 @@ rotate_Xs
 */
 .text
 .globl _gcry_sha256_transform_amd64_avx2
-.type _gcry_sha256_transform_amd64_avx2,@function
+ELF(.type _gcry_sha256_transform_amd64_avx2,@function)
 .align 32
 _gcry_sha256_transform_amd64_avx2:
         push rbx
diff --git a/cipher/sha256-ssse3-amd64.S b/cipher/sha256-ssse3-amd64.S
index 80b1cec4..9ec87e46 100644
--- a/cipher/sha256-ssse3-amd64.S
+++ b/cipher/sha256-ssse3-amd64.S
@@ -55,7 +55,8 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
     defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
     defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA256)
 
@@ -65,6 +66,12 @@
 # define ADD_RIP
 #endif
 
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
 .intel_syntax noprefix
 
 #define MOVDQ movdqu /* assume buffers not aligned */
@@ -376,7 +383,7 @@ rotate_Xs
 */
 .text
 .globl _gcry_sha256_transform_amd64_ssse3
-.type _gcry_sha256_transform_amd64_ssse3,@function;
+ELF(.type _gcry_sha256_transform_amd64_ssse3,@function;)
 .align 16
 _gcry_sha256_transform_amd64_ssse3:
         push rbx
diff --git a/cipher/sha256.c b/cipher/sha256.c
index d3af1722..59ffa434 100644
--- a/cipher/sha256.c
+++ b/cipher/sha256.c
@@ -49,25 +49,29 @@
 
 /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
 #undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_SSSE3 1
 #endif
 
 /* USE_AVX indicates whether to compile with Intel AVX code. */
 #undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX 1
 #endif
 
 /* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
 #undef USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
-    defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
-    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AVX2 1
 #endif
 
@@ -322,19 +326,37 @@ transform_blk (void *ctx, const unsigned char *data)
 #undef R
 
 
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
 #ifdef USE_SSSE3
 unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
-                                                u32 state[8], size_t num_blks);
+                                                u32 state[8],
+                                                size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX
 unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
-                                              u32 state[8], size_t num_blks);
+                                              u32 state[8],
+                                              size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 #ifdef USE_AVX2
 unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
-                                               u32 state[8], size_t num_blks);
+                                               u32 state[8],
+                                               size_t num_blks) ASM_FUNC_ABI;
 #endif
 
 
@@ -347,19 +369,19 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
 #ifdef USE_AVX2
   if (hd->use_avx2)
     return _gcry_sha256_transform_amd64_avx2 (data, &hd->h0, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_AVX
   if (hd->use_avx)
     return _gcry_sha256_transform_amd64_avx (data, &hd->h0, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
 #ifdef USE_SSSE3
   if (hd->use_ssse3)
     return _gcry_sha256_transform_amd64_ssse3 (data, &hd->h0, nblks)
-           + 4 * sizeof(void*);
+           + 4 * sizeof(void*) + ASM_EXTRA_STACK;
 #endif
 
   do
@@ -369,6 +391,14 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
     }
   while (--nblks);
 
+#ifdef ASM_EXTRA_STACK
+  /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+   * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
+   * here too.
+   */
+  burn += ASM_EXTRA_STACK;
+#endif
+
   return burn;
 }
 
```