From e433676a899fa0d274d40547166b03c7c8bd8e78 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 2 May 2015 12:57:07 +0300 Subject: Enable AMD64 SHA1 implementations for WIN64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * cipher/sha1-avx-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/sha1-avx-bmi2-amd64.S: Ditto. * cipher/sha1-ssse3-amd64.S: Ditto. * cipher/sha1.c (USE_SSSE3, USE_AVX, USE_BMI2): Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. [USE_SSSE3 || USE_AVX || USE_BMI2] (ASM_FUNC_ABI) (ASM_EXTRA_STACK): New. (_gcry_sha1_transform_amd64_ssse3, _gcry_sha1_transform_amd64_avx) (_gcry_sha1_transform_amd64_avx_bmi2): Add ASM_FUNC_ABI to prototypes. (transform): Add ASM_EXTRA_STACK to stack burn value. -- Signed-off-by: Jussi Kivilinna --- cipher/sha1-avx-amd64.S | 12 +++++++++-- cipher/sha1-avx-bmi2-amd64.S | 12 +++++++++-- cipher/sha1-ssse3-amd64.S | 12 +++++++++-- cipher/sha1.c | 51 +++++++++++++++++++++++++++++++++----------- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S index 6bec3895..062a45b1 100644 --- a/cipher/sha1-avx-amd64.S +++ b/cipher/sha1-avx-amd64.S @@ -29,7 +29,8 @@ #ifdef __x86_64__ #include <config.h> -#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(HAVE_GCC_INLINE_ASM_BMI2) && \ defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1) @@ -40,6 +41,13 @@ #endif +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + + /* Context structure */ #define state_h0 0 @@ -209,7 +217,7 @@ */ .text .globl _gcry_sha1_transform_amd64_avx -.type _gcry_sha1_transform_amd64_avx,@function +ELF(.type _gcry_sha1_transform_amd64_avx,@function) .align 16 _gcry_sha1_transform_amd64_avx: /* input: diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S index cd5af5bb..22bcbb3c 100644 --- a/cipher/sha1-avx-bmi2-amd64.S +++ b/cipher/sha1-avx-bmi2-amd64.S @@ -29,7 +29,8 @@ #ifdef __x86_64__ #include <config.h> -#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(HAVE_GCC_INLINE_ASM_BMI2) && \ defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1) @@ -40,6 +41,13 @@ #endif +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + + /* Context structure */ #define state_h0 0 @@ -206,7 +214,7 @@ */ .text .globl _gcry_sha1_transform_amd64_avx_bmi2 -.type _gcry_sha1_transform_amd64_avx_bmi2,@function +ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function) .align 16 _gcry_sha1_transform_amd64_avx_bmi2: /* input: diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S index 226988da..98a19e60 100644 --- a/cipher/sha1-ssse3-amd64.S +++ b/cipher/sha1-ssse3-amd64.S @@ -29,7 +29,8 @@ #ifdef __x86_64__ #include <config.h> -#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1) #ifdef __PIC__ @@ -39,6 +40,13 @@ #endif +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif + + /* Context structure */ #define state_h0 0 @@ -220,7 +228,7 @@ */ .text .globl _gcry_sha1_transform_amd64_ssse3 -.type _gcry_sha1_transform_amd64_ssse3,@function +ELF(.type _gcry_sha1_transform_amd64_ssse3,@function) .align 16 _gcry_sha1_transform_amd64_ssse3: /* input: diff --git a/cipher/sha1.c b/cipher/sha1.c index 6ccf0e8e..eb428835 100644 --- a/cipher/sha1.c +++ b/cipher/sha1.c @@ -45,22 +45,26 @@ /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ #undef USE_SSSE3 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_SSSE3) +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSSE3 1 #endif /* USE_AVX indicates whether to compile with Intel AVX code. */ #undef USE_AVX -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_AVX) +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX 1 #endif /* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */ #undef USE_BMI2 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_AVX) && defined(HAVE_GCC_INLINE_ASM_BMI2) +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ + defined(HAVE_GCC_INLINE_ASM_BMI2) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_BMI2 1 #endif @@ -287,22 +291,37 @@ transform_blk (void *ctx, const unsigned char *data) } +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. 
*/ +#undef ASM_FUNC_ABI +#undef ASM_EXTRA_STACK +#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) +# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +# else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +# endif +#endif + + #ifdef USE_SSSE3 unsigned int _gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data, - size_t nblks); + size_t nblks) ASM_FUNC_ABI; #endif #ifdef USE_AVX unsigned int _gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data, - size_t nblks); + size_t nblks) ASM_FUNC_ABI; #endif #ifdef USE_BMI2 unsigned int _gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data, - size_t nblks); + size_t nblks) ASM_FUNC_ABI; #endif @@ -315,17 +334,17 @@ transform (void *ctx, const unsigned char *data, size_t nblks) #ifdef USE_BMI2 if (hd->use_bmi2) return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks) - + 4 * sizeof(void*); + + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_AVX if (hd->use_avx) return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks) - + 4 * sizeof(void*); + + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_SSSE3 if (hd->use_ssse3) return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks) - + 4 * sizeof(void*); + + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_NEON if (hd->use_neon) @@ -340,6 +359,14 @@ transform (void *ctx, const unsigned char *data, size_t nblks) } while (--nblks); +#ifdef ASM_EXTRA_STACK + /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at + * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to + * here too. + */ + burn += ASM_EXTRA_STACK; +#endif + return burn; } -- cgit v1.2.1