diff options
Diffstat (limited to 'cipher/sha512.c')
-rw-r--r-- | cipher/sha512.c | 60 |
1 files changed, 45 insertions, 15 deletions
diff --git a/cipher/sha512.c b/cipher/sha512.c index 5a6af808..029f8f02 100644 --- a/cipher/sha512.c +++ b/cipher/sha512.c @@ -68,27 +68,31 @@ /* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ #undef USE_SSSE3 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ - defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSSE3 1 #endif /* USE_AVX indicates whether to compile with Intel AVX code. */ #undef USE_AVX -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_AVX) && \ - defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX 1 #endif /* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */ #undef USE_AVX2 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ - defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \ - defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ + defined(HAVE_GCC_INLINE_ASM_BMI2) && \ + defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AVX2 1 #endif @@ -543,6 +547,21 @@ transform_blk (SHA512_STATE *hd, const unsigned char *data) } +/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. */ +#undef ASM_FUNC_ABI +#undef ASM_EXTRA_STACK +#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) +# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +# else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +# endif +#endif + + #ifdef USE_ARM_NEON_ASM void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd, const unsigned char *data, @@ -551,17 +570,20 @@ void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd, #ifdef USE_SSSE3 unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data, - void *state, size_t num_blks); + void *state, + size_t num_blks) ASM_FUNC_ABI; #endif #ifdef USE_AVX unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data, - void *state, size_t num_blks); + void *state, + size_t num_blks) ASM_FUNC_ABI; #endif #ifdef USE_AVX2 unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data, - void *state, size_t num_blks); + void *state, + size_t num_blks) ASM_FUNC_ABI; #endif @@ -574,19 +596,19 @@ transform (void *context, const unsigned char *data, size_t nblks) #ifdef USE_AVX2 if (ctx->use_avx2) return _gcry_sha512_transform_amd64_avx2 (data, &ctx->state, nblks) - + 4 * sizeof(void*); + + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_AVX if (ctx->use_avx) return _gcry_sha512_transform_amd64_avx (data, &ctx->state, nblks) - + 4 * sizeof(void*); + + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_SSSE3 if (ctx->use_ssse3) return _gcry_sha512_transform_amd64_ssse3 (data, &ctx->state, nblks) - + 4 * sizeof(void*); + + 4 * sizeof(void*) + ASM_EXTRA_STACK; #endif #ifdef USE_ARM_NEON_ASM @@ -607,6 +629,14 @@ transform (void *context, const unsigned char *data, size_t nblks) } while (--nblks); +#ifdef ASM_EXTRA_STACK + /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at + * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to + * here too. + */ + burn += ASM_EXTRA_STACK; +#endif + return burn; } |