summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2015-05-02 12:57:07 +0300
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2015-05-02 12:57:07 +0300
commit: e433676a899fa0d274d40547166b03c7c8bd8e78 (patch)
tree: 4c952ee4a2bbd8bd7b4aee698ecf408fdc4a6c36
parent: 4e09aaa36d151c3312019724a77fc09aa345b82f (diff)
download: libgcrypt-e433676a899fa0d274d40547166b03c7c8bd8e78.tar.gz
Enable AMD64 SHA1 implementations for WIN64
* cipher/sha1-avx-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/sha1-avx-bmi2-amd64.S: Ditto.
* cipher/sha1-ssse3-amd64.S: Ditto.
* cipher/sha1.c (USE_SSSE3, USE_AVX, USE_BMI2): Enable
when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_SSSE3 || USE_AVX || USE_BMI2] (ASM_FUNC_ABI)
(ASM_EXTRA_STACK): New.
(_gcry_sha1_transform_amd64_ssse3, _gcry_sha1_transform_amd64_avx)
(_gcry_sha1_transform_amd64_avx_bmi2): Add ASM_FUNC_ABI to
prototypes.
(transform): Add ASM_EXTRA_STACK to stack burn value.
--

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r--  cipher/sha1-avx-amd64.S       12
-rw-r--r--  cipher/sha1-avx-bmi2-amd64.S  12
-rw-r--r--  cipher/sha1-ssse3-amd64.S     12
-rw-r--r--  cipher/sha1.c                 51
4 files changed, 69 insertions, 18 deletions
diff --git a/cipher/sha1-avx-amd64.S b/cipher/sha1-avx-amd64.S
index 6bec3895..062a45b1 100644
--- a/cipher/sha1-avx-amd64.S
+++ b/cipher/sha1-avx-amd64.S
@@ -29,7 +29,8 @@
#ifdef __x86_64__
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1)
@@ -40,6 +41,13 @@
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
/* Context structure */
#define state_h0 0
@@ -209,7 +217,7 @@
*/
.text
.globl _gcry_sha1_transform_amd64_avx
-.type _gcry_sha1_transform_amd64_avx,@function
+ELF(.type _gcry_sha1_transform_amd64_avx,@function)
.align 16
_gcry_sha1_transform_amd64_avx:
/* input:
diff --git a/cipher/sha1-avx-bmi2-amd64.S b/cipher/sha1-avx-bmi2-amd64.S
index cd5af5bb..22bcbb3c 100644
--- a/cipher/sha1-avx-bmi2-amd64.S
+++ b/cipher/sha1-avx-bmi2-amd64.S
@@ -29,7 +29,8 @@
#ifdef __x86_64__
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(HAVE_GCC_INLINE_ASM_BMI2) && \
defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
@@ -40,6 +41,13 @@
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
/* Context structure */
#define state_h0 0
@@ -206,7 +214,7 @@
*/
.text
.globl _gcry_sha1_transform_amd64_avx_bmi2
-.type _gcry_sha1_transform_amd64_avx_bmi2,@function
+ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function)
.align 16
_gcry_sha1_transform_amd64_avx_bmi2:
/* input:
diff --git a/cipher/sha1-ssse3-amd64.S b/cipher/sha1-ssse3-amd64.S
index 226988da..98a19e60 100644
--- a/cipher/sha1-ssse3-amd64.S
+++ b/cipher/sha1-ssse3-amd64.S
@@ -29,7 +29,8 @@
#ifdef __x86_64__
#include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
#ifdef __PIC__
@@ -39,6 +40,13 @@
#endif
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
/* Context structure */
#define state_h0 0
@@ -220,7 +228,7 @@
*/
.text
.globl _gcry_sha1_transform_amd64_ssse3
-.type _gcry_sha1_transform_amd64_ssse3,@function
+ELF(.type _gcry_sha1_transform_amd64_ssse3,@function)
.align 16
_gcry_sha1_transform_amd64_ssse3:
/* input:
diff --git a/cipher/sha1.c b/cipher/sha1.c
index 6ccf0e8e..eb428835 100644
--- a/cipher/sha1.c
+++ b/cipher/sha1.c
@@ -45,22 +45,26 @@
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
- defined(HAVE_GCC_INLINE_ASM_SSSE3)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_SSSE3 1
#endif
/* USE_AVX indicates whether to compile with Intel AVX code. */
#undef USE_AVX
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
- defined(HAVE_GCC_INLINE_ASM_AVX)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_AVX 1
#endif
/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
#undef USE_BMI2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
- defined(HAVE_GCC_INLINE_ASM_AVX) && defined(HAVE_GCC_INLINE_ASM_BMI2)
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+ defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+ (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define USE_BMI2 1
#endif
@@ -287,22 +291,37 @@ transform_blk (void *ctx, const unsigned char *data)
}
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
#ifdef USE_SSSE3
unsigned int
_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
- size_t nblks);
+ size_t nblks) ASM_FUNC_ABI;
#endif
#ifdef USE_AVX
unsigned int
_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
- size_t nblks);
+ size_t nblks) ASM_FUNC_ABI;
#endif
#ifdef USE_BMI2
unsigned int
_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
- size_t nblks);
+ size_t nblks) ASM_FUNC_ABI;
#endif
@@ -315,17 +334,17 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
#ifdef USE_BMI2
if (hd->use_bmi2)
return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
- + 4 * sizeof(void*);
+ + 4 * sizeof(void*) + ASM_EXTRA_STACK;
#endif
#ifdef USE_AVX
if (hd->use_avx)
return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
- + 4 * sizeof(void*);
+ + 4 * sizeof(void*) + ASM_EXTRA_STACK;
#endif
#ifdef USE_SSSE3
if (hd->use_ssse3)
return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
- + 4 * sizeof(void*);
+ + 4 * sizeof(void*) + ASM_EXTRA_STACK;
#endif
#ifdef USE_NEON
if (hd->use_neon)
@@ -340,6 +359,14 @@ transform (void *ctx, const unsigned char *data, size_t nblks)
}
while (--nblks);
+#ifdef ASM_EXTRA_STACK
+ /* 'transform_blk' is typically inlined and XMM6-XMM15 are stored at
+ * the prologue of this function. Therefore need to add ASM_EXTRA_STACK to
+ * here too.
+ */
+ burn += ASM_EXTRA_STACK;
+#endif
+
return burn;
}