summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2015-05-14 12:39:39 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2015-05-14 14:10:28 +0300
commit8d7de4dbf7732c6eb9e9853ad7c19c89075ace6f (patch)
treee6edfaad820279019bb0e2b705cbaee9086802c4
parentb65e9e71d5ee992db5c96793c6af999545daad28 (diff)
downloadlibgcrypt-8d7de4dbf7732c6eb9e9853ad7c19c89075ace6f.tar.gz
Enable AMD64 Poly1305 implementations on WIN64
* cipher/poly1305-avx2-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/poly1305-sse2-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/poly1305-internal.h (POLY1305_SYSV_FUNC_ABI): New. (POLY1305_USE_SSE2, POLY1305_USE_AVX2): Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (OPS_FUNC_ABI): New. (poly1305_ops_t): Use OPS_FUNC_ABI. * cipher/poly1305.c (_gcry_poly1305_amd64_sse2_init_ext) (_gcry_poly1305_amd64_sse2_finish_ext) (_gcry_poly1305_amd64_sse2_blocks, _gcry_poly1305_amd64_avx2_init_ext) (_gcry_poly1305_amd64_avx2_finish_ext) (_gcry_poly1305_amd64_avx2_blocks, _gcry_poly1305_armv7_neon_init_ext) (_gcry_poly1305_armv7_neon_finish_ext) (_gcry_poly1305_armv7_neon_blocks, poly1305_init_ext_ref32) (poly1305_blocks_ref32, poly1305_finish_ext_ref32) (poly1305_init_ext_ref8, poly1305_blocks_ref8) (poly1305_finish_ext_ref8): Use OPS_FUNC_ABI. -- Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r--cipher/poly1305-avx2-amd64.S22
-rw-r--r--cipher/poly1305-internal.h27
-rw-r--r--cipher/poly1305-sse2-amd64.S22
-rw-r--r--cipher/poly1305.c33
4 files changed, 70 insertions, 34 deletions
diff --git a/cipher/poly1305-avx2-amd64.S b/cipher/poly1305-avx2-amd64.S
index 0ba7e761..9362a5ae 100644
--- a/cipher/poly1305-avx2-amd64.S
+++ b/cipher/poly1305-avx2-amd64.S
@@ -25,15 +25,23 @@
#include <config.h>
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT)
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+
.text
.align 8
.globl _gcry_poly1305_amd64_avx2_init_ext
-.type _gcry_poly1305_amd64_avx2_init_ext,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_init_ext,@function;)
_gcry_poly1305_amd64_avx2_init_ext:
.Lpoly1305_init_ext_avx2_local:
xor %edx, %edx
@@ -391,12 +399,12 @@ _gcry_poly1305_amd64_avx2_init_ext:
popq %r13
popq %r12
ret
-.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;)
.align 8
.globl _gcry_poly1305_amd64_avx2_blocks
-.type _gcry_poly1305_amd64_avx2_blocks,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_blocks,@function;)
_gcry_poly1305_amd64_avx2_blocks:
.Lpoly1305_blocks_avx2_local:
vzeroupper
@@ -717,12 +725,12 @@ _gcry_poly1305_amd64_avx2_blocks:
leave
addq $8, %rax
ret
-.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;
+ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;)
.align 8
.globl _gcry_poly1305_amd64_avx2_finish_ext
-.type _gcry_poly1305_amd64_avx2_finish_ext,@function;
+ELF(.type _gcry_poly1305_amd64_avx2_finish_ext,@function;)
_gcry_poly1305_amd64_avx2_finish_ext:
.Lpoly1305_finish_ext_avx2_local:
vzeroupper
@@ -949,6 +957,6 @@ _gcry_poly1305_amd64_avx2_finish_ext:
popq %rbp
addq $(8*5), %rax
ret
-.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;)
#endif
diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h
index dfc0c048..bcbe5df7 100644
--- a/cipher/poly1305-internal.h
+++ b/cipher/poly1305-internal.h
@@ -44,24 +44,30 @@
#define POLY1305_REF_ALIGNMENT sizeof(void *)
+#undef POLY1305_SYSV_FUNC_ABI
+
/* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
#undef POLY1305_USE_SSE2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
# define POLY1305_USE_SSE2 1
# define POLY1305_SSE2_BLOCKSIZE 32
# define POLY1305_SSE2_STATESIZE 248
# define POLY1305_SSE2_ALIGNMENT 16
+# define POLY1305_SYSV_FUNC_ABI 1
#endif
/* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
#undef POLY1305_USE_AVX2
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
defined(ENABLE_AVX2_SUPPORT)
# define POLY1305_USE_AVX2 1
# define POLY1305_AVX2_BLOCKSIZE 64
# define POLY1305_AVX2_STATESIZE 328
# define POLY1305_AVX2_ALIGNMENT 32
+# define POLY1305_SYSV_FUNC_ABI 1
#endif
@@ -112,6 +118,17 @@
#endif
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef OPS_FUNC_ABI
+#if defined(POLY1305_SYSV_FUNC_ABI) && \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define OPS_FUNC_ABI __attribute__((sysv_abi))
+#else
+# define OPS_FUNC_ABI
+#endif
+
+
typedef struct poly1305_key_s
{
byte b[POLY1305_KEYLEN];
@@ -121,10 +138,10 @@ typedef struct poly1305_key_s
typedef struct poly1305_ops_s
{
size_t block_size;
- void (*init_ext) (void *ctx, const poly1305_key_t * key);
- unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes);
+ void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI;
+ unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI;
unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining,
- byte mac[POLY1305_TAGLEN]);
+ byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI;
} poly1305_ops_t;
diff --git a/cipher/poly1305-sse2-amd64.S b/cipher/poly1305-sse2-amd64.S
index 106b1197..219eb077 100644
--- a/cipher/poly1305-sse2-amd64.S
+++ b/cipher/poly1305-sse2-amd64.S
@@ -25,14 +25,22 @@
#include <config.h>
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
.text
.align 8
.globl _gcry_poly1305_amd64_sse2_init_ext
-.type _gcry_poly1305_amd64_sse2_init_ext,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_init_ext,@function;)
_gcry_poly1305_amd64_sse2_init_ext:
.Lpoly1305_init_ext_x86_local:
xor %edx, %edx
@@ -273,12 +281,12 @@ _gcry_poly1305_amd64_sse2_init_ext:
popq %r13
popq %r12
ret
-.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;)
.align 8
.globl _gcry_poly1305_amd64_sse2_finish_ext
-.type _gcry_poly1305_amd64_sse2_finish_ext,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_finish_ext,@function;)
_gcry_poly1305_amd64_sse2_finish_ext:
.Lpoly1305_finish_ext_x86_local:
pushq %rbp
@@ -424,12 +432,12 @@ _gcry_poly1305_amd64_sse2_finish_ext:
popq %rbp
addq $8, %rax
ret
-.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;
+ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;)
.align 8
.globl _gcry_poly1305_amd64_sse2_blocks
-.type _gcry_poly1305_amd64_sse2_blocks,@function;
+ELF(.type _gcry_poly1305_amd64_sse2_blocks,@function;)
_gcry_poly1305_amd64_sse2_blocks:
.Lpoly1305_blocks_x86_local:
pushq %rbp
@@ -1030,6 +1038,6 @@ _gcry_poly1305_amd64_sse2_blocks:
pxor %xmm8, %xmm8
pxor %xmm0, %xmm0
ret
-.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;
+ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;)
#endif
diff --git a/cipher/poly1305.c b/cipher/poly1305.c
index 28dbbf8f..1adf0e7b 100644
--- a/cipher/poly1305.c
+++ b/cipher/poly1305.c
@@ -40,12 +40,13 @@ static const char *selftest (void);
#ifdef POLY1305_USE_SSE2
-void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_amd64_sse2_ops = {
POLY1305_SSE2_BLOCKSIZE,
@@ -59,12 +60,13 @@ static const poly1305_ops_t poly1305_amd64_sse2_ops = {
#ifdef POLY1305_USE_AVX2
-void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_amd64_avx2_ops = {
POLY1305_AVX2_BLOCKSIZE,
@@ -78,12 +80,13 @@ static const poly1305_ops_t poly1305_amd64_avx2_ops = {
#ifdef POLY1305_USE_NEON
-void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key);
+void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key)
+ OPS_FUNC_ABI;
unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m,
size_t remaining,
- byte mac[16]);
+ byte mac[16]) OPS_FUNC_ABI;
unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m,
- size_t bytes);
+ size_t bytes) OPS_FUNC_ABI;
static const poly1305_ops_t poly1305_armv7_neon_ops = {
POLY1305_NEON_BLOCKSIZE,
@@ -110,7 +113,7 @@ typedef struct poly1305_state_ref32_s
} poly1305_state_ref32_t;
-static void
+static OPS_FUNC_ABI void
poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
{
poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -142,7 +145,7 @@ poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
{
poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state;
@@ -230,7 +233,7 @@ poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_finish_ext_ref32 (void *state, const byte * m,
size_t remaining, byte mac[POLY1305_TAGLEN])
{
@@ -370,7 +373,7 @@ typedef struct poly1305_state_ref8_t
} poly1305_state_ref8_t;
-static void
+static OPS_FUNC_ABI void
poly1305_init_ext_ref8 (void *state, const poly1305_key_t * key)
{
poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -471,7 +474,7 @@ poly1305_freeze_ref8 (byte h[17])
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
{
poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state;
@@ -519,7 +522,7 @@ poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes)
}
-static unsigned int
+static OPS_FUNC_ABI unsigned int
poly1305_finish_ext_ref8 (void *state, const byte * m, size_t remaining,
byte mac[POLY1305_TAGLEN])
{