From 8d7de4dbf7732c6eb9e9853ad7c19c89075ace6f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 14 May 2015 12:39:39 +0300 Subject: Enable AMD64 Poly1305 implementations on WIN64 * cipher/poly1305-avx2-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/poly1305-sse2-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/poly1305-internal.h (POLY1305_SYSV_FUNC_ABI): New. (POLY1305_USE_SSE2, POLY1305_USE_AVX2): Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (OPS_FUNC_ABI): New. (poly1305_ops_t): Use OPS_FUNC_ABI. * cipher/poly1305.c (_gcry_poly1305_amd64_sse2_init_ext) (_gcry_poly1305_amd64_sse2_finish_ext) (_gcry_poly1305_amd64_sse2_blocks, _gcry_poly1305_amd64_avx2_init_ext) (_gcry_poly1305_amd64_avx2_finish_ext) (_gcry_poly1305_amd64_avx2_blocks, _gcry_poly1305_armv7_neon_init_ext) (_gcry_poly1305_armv7_neon_finish_ext) (_gcry_poly1305_armv7_neon_blocks, poly1305_init_ext_ref32) (poly1305_blocks_ref32, poly1305_finish_ext_ref32) (poly1305_init_ext_ref8, poly1305_blocks_ref8) (poly1305_finish_ext_ref8): Use OPS_FUNC_ABI. -- Signed-off-by: Jussi Kivilinna --- cipher/poly1305-avx2-amd64.S | 22 +++++++++++++++------- cipher/poly1305-internal.h | 27 ++++++++++++++++++++++----- cipher/poly1305-sse2-amd64.S | 22 +++++++++++++++------- cipher/poly1305.c | 33 ++++++++++++++++++--------------- 4 files changed, 70 insertions(+), 34 deletions(-) diff --git a/cipher/poly1305-avx2-amd64.S b/cipher/poly1305-avx2-amd64.S index 0ba7e761..9362a5ae 100644 --- a/cipher/poly1305-avx2-amd64.S +++ b/cipher/poly1305-avx2-amd64.S @@ -25,15 +25,23 @@ #include -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AVX2_SUPPORT) +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + + .text .align 8 .globl _gcry_poly1305_amd64_avx2_init_ext -.type _gcry_poly1305_amd64_avx2_init_ext,@function; +ELF(.type _gcry_poly1305_amd64_avx2_init_ext,@function;) _gcry_poly1305_amd64_avx2_init_ext: .Lpoly1305_init_ext_avx2_local: xor %edx, %edx @@ -391,12 +399,12 @@ _gcry_poly1305_amd64_avx2_init_ext: popq %r13 popq %r12 ret -.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext; +ELF(.size _gcry_poly1305_amd64_avx2_init_ext,.-_gcry_poly1305_amd64_avx2_init_ext;) .align 8 .globl _gcry_poly1305_amd64_avx2_blocks -.type _gcry_poly1305_amd64_avx2_blocks,@function; +ELF(.type _gcry_poly1305_amd64_avx2_blocks,@function;) _gcry_poly1305_amd64_avx2_blocks: .Lpoly1305_blocks_avx2_local: vzeroupper @@ -717,12 +725,12 @@ _gcry_poly1305_amd64_avx2_blocks: leave addq $8, %rax ret -.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks; +ELF(.size _gcry_poly1305_amd64_avx2_blocks,.-_gcry_poly1305_amd64_avx2_blocks;) .align 8 .globl _gcry_poly1305_amd64_avx2_finish_ext -.type _gcry_poly1305_amd64_avx2_finish_ext,@function; +ELF(.type _gcry_poly1305_amd64_avx2_finish_ext,@function;) _gcry_poly1305_amd64_avx2_finish_ext: .Lpoly1305_finish_ext_avx2_local: vzeroupper @@ -949,6 +957,6 @@ _gcry_poly1305_amd64_avx2_finish_ext: popq %rbp addq $(8*5), %rax ret -.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext; +ELF(.size _gcry_poly1305_amd64_avx2_finish_ext,.-_gcry_poly1305_amd64_avx2_finish_ext;) #endif diff --git a/cipher/poly1305-internal.h b/cipher/poly1305-internal.h index dfc0c048..bcbe5df7 100644 --- a/cipher/poly1305-internal.h +++ b/cipher/poly1305-internal.h @@ -44,24 +44,30 @@ #define POLY1305_REF_ALIGNMENT sizeof(void *) +#undef POLY1305_SYSV_FUNC_ABI + /* POLY1305_USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */ #undef POLY1305_USE_SSE2 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define POLY1305_USE_SSE2 1 # define POLY1305_SSE2_BLOCKSIZE 32 # define POLY1305_SSE2_STATESIZE 248 # define POLY1305_SSE2_ALIGNMENT 16 +# define POLY1305_SYSV_FUNC_ABI 1 #endif /* POLY1305_USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ #undef POLY1305_USE_AVX2 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ defined(ENABLE_AVX2_SUPPORT) # define POLY1305_USE_AVX2 1 # define POLY1305_AVX2_BLOCKSIZE 64 # define POLY1305_AVX2_STATESIZE 328 # define POLY1305_AVX2_ALIGNMENT 32 +# define POLY1305_SYSV_FUNC_ABI 1 #endif @@ -112,6 +118,17 @@ #endif +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. */ +#undef OPS_FUNC_ABI +#if defined(POLY1305_SYSV_FUNC_ABI) && \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) +# define OPS_FUNC_ABI __attribute__((sysv_abi)) +#else +# define OPS_FUNC_ABI +#endif + + typedef struct poly1305_key_s { byte b[POLY1305_KEYLEN]; @@ -121,10 +138,10 @@ typedef struct poly1305_key_s typedef struct poly1305_ops_s { size_t block_size; - void (*init_ext) (void *ctx, const poly1305_key_t * key); - unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes); + void (*init_ext) (void *ctx, const poly1305_key_t * key) OPS_FUNC_ABI; + unsigned int (*blocks) (void *ctx, const byte * m, size_t bytes) OPS_FUNC_ABI; unsigned int (*finish_ext) (void *ctx, const byte * m, size_t remaining, - byte mac[POLY1305_TAGLEN]); + byte mac[POLY1305_TAGLEN]) OPS_FUNC_ABI; } poly1305_ops_t; diff --git a/cipher/poly1305-sse2-amd64.S b/cipher/poly1305-sse2-amd64.S index 106b1197..219eb077 100644 --- a/cipher/poly1305-sse2-amd64.S +++ b/cipher/poly1305-sse2-amd64.S @@ -25,14 +25,22 @@ #include -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + .text .align 8 .globl _gcry_poly1305_amd64_sse2_init_ext -.type _gcry_poly1305_amd64_sse2_init_ext,@function; +ELF(.type _gcry_poly1305_amd64_sse2_init_ext,@function;) _gcry_poly1305_amd64_sse2_init_ext: .Lpoly1305_init_ext_x86_local: xor %edx, %edx @@ -273,12 +281,12 @@ _gcry_poly1305_amd64_sse2_init_ext: popq %r13 popq %r12 ret -.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext; +ELF(.size _gcry_poly1305_amd64_sse2_init_ext,.-_gcry_poly1305_amd64_sse2_init_ext;) .align 8 .globl _gcry_poly1305_amd64_sse2_finish_ext -.type _gcry_poly1305_amd64_sse2_finish_ext,@function; +ELF(.type _gcry_poly1305_amd64_sse2_finish_ext,@function;) _gcry_poly1305_amd64_sse2_finish_ext: .Lpoly1305_finish_ext_x86_local: pushq %rbp @@ -424,12 +432,12 @@ _gcry_poly1305_amd64_sse2_finish_ext: popq %rbp addq $8, %rax ret -.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext; +ELF(.size _gcry_poly1305_amd64_sse2_finish_ext,.-_gcry_poly1305_amd64_sse2_finish_ext;) .align 8 .globl _gcry_poly1305_amd64_sse2_blocks -.type _gcry_poly1305_amd64_sse2_blocks,@function; +ELF(.type _gcry_poly1305_amd64_sse2_blocks,@function;) _gcry_poly1305_amd64_sse2_blocks: .Lpoly1305_blocks_x86_local: pushq %rbp @@ -1030,6 +1038,6 @@ _gcry_poly1305_amd64_sse2_blocks: pxor %xmm8, %xmm8 pxor %xmm0, %xmm0 ret -.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks; +ELF(.size _gcry_poly1305_amd64_sse2_blocks,.-_gcry_poly1305_amd64_sse2_blocks;) #endif diff --git a/cipher/poly1305.c b/cipher/poly1305.c index 28dbbf8f..1adf0e7b 100644 --- a/cipher/poly1305.c +++ b/cipher/poly1305.c @@ -40,12 +40,13 @@ static const char *selftest (void); #ifdef POLY1305_USE_SSE2 -void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key); +void _gcry_poly1305_amd64_sse2_init_ext(void *state, const poly1305_key_t *key) + OPS_FUNC_ABI; unsigned int _gcry_poly1305_amd64_sse2_finish_ext(void *state, const byte *m, size_t remaining, - byte mac[16]); + byte mac[16]) OPS_FUNC_ABI; unsigned int _gcry_poly1305_amd64_sse2_blocks(void *ctx, const byte *m, - size_t bytes); + size_t bytes) OPS_FUNC_ABI; static const poly1305_ops_t poly1305_amd64_sse2_ops = { POLY1305_SSE2_BLOCKSIZE, @@ -59,12 +60,13 @@ static const poly1305_ops_t poly1305_amd64_sse2_ops = { #ifdef POLY1305_USE_AVX2 -void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key); +void _gcry_poly1305_amd64_avx2_init_ext(void *state, const poly1305_key_t *key) + OPS_FUNC_ABI; unsigned int _gcry_poly1305_amd64_avx2_finish_ext(void *state, const byte *m, size_t remaining, - byte mac[16]); + byte mac[16]) OPS_FUNC_ABI; unsigned int _gcry_poly1305_amd64_avx2_blocks(void *ctx, const byte *m, - size_t bytes); + size_t bytes) OPS_FUNC_ABI; static const poly1305_ops_t poly1305_amd64_avx2_ops = { POLY1305_AVX2_BLOCKSIZE, @@ -78,12 +80,13 @@ static const poly1305_ops_t poly1305_amd64_avx2_ops = { #ifdef POLY1305_USE_NEON -void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key); +void _gcry_poly1305_armv7_neon_init_ext(void *state, const poly1305_key_t *key) + OPS_FUNC_ABI; unsigned int _gcry_poly1305_armv7_neon_finish_ext(void *state, const byte *m, size_t remaining, - byte mac[16]); + byte mac[16]) OPS_FUNC_ABI; unsigned int _gcry_poly1305_armv7_neon_blocks(void *ctx, const byte *m, - size_t bytes); + size_t bytes) OPS_FUNC_ABI; static const poly1305_ops_t poly1305_armv7_neon_ops = { POLY1305_NEON_BLOCKSIZE, @@ -110,7 +113,7 @@ typedef struct poly1305_state_ref32_s } poly1305_state_ref32_t; -static void +static OPS_FUNC_ABI void poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key) { poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; @@ -142,7 +145,7 @@ poly1305_init_ext_ref32 (void *state, const poly1305_key_t * key) } -static unsigned int +static OPS_FUNC_ABI unsigned int poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes) { poly1305_state_ref32_t *st = (poly1305_state_ref32_t *) state; @@ -230,7 +233,7 @@ poly1305_blocks_ref32 (void *state, const byte * m, size_t bytes) } -static unsigned int +static OPS_FUNC_ABI unsigned int poly1305_finish_ext_ref32 (void *state, const byte * m, size_t remaining, byte mac[POLY1305_TAGLEN]) { @@ -370,7 +373,7 @@ typedef struct poly1305_state_ref8_t } poly1305_state_ref8_t; -static void +static OPS_FUNC_ABI void poly1305_init_ext_ref8 (void *state, const poly1305_key_t * key) { poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state; @@ -471,7 +474,7 @@ poly1305_freeze_ref8 (byte h[17]) } -static unsigned int +static OPS_FUNC_ABI unsigned int poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes) { poly1305_state_ref8_t *st = (poly1305_state_ref8_t *) state; @@ -519,7 +522,7 @@ poly1305_blocks_ref8 (void *state, const byte * m, size_t bytes) } -static unsigned int +static OPS_FUNC_ABI unsigned int poly1305_finish_ext_ref8 (void *state, const byte * m, size_t remaining, byte mac[POLY1305_TAGLEN]) { -- cgit v1.2.1