From eb0ed576893b6c7990dbcb568510f831d246cea6 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 14 May 2015 13:07:48 +0300 Subject: Enable AMD64 Serpent implementations on WIN64 * cipher/serpent-avx2-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/serpent-sse2-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/serpent.c (USE_SSE2, USE_AVX2): Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. [USE_SSE2 || USE_AVX2] (ASM_FUNC_ABI): New. (_gcry_serpent_sse2_ctr_enc, _gcry_serpent_sse2_cbc_dec) (_gcry_serpent_sse2_cfb_dec, _gcry_serpent_avx2_ctr_enc) (_gcry_serpent_avx2_cbc_dec, _gcry_serpent_avx2_cfb_dec): Add ASM_FUNC_ABI. -- Signed-off-by: Jussi Kivilinna --- cipher/serpent-avx2-amd64.S | 29 ++++++++++++++++++----------- cipher/serpent-sse2-amd64.S | 29 ++++++++++++++++++----------- cipher/serpent.c | 30 ++++++++++++++++++++++-------- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S index 03d29aec..3f59f060 100644 --- a/cipher/serpent-avx2-amd64.S +++ b/cipher/serpent-avx2-amd64.S @@ -20,9 +20,16 @@ #ifdef __x86_64 #include <config.h> -#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT) && \ +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && \ defined(ENABLE_AVX2_SUPPORT) +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...)
/*_*/ +#endif + #ifdef __PIC__ # define RIP (%rip) #else @@ -404,7 +411,7 @@ .text .align 8 -.type __serpent_enc_blk16,@function; +ELF(.type __serpent_enc_blk16,@function;) __serpent_enc_blk16: /* input: * %rdi: ctx, CTX @@ -489,10 +496,10 @@ __serpent_enc_blk16: transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1); ret; -.size __serpent_enc_blk16,.-__serpent_enc_blk16; +ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;) .align 8 -.type __serpent_dec_blk16,@function; +ELF(.type __serpent_dec_blk16,@function;) __serpent_dec_blk16: /* input: * %rdi: ctx, CTX @@ -579,7 +586,7 @@ __serpent_dec_blk16: transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); ret; -.size __serpent_dec_blk16,.-__serpent_dec_blk16; +ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;) #define inc_le128(x, minus_one, tmp) \ vpcmpeqq minus_one, x, tmp; \ @@ -589,7 +596,7 @@ __serpent_dec_blk16: .align 8 .globl _gcry_serpent_avx2_ctr_enc -.type _gcry_serpent_avx2_ctr_enc,@function; +ELF(.type _gcry_serpent_avx2_ctr_enc,@function;) _gcry_serpent_avx2_ctr_enc: /* input: * %rdi: ctx, CTX @@ -695,11 +702,11 @@ _gcry_serpent_avx2_ctr_enc: vzeroall; ret -.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc; +ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;) .align 8 .globl _gcry_serpent_avx2_cbc_dec -.type _gcry_serpent_avx2_cbc_dec,@function; +ELF(.type _gcry_serpent_avx2_cbc_dec,@function;) _gcry_serpent_avx2_cbc_dec: /* input: * %rdi: ctx, CTX @@ -746,11 +753,11 @@ _gcry_serpent_avx2_cbc_dec: vzeroall; ret -.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec; +ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;) .align 8 .globl _gcry_serpent_avx2_cfb_dec -.type _gcry_serpent_avx2_cfb_dec,@function; +ELF(.type _gcry_serpent_avx2_cfb_dec,@function;) _gcry_serpent_avx2_cfb_dec: /* input: * %rdi: ctx, CTX @@ -799,7 +806,7 @@ _gcry_serpent_avx2_cfb_dec: vzeroall; ret -.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec; +ELF(.size 
_gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;) .data .align 16 diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S index 395f6603..adbf4e27 100644 --- a/cipher/serpent-sse2-amd64.S +++ b/cipher/serpent-sse2-amd64.S @@ -20,7 +20,14 @@ #ifdef __x86_64 #include <config.h> -#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SERPENT) +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif #ifdef __PIC__ # define RIP (%rip) @@ -427,7 +434,7 @@ .text .align 8 -.type __serpent_enc_blk8,@function; +ELF(.type __serpent_enc_blk8,@function;) __serpent_enc_blk8: /* input: * %rdi: ctx, CTX @@ -512,10 +519,10 @@ __serpent_enc_blk8: transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1); ret; -.size __serpent_enc_blk8,.-__serpent_enc_blk8; +ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;) .align 8 -.type __serpent_dec_blk8,@function; +ELF(.type __serpent_dec_blk8,@function;) __serpent_dec_blk8: /* input: * %rdi: ctx, CTX @@ -602,11 +609,11 @@ __serpent_dec_blk8: transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1); ret; -.size __serpent_dec_blk8,.-__serpent_dec_blk8; +ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;) .align 8 .globl _gcry_serpent_sse2_ctr_enc -.type _gcry_serpent_sse2_ctr_enc,@function; +ELF(.type _gcry_serpent_sse2_ctr_enc,@function;) _gcry_serpent_sse2_ctr_enc: /* input: * %rdi: ctx, CTX @@ -732,11 +739,11 @@ _gcry_serpent_sse2_ctr_enc: pxor RNOT, RNOT; ret -.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc; +ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;) .align 8 .globl _gcry_serpent_sse2_cbc_dec -.type _gcry_serpent_sse2_cbc_dec,@function; +ELF(.type _gcry_serpent_sse2_cbc_dec,@function;) _gcry_serpent_sse2_cbc_dec: /* input: * %rdi: ctx, CTX @@ -793,11 +800,11 @@ _gcry_serpent_sse2_cbc_dec: pxor
RNOT, RNOT; ret -.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec; +ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;) .align 8 .globl _gcry_serpent_sse2_cfb_dec -.type _gcry_serpent_sse2_cfb_dec,@function; +ELF(.type _gcry_serpent_sse2_cfb_dec,@function;) _gcry_serpent_sse2_cfb_dec: /* input: * %rdi: ctx, CTX @@ -857,7 +864,7 @@ _gcry_serpent_sse2_cfb_dec: pxor RNOT, RNOT; ret -.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec; +ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;) #endif /*defined(USE_SERPENT)*/ #endif /*__x86_64*/ diff --git a/cipher/serpent.c b/cipher/serpent.c index 0be49da4..7d0e1127 100644 --- a/cipher/serpent.c +++ b/cipher/serpent.c @@ -34,13 +34,15 @@ /* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */ #undef USE_SSE2 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_SSE2 1 #endif /* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */ #undef USE_AVX2 -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # if defined(ENABLE_AVX2_SUPPORT) # define USE_AVX2 1 # endif @@ -86,6 +88,18 @@ typedef struct serpent_context } serpent_context_t; +/* Assembly implementations use the SystemV ABI; on Win64, ABI conversion and + * additional stack to store XMM6-XMM15 are needed. */ +#undef ASM_FUNC_ABI +#if defined(USE_SSE2) || defined(USE_AVX2) +# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# else +# define ASM_FUNC_ABI +# endif +#endif + + #ifdef USE_SSE2 /* Assembler implementations of Serpent using SSE2. Process 8 block in parallel.
@@ -93,17 +107,17 @@ typedef struct serpent_context extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, - unsigned char *ctr); + unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, - unsigned char *iv); + unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, - unsigned char *iv); + unsigned char *iv) ASM_FUNC_ABI; #endif #ifdef USE_AVX2 @@ -113,17 +127,17 @@ extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx, extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, - unsigned char *ctr); + unsigned char *ctr) ASM_FUNC_ABI; extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, - unsigned char *iv); + unsigned char *iv) ASM_FUNC_ABI; extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, - unsigned char *iv); + unsigned char *iv) ASM_FUNC_ABI; #endif #ifdef USE_NEON -- cgit v1.2.1