From 12bc93ca8187b8061c2e705427ef22f5a71d29b0 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna
Date: Thu, 14 May 2015 12:37:21 +0300
Subject: Enable AMD64 Salsa20 implementation on WIN64

* cipher/salsa20-amd64.S: Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
(ELF): New macro to mask lines with ELF specific commands.
* cipher/salsa20.c (USE_AMD64): Enable when
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined.
[USE_AMD64] (ASM_FUNC_ABI, ASM_EXTRA_STACK): New.
(_gcry_salsa20_amd64_keysetup, _gcry_salsa20_amd64_ivsetup)
(_gcry_salsa20_amd64_encrypt_blocks): Add ASM_FUNC_ABI.
[USE_AMD64] (salsa20_core): Add ASM_EXTRA_STACK.
(salsa20_do_encrypt_stream) [USE_AMD64]: Add ASM_EXTRA_STACK.
--

Signed-off-by: Jussi Kivilinna
---
 cipher/salsa20-amd64.S | 17 ++++++++++++-----
 cipher/salsa20.c       | 26 +++++++++++++++++++++-----
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/cipher/salsa20-amd64.S b/cipher/salsa20-amd64.S
index 7046dbbb..470c32aa 100644
--- a/cipher/salsa20-amd64.S
+++ b/cipher/salsa20-amd64.S
@@ -25,13 +25,20 @@
 
 #ifdef __x86_64
 #include <config.h>
-#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_SALSA20)
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
 
 .text
 
 .align 8
 .globl _gcry_salsa20_amd64_keysetup
-.type _gcry_salsa20_amd64_keysetup,@function;
+ELF(.type _gcry_salsa20_amd64_keysetup,@function;)
 _gcry_salsa20_amd64_keysetup:
 	movl 0(%rsi),%r8d
 	movl 4(%rsi),%r9d
@@ -83,7 +90,7 @@ _gcry_salsa20_amd64_keysetup:
 
 .align 8
 .globl _gcry_salsa20_amd64_ivsetup
-.type _gcry_salsa20_amd64_ivsetup,@function;
+ELF(.type _gcry_salsa20_amd64_ivsetup,@function;)
 _gcry_salsa20_amd64_ivsetup:
 	movl 0(%rsi),%r8d
 	movl 4(%rsi),%esi
@@ -97,7 +104,7 @@ _gcry_salsa20_amd64_ivsetup:
 
 .align 8
 .globl _gcry_salsa20_amd64_encrypt_blocks
-.type _gcry_salsa20_amd64_encrypt_blocks,@function;
+ELF(.type _gcry_salsa20_amd64_encrypt_blocks,@function;)
 _gcry_salsa20_amd64_encrypt_blocks:
 	/*
 	 * Modifications to original implementation:
@@ -918,7 +925,7 @@ _gcry_salsa20_amd64_encrypt_blocks:
 	add $64,%rdi
 	add $64,%rsi
 	jmp .L_bytes_are_64_128_or_192
-.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;
+ELF(.size _gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
 
 #endif /*defined(USE_SALSA20)*/
 #endif /*__x86_64*/
diff --git a/cipher/salsa20.c b/cipher/salsa20.c
index d75fe515..fa3d23b8 100644
--- a/cipher/salsa20.c
+++ b/cipher/salsa20.c
@@ -43,7 +43,8 @@
 
 /* USE_AMD64 indicates whether to compile with AMD64 code. */
 #undef USE_AMD64
-#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
 # define USE_AMD64 1
 #endif
 
@@ -118,12 +119,25 @@ static const char *selftest (void);
 
 
 #ifdef USE_AMD64
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
 /* AMD64 assembly implementations of Salsa20. */
-void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits);
-void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv);
+void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits)
+                                 ASM_FUNC_ABI;
+void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv)
+                                ASM_FUNC_ABI;
 unsigned int
 _gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
-                                   size_t len, int rounds);
+                                   size_t len, int rounds) ASM_FUNC_ABI;
 
 static void
 salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
@@ -141,7 +155,8 @@ static unsigned int
 salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
 {
   memset(dst, 0, SALSA20_BLOCK_SIZE);
-  return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds);
+  return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds)
+         + ASM_EXTRA_STACK;
 }
 
 #else /* USE_AMD64 */
@@ -418,6 +433,7 @@ salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
       size_t nblocks = length / SALSA20_BLOCK_SIZE;
       burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
                                                 nblocks, rounds);
+      burn += ASM_EXTRA_STACK;
       length -= SALSA20_BLOCK_SIZE * nblocks;
       outbuf += SALSA20_BLOCK_SIZE * nblocks;
       inbuf += SALSA20_BLOCK_SIZE * nblocks;
-- 
cgit v1.2.1