From e05682093ffb003b589a697428d918d755ac631d Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sun, 3 May 2015 17:28:40 +0300 Subject: Enable AMD64 Blowfish implementation on WIN64 * cipher/blowfish-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. * cipher/blowfish.c (USE_AMD64_ASM): Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New. (do_encrypt, do_encrypt_block, do_decrypt_block) [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS]: Call assembly function through 'call_sysv_fn'. (blowfish_amd64_ctr_enc, blowfish_amd64_cbc_dec) (blowfish_amd64_cfb_dec): New wrapper functions for bulk assembly functions. .. Signed-off-by: Jussi Kivilinna --- cipher/blowfish-amd64.S | 46 +++++++++++++++++------------- cipher/blowfish.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 97 insertions(+), 23 deletions(-) diff --git a/cipher/blowfish-amd64.S b/cipher/blowfish-amd64.S index 87b676f3..21b63fc1 100644 --- a/cipher/blowfish-amd64.S +++ b/cipher/blowfish-amd64.S @@ -20,7 +20,15 @@ #ifdef __x86_64 #include <config.h> -#if defined(USE_BLOWFISH) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) +#if defined(USE_BLOWFISH) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif .text @@ -120,7 +128,7 @@ movq RX0, (RIO); .align 8 -.type __blowfish_enc_blk1,@function; +ELF(.type __blowfish_enc_blk1,@function;) __blowfish_enc_blk1: /* input: @@ -145,11 +153,11 @@ __blowfish_enc_blk1: movq %r11, %rbp; ret; -.size __blowfish_enc_blk1,.-__blowfish_enc_blk1; +ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;) .align 8 .globl _gcry_blowfish_amd64_do_encrypt -.type _gcry_blowfish_amd64_do_encrypt,@function; +ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;) _gcry_blowfish_amd64_do_encrypt: /* input: @@ -171,11 +179,11 @@ _gcry_blowfish_amd64_do_encrypt: movl RX0d, (RX2); ret; -.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt; +ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;) .align 8 .globl _gcry_blowfish_amd64_encrypt_block -.type _gcry_blowfish_amd64_encrypt_block,@function; +ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;) _gcry_blowfish_amd64_encrypt_block: /* input: @@ -195,11 +203,11 @@ _gcry_blowfish_amd64_encrypt_block: write_block(); ret; -.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block; +ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;) .align 8 .globl _gcry_blowfish_amd64_decrypt_block -.type _gcry_blowfish_amd64_decrypt_block,@function; +ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;) _gcry_blowfish_amd64_decrypt_block: /* input: @@ -231,7 +239,7 @@ _gcry_blowfish_amd64_decrypt_block: movq %r11, %rbp; ret; -.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block; +ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;) /********************************************************************** 4-way blowfish, four blocks parallel @@ -319,7 +327,7 @@ _gcry_blowfish_amd64_decrypt_block: bswapq RX3; .align 8 -.type __blowfish_enc_blk4,@function; +ELF(.type __blowfish_enc_blk4,@function;) __blowfish_enc_blk4: /* input: @@ -343,10 +351,10 @@ 
__blowfish_enc_blk4: outbswap_block4(); ret; -.size __blowfish_enc_blk4,.-__blowfish_enc_blk4; +ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;) .align 8 -.type __blowfish_dec_blk4,@function; +ELF(.type __blowfish_dec_blk4,@function;) __blowfish_dec_blk4: /* input: @@ -372,11 +380,11 @@ __blowfish_dec_blk4: outbswap_block4(); ret; -.size __blowfish_dec_blk4,.-__blowfish_dec_blk4; +ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;) .align 8 .globl _gcry_blowfish_amd64_ctr_enc -.type _gcry_blowfish_amd64_ctr_enc,@function; +ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;) _gcry_blowfish_amd64_ctr_enc: /* input: * %rdi: ctx, CTX @@ -429,11 +437,11 @@ _gcry_blowfish_amd64_ctr_enc: popq %rbp; ret; -.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc; +ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;) .align 8 .globl _gcry_blowfish_amd64_cbc_dec -.type _gcry_blowfish_amd64_cbc_dec,@function; +ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;) _gcry_blowfish_amd64_cbc_dec: /* input: * %rdi: ctx, CTX @@ -477,11 +485,11 @@ _gcry_blowfish_amd64_cbc_dec: popq %rbp; ret; -.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec; +ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;) .align 8 .globl _gcry_blowfish_amd64_cfb_dec -.type _gcry_blowfish_amd64_cfb_dec,@function; +ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;) _gcry_blowfish_amd64_cfb_dec: /* input: * %rdi: ctx, CTX @@ -527,7 +535,7 @@ _gcry_blowfish_amd64_cfb_dec: popq %rbx; popq %rbp; ret; -.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec; +ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;) #endif /*defined(USE_BLOWFISH)*/ #endif /*__x86_64*/ diff --git a/cipher/blowfish.c b/cipher/blowfish.c index ae470d8b..a3fc26ce 100644 --- a/cipher/blowfish.c +++ b/cipher/blowfish.c @@ -45,7 +45,8 @@ /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. 
*/ #undef USE_AMD64_ASM -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && \ +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ (BLOWFISH_ROUNDS == 16) # define USE_AMD64_ASM 1 #endif @@ -280,22 +281,87 @@ extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, byte *iv); +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +static inline void +call_sysv_fn (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. */ + asm volatile ("callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : + : "cc", "memory", "r8", "r9", "r10", "r11"); +} +#endif + static void do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) { +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_do_encrypt, bc, ret_xl, ret_xr, NULL); +#else _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr); +#endif } static void do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_encrypt_block, context, outbuf, inbuf, + NULL); +#else _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf); +#endif } static void do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) { +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_decrypt_block, context, outbuf, inbuf, + NULL); +#else _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf); +#endif +} + +static inline void +blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *ctr) +{ +#ifdef 
HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_ctr_enc, ctx, out, in, ctr); +#else + _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr); +#endif +} + +static inline void +blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_cbc_dec, ctx, out, in, iv); +#else + _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv); +#endif +} + +static inline void +blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn (_gcry_blowfish_amd64_cfb_dec, ctx, out, in, iv); +#else + _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv); +#endif } static unsigned int @@ -605,7 +671,7 @@ _gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Process data in 4 block chunks. */ while (nblocks >= 4) { - _gcry_blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); + blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 4; outbuf += 4 * BLOWFISH_BLOCKSIZE; @@ -674,7 +740,7 @@ _gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Process data in 4 block chunks. */ while (nblocks >= 4) { - _gcry_blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); + blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 4; outbuf += 4 * BLOWFISH_BLOCKSIZE; @@ -734,7 +800,7 @@ _gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Process data in 4 block chunks. */ while (nblocks >= 4) { - _gcry_blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); + blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 4; outbuf += 4 * BLOWFISH_BLOCKSIZE; -- cgit v1.2.1