From 9b0c6c8141ae9bd056392a3f6b5704b505fc8501 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 14 May 2015 13:07:34 +0300 Subject: Enable AMD64 Twofish implementation on WIN64 * cipher/twofish-amd64.S: Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. (ELF): New macro to mask lines with ELF specific commands. (_gcry_twofish_amd64_decrypt_block): Use the function's own symbol in the '.size' directive instead of _gcry_twofish_amd64_encrypt_block. * cipher/twofish.c (USE_AMD64_ASM): Enable when HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS defined. [HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS] (call_sysv_fn): New. (twofish_amd64_encrypt_block, twofish_amd64_decrypt_block) (twofish_amd64_ctr_enc, twofish_amd64_cbc_dec) (twofish_amd64_cfb_dec): New wrapper functions for AMD64 assembly functions. -- Signed-off-by: Jussi Kivilinna --- cipher/twofish-amd64.S | 37 +++++++++++++--------- cipher/twofish.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 100 insertions(+), 21 deletions(-) diff --git a/cipher/twofish-amd64.S b/cipher/twofish-amd64.S index a2253076..ea88b94e 100644 --- a/cipher/twofish-amd64.S +++ b/cipher/twofish-amd64.S @@ -20,7 +20,14 @@ #ifdef __x86_64 #include -#if defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) && defined(USE_TWOFISH) +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH) + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) 
/*_*/ +#endif #ifdef __PIC__ # define RIP %rip @@ -166,7 +173,7 @@ .align 8 .globl _gcry_twofish_amd64_encrypt_block -.type _gcry_twofish_amd64_encrypt_block,@function; +ELF(.type _gcry_twofish_amd64_encrypt_block,@function;) _gcry_twofish_amd64_encrypt_block: /* input: @@ -205,11 +212,11 @@ _gcry_twofish_amd64_encrypt_block: addq $(3 * 8), %rsp; ret; -.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block; +ELF(.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;) .align 8 .globl _gcry_twofish_amd64_decrypt_block -.type _gcry_twofish_amd64_decrypt_block,@function; +ELF(.type _gcry_twofish_amd64_decrypt_block,@function;) _gcry_twofish_amd64_decrypt_block: /* input: @@ -248,7 +255,7 @@ _gcry_twofish_amd64_decrypt_block: addq $(3 * 8), %rsp; ret; -.size _gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block; +ELF(.size _gcry_twofish_amd64_decrypt_block,.-_gcry_twofish_amd64_decrypt_block;) #undef CTX @@ -462,7 +469,7 @@ _gcry_twofish_amd64_decrypt_block: outunpack3(RAB, 2); .align 8 -.type __twofish_enc_blk3,@function; +ELF(.type __twofish_enc_blk3,@function;) __twofish_enc_blk3: /* input: @@ -485,10 +492,10 @@ __twofish_enc_blk3: outunpack_enc3(); ret; -.size __twofish_enc_blk3,.-__twofish_enc_blk3; +ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;) .align 8 -.type __twofish_dec_blk3,@function; +ELF(.type __twofish_dec_blk3,@function;) __twofish_dec_blk3: /* input: @@ -511,11 +518,11 @@ __twofish_dec_blk3: outunpack_dec3(); ret; -.size __twofish_dec_blk3,.-__twofish_dec_blk3; +ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;) .align 8 .globl _gcry_twofish_amd64_ctr_enc -.type _gcry_twofish_amd64_ctr_enc,@function; +ELF(.type _gcry_twofish_amd64_ctr_enc,@function;) _gcry_twofish_amd64_ctr_enc: /* input: * %rdi: ctx, CTX @@ -593,11 +600,11 @@ _gcry_twofish_amd64_ctr_enc: addq $(8 * 8), %rsp; ret; -.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc; +ELF(.size 
_gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;) .align 8 .globl _gcry_twofish_amd64_cbc_dec -.type _gcry_twofish_amd64_cbc_dec,@function; +ELF(.type _gcry_twofish_amd64_cbc_dec,@function;) _gcry_twofish_amd64_cbc_dec: /* input: * %rdi: ctx, CTX @@ -659,11 +666,11 @@ _gcry_twofish_amd64_cbc_dec: addq $(9 * 8), %rsp; ret; -.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec; +ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;) .align 8 .globl _gcry_twofish_amd64_cfb_dec -.type _gcry_twofish_amd64_cfb_dec,@function; +ELF(.type _gcry_twofish_amd64_cfb_dec,@function;) _gcry_twofish_amd64_cfb_dec: /* input: * %rdi: ctx, CTX @@ -725,7 +732,7 @@ _gcry_twofish_amd64_cfb_dec: addq $(8 * 8), %rsp; ret; -.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec; +ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;) #endif /*USE_TWOFISH*/ #endif /*__x86_64*/ diff --git a/cipher/twofish.c b/cipher/twofish.c index ecd76e35..ce83fadf 100644 --- a/cipher/twofish.c +++ b/cipher/twofish.c @@ -53,7 +53,8 @@ /* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ #undef USE_AMD64_ASM -#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) # define USE_AMD64_ASM 1 #endif @@ -754,6 +755,77 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, byte *iv); +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +static inline void +call_sysv_fn (const void *fn, const void *arg1, const void *arg2, + const void *arg3, const void *arg4) +{ + /* Call SystemV ABI function without storing non-volatile XMM registers, + * as target function does not use vector instruction sets. 
*/ + asm volatile ("callq *%0\n\t" + : "+a" (fn), + "+D" (arg1), + "+S" (arg2), + "+d" (arg3), + "+c" (arg4) + : + : "cc", "memory", "r8", "r9", "r10", "r11"); +} +#endif + +static inline void +twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte *in) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_encrypt_block, c, out, in, NULL); +#else + _gcry_twofish_amd64_encrypt_block(c, out, in); +#endif +} + +static inline void +twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte *in) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_decrypt_block, c, out, in, NULL); +#else + _gcry_twofish_amd64_decrypt_block(c, out, in); +#endif +} + +static inline void +twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in, + byte *ctr) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_ctr_enc, c, out, in, ctr); +#else + _gcry_twofish_amd64_ctr_enc(c, out, in, ctr); +#endif +} + +static inline void +twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_cbc_dec, c, out, in, iv); +#else + _gcry_twofish_amd64_cbc_dec(c, out, in, iv); +#endif +} + +static inline void +twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in, + byte *iv) +{ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS + call_sysv_fn(_gcry_twofish_amd64_cfb_dec, c, out, in, iv); +#else + _gcry_twofish_amd64_cfb_dec(c, out, in, iv); +#endif +} + #elif defined(USE_ARM_ASM) /* Assembly implementations of Twofish. 
*/ @@ -833,7 +905,7 @@ static unsigned int twofish_encrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; - _gcry_twofish_amd64_encrypt_block(ctx, out, in); + twofish_amd64_encrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } @@ -900,7 +972,7 @@ static unsigned int twofish_decrypt (void *context, byte *out, const byte *in) { TWOFISH_context *ctx = context; - _gcry_twofish_amd64_decrypt_block(ctx, out, in); + twofish_amd64_decrypt_block(ctx, out, in); return /*burn_stack*/ (4*sizeof (void*)); } @@ -980,7 +1052,7 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, /* Process data in 3 block chunks. */ while (nblocks >= 3) { - _gcry_twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); + twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; @@ -1038,7 +1110,7 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Process data in 3 block chunks. */ while (nblocks >= 3) { - _gcry_twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); + twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; @@ -1087,7 +1159,7 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, /* Process data in 3 block chunks. */ while (nblocks >= 3) { - _gcry_twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); + twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv); nblocks -= 3; outbuf += 3 * TWOFISH_BLOCKSIZE; -- cgit v1.2.1