summaryrefslogtreecommitdiff
path: root/cipher/twofish.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2013-10-22 17:07:53 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2013-10-22 19:57:27 +0300
commit98674fdaa30ab22a3ac86ca05d688b5b6112895d (patch)
tree937374c6701fa80161a727b200aaddf0933d37c5 /cipher/twofish.c
parente67c67321ce240c93dd0fa2b21c649c0a8e233f7 (diff)
downloadlibgcrypt-98674fdaa30ab22a3ac86ca05d688b5b6112895d.tar.gz
twofish: add ARMv6 assembly implementation
* cipher/Makefile.am: Add 'twofish-armv6.S'.
* cipher/twofish-armv6.S: New.
* cipher/twofish.c (USE_ARMV6_ASM): New macro.
[USE_ARMV6_ASM] (_gcry_twofish_armv6_encrypt_block)
(_gcry_twofish_armv6_decrypt_block): New prototypes.
[USE_ARMV6_ASM] (twofish_encrypt, twofish_decrypt): Add.
[USE_AMD64_ASM] (do_twofish_encrypt, do_twofish_decrypt): Remove.
(_gcry_twofish_ctr_enc, _gcry_twofish_cfb_dec): Use 'twofish_encrypt' instead of 'do_twofish_encrypt'.
(_gcry_twofish_cbc_dec): Use 'twofish_decrypt' instead of 'do_twofish_decrypt'.
* configure.ac [arm]: Add 'twofish-armv6.lo'.
--
Add optimized ARMv6 assembly implementation for Twofish. Implementation is tuned for Cortex-A8. Unaligned access handling is done in assembly part.
For now, only enable this on little-endian systems as big-endian correctness has not been tested yet.
Old (gcc-4.8) vs new (twofish-asm), Cortex-A8 (on armhf):
           ECB/Stream         CBC             CFB             OFB             CTR             CCM
         --------------- --------------- --------------- --------------- --------------- ---------------
TWOFISH   1.23x   1.25x   1.16x   1.26x   1.16x   1.30x   1.18x   1.17x   1.23x   1.23x   1.22x   1.22x
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/twofish.c')
-rw-r--r--cipher/twofish.c88
1 files changed, 62 insertions, 26 deletions
diff --git a/cipher/twofish.c b/cipher/twofish.c
index 993ad0f4..d2cabbe8 100644
--- a/cipher/twofish.c
+++ b/cipher/twofish.c
@@ -57,6 +57,14 @@
# define USE_AMD64_ASM 1
#endif
+/* USE_ARMV6_ASM indicates whether to use ARMv6 assembly code. */
+#undef USE_ARMV6_ASM
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__)
+# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARMV6_ASM 1
+# endif
+#endif
+
/* Prototype for the self-test function. */
static const char *selftest(void);
@@ -746,7 +754,16 @@ extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
const byte *in, byte *iv);
-#else /*!USE_AMD64_ASM*/
+#elif defined(USE_ARMV6_ASM)
+
+/* Assembly implementations of Twofish. */
+extern void _gcry_twofish_armv6_encrypt_block(const TWOFISH_context *c,
+ byte *out, const byte *in);
+
+extern void _gcry_twofish_armv6_decrypt_block(const TWOFISH_context *c,
+ byte *out, const byte *in);
+
+#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
/* Macros to compute the g() function in the encryption and decryption
* rounds. G1 is the straight g() function; G2 includes the 8-bit
@@ -812,21 +829,25 @@ extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
#ifdef USE_AMD64_ASM
-static void
-do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
+static unsigned int
+twofish_encrypt (void *context, byte *out, const byte *in)
{
+ TWOFISH_context *ctx = context;
_gcry_twofish_amd64_encrypt_block(ctx, out, in);
+ return /*burn_stack*/ (4*sizeof (void*));
}
+#elif defined(USE_ARMV6_ASM)
+
static unsigned int
twofish_encrypt (void *context, byte *out, const byte *in)
{
TWOFISH_context *ctx = context;
- _gcry_twofish_amd64_encrypt_block(ctx, out, in);
+ _gcry_twofish_armv6_encrypt_block(ctx, out, in);
return /*burn_stack*/ (4*sizeof (void*));
}
-#else /*!USE_AMD64_ASM*/
+#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
static void
do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
@@ -868,28 +889,32 @@ twofish_encrypt (void *context, byte *out, const byte *in)
return /*burn_stack*/ (24+3*sizeof (void*));
}
-#endif /*!USE_AMD64_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
/* Decrypt one block. in and out may be the same. */
#ifdef USE_AMD64_ASM
-static void
-do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
{
+ TWOFISH_context *ctx = context;
_gcry_twofish_amd64_decrypt_block(ctx, out, in);
+ return /*burn_stack*/ (4*sizeof (void*));
}
+#elif defined(USE_ARMV6_ASM)
+
static unsigned int
twofish_decrypt (void *context, byte *out, const byte *in)
{
TWOFISH_context *ctx = context;
- _gcry_twofish_amd64_decrypt_block(ctx, out, in);
+ _gcry_twofish_armv6_decrypt_block(ctx, out, in);
return /*burn_stack*/ (4*sizeof (void*));
}
-#else /*!USE_AMD64_ASM*/
+#else /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
static void
do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
@@ -932,7 +957,7 @@ twofish_decrypt (void *context, byte *out, const byte *in)
return /*burn_stack*/ (24+3*sizeof (void*));
}
-#endif /*!USE_AMD64_ASM*/
+#endif /*!USE_AMD64_ASM && !USE_ARMV6_ASM*/
@@ -947,14 +972,11 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
unsigned char tmpbuf[TWOFISH_BLOCKSIZE];
- int burn_stack_depth = 24 + 3 * sizeof (void*);
+ unsigned int burn, burn_stack_depth = 0;
int i;
#ifdef USE_AMD64_ASM
{
- if (nblocks >= 3 && burn_stack_depth < 8 * sizeof(void*))
- burn_stack_depth = 8 * sizeof(void*);
-
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
@@ -963,6 +985,10 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
nblocks -= 3;
outbuf += 3 * TWOFISH_BLOCKSIZE;
inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 8 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
}
/* Use generic code to handle smaller chunks... */
@@ -973,7 +999,10 @@ _gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
for ( ;nblocks; nblocks-- )
{
/* Encrypt the counter. */
- do_twofish_encrypt(ctx, tmpbuf, ctr);
+ burn = twofish_encrypt(ctx, tmpbuf, ctr);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+
/* XOR the input with the encrypted counter and store in output. */
buf_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
outbuf += TWOFISH_BLOCKSIZE;
@@ -1002,13 +1031,10 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
unsigned char savebuf[TWOFISH_BLOCKSIZE];
- int burn_stack_depth = 24 + 3 * sizeof (void*);
+ unsigned int burn, burn_stack_depth = 0;
#ifdef USE_AMD64_ASM
{
- if (nblocks >= 3 && burn_stack_depth < 9 * sizeof(void*))
- burn_stack_depth = 9 * sizeof(void*);
-
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
@@ -1017,6 +1043,10 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
nblocks -= 3;
outbuf += 3 * TWOFISH_BLOCKSIZE;
inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 9 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
}
/* Use generic code to handle smaller chunks... */
@@ -1029,7 +1059,9 @@ _gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
OUTBUF. */
memcpy(savebuf, inbuf, TWOFISH_BLOCKSIZE);
- do_twofish_decrypt (ctx, outbuf, inbuf);
+ burn = twofish_decrypt (ctx, outbuf, inbuf);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
buf_xor(outbuf, outbuf, iv, TWOFISH_BLOCKSIZE);
memcpy(iv, savebuf, TWOFISH_BLOCKSIZE);
@@ -1051,13 +1083,10 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
TWOFISH_context *ctx = context;
unsigned char *outbuf = outbuf_arg;
const unsigned char *inbuf = inbuf_arg;
- int burn_stack_depth = 24 + 3 * sizeof (void*);
+ unsigned int burn, burn_stack_depth = 0;
#ifdef USE_AMD64_ASM
{
- if (nblocks >= 3 && burn_stack_depth < 8 * sizeof(void*))
- burn_stack_depth = 8 * sizeof(void*);
-
/* Process data in 3 block chunks. */
while (nblocks >= 3)
{
@@ -1066,6 +1095,10 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
nblocks -= 3;
outbuf += 3 * TWOFISH_BLOCKSIZE;
inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+ burn = 8 * sizeof(void*);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
}
/* Use generic code to handle smaller chunks... */
@@ -1074,7 +1107,10 @@ _gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
for ( ;nblocks; nblocks-- )
{
- do_twofish_encrypt(ctx, iv, iv);
+ burn = twofish_encrypt(ctx, iv, iv);
+ if (burn > burn_stack_depth)
+ burn_stack_depth = burn;
+
buf_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
outbuf += TWOFISH_BLOCKSIZE;
inbuf += TWOFISH_BLOCKSIZE;