summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--cipher/Makefile.am2
-rw-r--r--cipher/cipher-selftest.c18
-rw-r--r--cipher/cipher.c7
-rw-r--r--cipher/des-amd64.S1030
-rw-r--r--cipher/des.c292
-rw-r--r--configure.ac7
-rw-r--r--src/cipher.h13
7 files changed, 1362 insertions, 7 deletions
diff --git a/cipher/Makefile.am b/cipher/Makefile.am
index 462e6db5..3c20d3c3 100644
--- a/cipher/Makefile.am
+++ b/cipher/Makefile.am
@@ -60,7 +60,7 @@ arcfour.c arcfour-amd64.S \
blowfish.c blowfish-amd64.S blowfish-arm.S \
cast5.c cast5-amd64.S cast5-arm.S \
crc.c \
-des.c \
+des.c des-amd64.S \
dsa.c \
elgamal.c \
ecc.c ecc-curves.c ecc-misc.c ecc-common.h \
diff --git a/cipher/cipher-selftest.c b/cipher/cipher-selftest.c
index 5e958146..852368a0 100644
--- a/cipher/cipher-selftest.c
+++ b/cipher/cipher-selftest.c
@@ -82,7 +82,11 @@ _gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func,
ciphertext = plaintext2 + nblocks * blocksize;
/* Initialize ctx */
- setkey_func (ctx, key, sizeof(key));
+ if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR)
+ {
+ xfree(mem);
+ return "setkey failed";
+ }
/* Test single block code path */
memset (iv, 0x4e, blocksize);
@@ -199,7 +203,11 @@ _gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey_func,
ciphertext = plaintext2 + nblocks * blocksize;
/* Initialize ctx */
- setkey_func (ctx, key, sizeof(key));
+ if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR)
+ {
+ xfree(mem);
+ return "setkey failed";
+ }
/* Test single block code path */
memset(iv, 0xd3, blocksize);
@@ -316,7 +324,11 @@ _gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey_func,
ciphertext2 = ciphertext + nblocks * blocksize;
/* Initialize ctx */
- setkey_func (ctx, key, sizeof(key));
+ if (setkey_func (ctx, key, sizeof(key)) != GPG_ERR_NO_ERROR)
+ {
+ xfree(mem);
+ return "setkey failed";
+ }
/* Test single block code path */
memset (iv, 0xff, blocksize);
diff --git a/cipher/cipher.c b/cipher/cipher.c
index baa4720a..6552ed30 100644
--- a/cipher/cipher.c
+++ b/cipher/cipher.c
@@ -513,6 +513,13 @@ _gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
h->bulk.ctr_enc = _gcry_camellia_ctr_enc;
break;
#endif /*USE_CAMELLIA*/
+#ifdef USE_DES
+ case GCRY_CIPHER_3DES:
+ h->bulk.cbc_dec = _gcry_3des_cbc_dec;
+ h->bulk.cfb_dec = _gcry_3des_cfb_dec;
+ h->bulk.ctr_enc = _gcry_3des_ctr_enc;
+ break;
+#endif /*USE_DES*/
#ifdef USE_SERPENT
case GCRY_CIPHER_SERPENT128:
case GCRY_CIPHER_SERPENT192:
diff --git a/cipher/des-amd64.S b/cipher/des-amd64.S
new file mode 100644
index 00000000..e8b2c568
--- /dev/null
+++ b/cipher/des-amd64.S
@@ -0,0 +1,1030 @@
+/* des-amd64.S - AMD64 assembly implementation of 3DES cipher
+ *
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_DES) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+
+#ifdef __PIC__
+# define RIP (%rip)
+#else
+# define RIP
+#endif
+
+.text
+
+#define s1 0
+#define s2 ((s1) + (64*8))
+#define s3 ((s2) + (64*8))
+#define s4 ((s3) + (64*8))
+#define s5 ((s4) + (64*8))
+#define s6 ((s5) + (64*8))
+#define s7 ((s6) + (64*8))
+#define s8 ((s7) + (64*8))
+
+/* register macros */
+#define CTX %rdi
+#define SBOXES %rbp
+
+#define RL0 %r8
+#define RL1 %r9
+#define RL2 %r10
+
+#define RL0d %r8d
+#define RL1d %r9d
+#define RL2d %r10d
+
+#define RR0 %r11
+#define RR1 %r12
+#define RR2 %r13
+
+#define RR0d %r11d
+#define RR1d %r12d
+#define RR2d %r13d
+
+#define RW0 %rax
+#define RW1 %rbx
+#define RW2 %rcx
+
+#define RW0d %eax
+#define RW1d %ebx
+#define RW2d %ecx
+
+#define RW0bl %al
+#define RW1bl %bl
+#define RW2bl %cl
+
+#define RW0bh %ah
+#define RW1bh %bh
+#define RW2bh %ch
+
+#define RT0 %r15
+#define RT1 %rsi
+#define RT2 %r14
+#define RT3 %rdx
+
+#define RT0d %r15d
+#define RT1d %esi
+#define RT2d %r14d
+#define RT3d %edx
+
+/***********************************************************************
+ * 1-way 3DES
+ ***********************************************************************/
+#define do_permutation(a, b, offset, mask) \
+ movl a, RT0d; \
+ shrl $(offset), RT0d; \
+ xorl b, RT0d; \
+ andl $(mask), RT0d; \
+ xorl RT0d, b; \
+ shll $(offset), RT0d; \
+ xorl RT0d, a;
+
+#define expand_to_64bits(val, mask) \
+ movl val##d, RT0d; \
+ rorl $4, RT0d; \
+ shlq $32, RT0; \
+ orq RT0, val; \
+ andq mask, val;
+
+#define compress_to_64bits(val) \
+ movq val, RT0; \
+ shrq $32, RT0; \
+ roll $4, RT0d; \
+ orl RT0d, val##d;
+
+#define initial_permutation(left, right) \
+ do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \
+ do_permutation(left##d, right##d, 16, 0x0000ffff); \
+ do_permutation(right##d, left##d, 2, 0x33333333); \
+ do_permutation(right##d, left##d, 8, 0x00ff00ff); \
+ movabs $0x3f3f3f3f3f3f3f3f, RT3; \
+ movl left##d, RW0d; \
+ roll $1, right##d; \
+ xorl right##d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, left##d; \
+ xorl RW0d, right##d; \
+ roll $1, left##d; \
+ expand_to_64bits(right, RT3); \
+ expand_to_64bits(left, RT3);
+
+#define final_permutation(left, right) \
+ compress_to_64bits(right); \
+ compress_to_64bits(left); \
+ movl right##d, RW0d; \
+ rorl $1, left##d; \
+ xorl left##d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, right##d; \
+ xorl RW0d, left##d; \
+ rorl $1, right##d; \
+ do_permutation(right##d, left##d, 8, 0x00ff00ff); \
+ do_permutation(right##d, left##d, 2, 0x33333333); \
+ do_permutation(left##d, right##d, 16, 0x0000ffff); \
+ do_permutation(left##d, right##d, 4, 0x0f0f0f0f);
+
+#define round1(n, from, to, load_next_key) \
+ xorq from, RW0; \
+ \
+ movzbl RW0bl, RT0d; \
+ movzbl RW0bh, RT1d; \
+ shrq $16, RW0; \
+ movzbl RW0bl, RT2d; \
+ movzbl RW0bh, RT3d; \
+ shrq $16, RW0; \
+ movq s8(SBOXES, RT0, 8), RT0; \
+ xorq s6(SBOXES, RT1, 8), to; \
+ movzbl RW0bl, RL1d; \
+ movzbl RW0bh, RT1d; \
+ shrl $16, RW0d; \
+ xorq s4(SBOXES, RT2, 8), RT0; \
+ xorq s2(SBOXES, RT3, 8), to; \
+ movzbl RW0bl, RT2d; \
+ movzbl RW0bh, RT3d; \
+ xorq s7(SBOXES, RL1, 8), RT0; \
+ xorq s5(SBOXES, RT1, 8), to; \
+ xorq s3(SBOXES, RT2, 8), RT0; \
+ load_next_key(n, RW0); \
+ xorq RT0, to; \
+ xorq s1(SBOXES, RT3, 8), to; \
+
+#define load_next_key(n, RWx) \
+ movq (((n) + 1) * 8)(CTX), RWx;
+
+#define dummy2(a, b) /*_*/
+
+#define read_block(io, left, right) \
+ movl (io), left##d; \
+ movl 4(io), right##d; \
+ bswapl left##d; \
+ bswapl right##d;
+
+#define write_block(io, left, right) \
+ bswapl left##d; \
+ bswapl right##d; \
+ movl left##d, (io); \
+ movl right##d, 4(io);
+
+.align 8
+.globl _gcry_3des_amd64_crypt_block
+.type _gcry_3des_amd64_crypt_block,@function;
+
+_gcry_3des_amd64_crypt_block:
+ /* input:
+ * %rdi: round keys, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+ pushq %rbp;
+ pushq %rbx;
+ pushq %r12;
+ pushq %r13;
+ pushq %r14;
+ pushq %r15;
+ pushq %rsi; /*dst*/
+
+ leaq .L_s1 RIP, SBOXES;
+
+ read_block(%rdx, RL0, RR0);
+ initial_permutation(RL0, RR0);
+
+ movq (CTX), RW0;
+
+ round1(0, RR0, RL0, load_next_key);
+ round1(1, RL0, RR0, load_next_key);
+ round1(2, RR0, RL0, load_next_key);
+ round1(3, RL0, RR0, load_next_key);
+ round1(4, RR0, RL0, load_next_key);
+ round1(5, RL0, RR0, load_next_key);
+ round1(6, RR0, RL0, load_next_key);
+ round1(7, RL0, RR0, load_next_key);
+ round1(8, RR0, RL0, load_next_key);
+ round1(9, RL0, RR0, load_next_key);
+ round1(10, RR0, RL0, load_next_key);
+ round1(11, RL0, RR0, load_next_key);
+ round1(12, RR0, RL0, load_next_key);
+ round1(13, RL0, RR0, load_next_key);
+ round1(14, RR0, RL0, load_next_key);
+ round1(15, RL0, RR0, load_next_key);
+
+ round1(16+0, RL0, RR0, load_next_key);
+ round1(16+1, RR0, RL0, load_next_key);
+ round1(16+2, RL0, RR0, load_next_key);
+ round1(16+3, RR0, RL0, load_next_key);
+ round1(16+4, RL0, RR0, load_next_key);
+ round1(16+5, RR0, RL0, load_next_key);
+ round1(16+6, RL0, RR0, load_next_key);
+ round1(16+7, RR0, RL0, load_next_key);
+ round1(16+8, RL0, RR0, load_next_key);
+ round1(16+9, RR0, RL0, load_next_key);
+ round1(16+10, RL0, RR0, load_next_key);
+ round1(16+11, RR0, RL0, load_next_key);
+ round1(16+12, RL0, RR0, load_next_key);
+ round1(16+13, RR0, RL0, load_next_key);
+ round1(16+14, RL0, RR0, load_next_key);
+ round1(16+15, RR0, RL0, load_next_key);
+
+ round1(32+0, RR0, RL0, load_next_key);
+ round1(32+1, RL0, RR0, load_next_key);
+ round1(32+2, RR0, RL0, load_next_key);
+ round1(32+3, RL0, RR0, load_next_key);
+ round1(32+4, RR0, RL0, load_next_key);
+ round1(32+5, RL0, RR0, load_next_key);
+ round1(32+6, RR0, RL0, load_next_key);
+ round1(32+7, RL0, RR0, load_next_key);
+ round1(32+8, RR0, RL0, load_next_key);
+ round1(32+9, RL0, RR0, load_next_key);
+ round1(32+10, RR0, RL0, load_next_key);
+ round1(32+11, RL0, RR0, load_next_key);
+ round1(32+12, RR0, RL0, load_next_key);
+ round1(32+13, RL0, RR0, load_next_key);
+ round1(32+14, RR0, RL0, load_next_key);
+ round1(32+15, RL0, RR0, dummy2);
+
+ popq RW2; /*dst*/
+ final_permutation(RR0, RL0);
+ write_block(RW2, RR0, RL0);
+
+ popq %r15;
+ popq %r14;
+ popq %r13;
+ popq %r12;
+ popq %rbx;
+ popq %rbp;
+
+ ret;
+.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;
+
+/***********************************************************************
+ * 3-way 3DES
+ ***********************************************************************/
+#define expand_to_64bits(val, mask) \
+ movl val##d, RT0d; \
+ rorl $4, RT0d; \
+ shlq $32, RT0; \
+ orq RT0, val; \
+ andq mask, val;
+
+#define compress_to_64bits(val) \
+ movq val, RT0; \
+ shrq $32, RT0; \
+ roll $4, RT0d; \
+ orl RT0d, val##d;
+
+#define initial_permutation3(left, right) \
+ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
+ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
+ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
+ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
+ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \
+ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
+ \
+ do_permutation(right##0d, left##0d, 2, 0x33333333); \
+ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
+ do_permutation(right##1d, left##1d, 2, 0x33333333); \
+ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
+ do_permutation(right##2d, left##2d, 2, 0x33333333); \
+ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
+ \
+ movabs $0x3f3f3f3f3f3f3f3f, RT3; \
+ \
+ movl left##0d, RW0d; \
+ roll $1, right##0d; \
+ xorl right##0d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, left##0d; \
+ xorl RW0d, right##0d; \
+ roll $1, left##0d; \
+ expand_to_64bits(right##0, RT3); \
+ expand_to_64bits(left##0, RT3); \
+ movl left##1d, RW1d; \
+ roll $1, right##1d; \
+ xorl right##1d, RW1d; \
+ andl $0xaaaaaaaa, RW1d; \
+ xorl RW1d, left##1d; \
+ xorl RW1d, right##1d; \
+ roll $1, left##1d; \
+ expand_to_64bits(right##1, RT3); \
+ expand_to_64bits(left##1, RT3); \
+ movl left##2d, RW2d; \
+ roll $1, right##2d; \
+ xorl right##2d, RW2d; \
+ andl $0xaaaaaaaa, RW2d; \
+ xorl RW2d, left##2d; \
+ xorl RW2d, right##2d; \
+ roll $1, left##2d; \
+ expand_to_64bits(right##2, RT3); \
+ expand_to_64bits(left##2, RT3);
+
+#define final_permutation3(left, right) \
+ compress_to_64bits(right##0); \
+ compress_to_64bits(left##0); \
+ movl right##0d, RW0d; \
+ rorl $1, left##0d; \
+ xorl left##0d, RW0d; \
+ andl $0xaaaaaaaa, RW0d; \
+ xorl RW0d, right##0d; \
+ xorl RW0d, left##0d; \
+ rorl $1, right##0d; \
+ compress_to_64bits(right##1); \
+ compress_to_64bits(left##1); \
+ movl right##1d, RW1d; \
+ rorl $1, left##1d; \
+ xorl left##1d, RW1d; \
+ andl $0xaaaaaaaa, RW1d; \
+ xorl RW1d, right##1d; \
+ xorl RW1d, left##1d; \
+ rorl $1, right##1d; \
+ compress_to_64bits(right##2); \
+ compress_to_64bits(left##2); \
+ movl right##2d, RW2d; \
+ rorl $1, left##2d; \
+ xorl left##2d, RW2d; \
+ andl $0xaaaaaaaa, RW2d; \
+ xorl RW2d, right##2d; \
+ xorl RW2d, left##2d; \
+ rorl $1, right##2d; \
+ \
+ do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \
+ do_permutation(right##0d, left##0d, 2, 0x33333333); \
+ do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \
+ do_permutation(right##1d, left##1d, 2, 0x33333333); \
+ do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \
+ do_permutation(right##2d, left##2d, 2, 0x33333333); \
+ \
+ do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
+ do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \
+ do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
+ do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \
+ do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
+ do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f);
+
+#define round3(n, from, to, load_next_key, do_movq) \
+ xorq from##0, RW0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ shrq $16, RW0; \
+ xorq s8(SBOXES, RT3, 8), to##0; \
+ xorq s6(SBOXES, RT1, 8), to##0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ shrq $16, RW0; \
+ xorq s4(SBOXES, RT3, 8), to##0; \
+ xorq s2(SBOXES, RT1, 8), to##0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ shrl $16, RW0d; \
+ xorq s7(SBOXES, RT3, 8), to##0; \
+ xorq s5(SBOXES, RT1, 8), to##0; \
+ movzbl RW0bl, RT3d; \
+ movzbl RW0bh, RT1d; \
+ load_next_key(n, RW0); \
+ xorq s3(SBOXES, RT3, 8), to##0; \
+ xorq s1(SBOXES, RT1, 8), to##0; \
+ xorq from##1, RW1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ shrq $16, RW1; \
+ xorq s8(SBOXES, RT3, 8), to##1; \
+ xorq s6(SBOXES, RT1, 8), to##1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ shrq $16, RW1; \
+ xorq s4(SBOXES, RT3, 8), to##1; \
+ xorq s2(SBOXES, RT1, 8), to##1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ shrl $16, RW1d; \
+ xorq s7(SBOXES, RT3, 8), to##1; \
+ xorq s5(SBOXES, RT1, 8), to##1; \
+ movzbl RW1bl, RT3d; \
+ movzbl RW1bh, RT1d; \
+ do_movq(RW0, RW1); \
+ xorq s3(SBOXES, RT3, 8), to##1; \
+ xorq s1(SBOXES, RT1, 8), to##1; \
+ xorq from##2, RW2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ shrq $16, RW2; \
+ xorq s8(SBOXES, RT3, 8), to##2; \
+ xorq s6(SBOXES, RT1, 8), to##2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ shrq $16, RW2; \
+ xorq s4(SBOXES, RT3, 8), to##2; \
+ xorq s2(SBOXES, RT1, 8), to##2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ shrl $16, RW2d; \
+ xorq s7(SBOXES, RT3, 8), to##2; \
+ xorq s5(SBOXES, RT1, 8), to##2; \
+ movzbl RW2bl, RT3d; \
+ movzbl RW2bh, RT1d; \
+ do_movq(RW0, RW2); \
+ xorq s3(SBOXES, RT3, 8), to##2; \
+ xorq s1(SBOXES, RT1, 8), to##2;
+
+#define __movq(src, dst) \
+ movq src, dst;
+
+#define read_block(io, left, right) \
+ movl (io), left##d; \
+ movl 4(io), right##d; \
+ bswapl left##d; \
+ bswapl right##d;
+
+#define write_block(io, left, right) \
+ bswapl left##d; \
+ bswapl right##d; \
+ movl left##d, (io); \
+ movl right##d, 4(io);
+
+.align 8
+.type _gcry_3des_amd64_crypt_blk3,@function;
+_gcry_3des_amd64_crypt_blk3:
+ /* input:
+ * %rdi: round keys, CTX
+ * RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks
+ * RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks
+ */
+
+ leaq .L_s1 RIP, SBOXES;
+
+ initial_permutation3(RL, RR);
+
+ movq 0(CTX), RW0;
+ movq RW0, RW1;
+ movq RW0, RW2;
+
+ round3(0, RR, RL, load_next_key, __movq);
+ round3(1, RL, RR, load_next_key, __movq);
+ round3(2, RR, RL, load_next_key, __movq);
+ round3(3, RL, RR, load_next_key, __movq);
+ round3(4, RR, RL, load_next_key, __movq);
+ round3(5, RL, RR, load_next_key, __movq);
+ round3(6, RR, RL, load_next_key, __movq);
+ round3(7, RL, RR, load_next_key, __movq);
+ round3(8, RR, RL, load_next_key, __movq);
+ round3(9, RL, RR, load_next_key, __movq);
+ round3(10, RR, RL, load_next_key, __movq);
+ round3(11, RL, RR, load_next_key, __movq);
+ round3(12, RR, RL, load_next_key, __movq);
+ round3(13, RL, RR, load_next_key, __movq);
+ round3(14, RR, RL, load_next_key, __movq);
+ round3(15, RL, RR, load_next_key, __movq);
+
+ round3(16+0, RL, RR, load_next_key, __movq);
+ round3(16+1, RR, RL, load_next_key, __movq);
+ round3(16+2, RL, RR, load_next_key, __movq);
+ round3(16+3, RR, RL, load_next_key, __movq);
+ round3(16+4, RL, RR, load_next_key, __movq);
+ round3(16+5, RR, RL, load_next_key, __movq);
+ round3(16+6, RL, RR, load_next_key, __movq);
+ round3(16+7, RR, RL, load_next_key, __movq);
+ round3(16+8, RL, RR, load_next_key, __movq);
+ round3(16+9, RR, RL, load_next_key, __movq);
+ round3(16+10, RL, RR, load_next_key, __movq);
+ round3(16+11, RR, RL, load_next_key, __movq);
+ round3(16+12, RL, RR, load_next_key, __movq);
+ round3(16+13, RR, RL, load_next_key, __movq);
+ round3(16+14, RL, RR, load_next_key, __movq);
+ round3(16+15, RR, RL, load_next_key, __movq);
+
+ round3(32+0, RR, RL, load_next_key, __movq);
+ round3(32+1, RL, RR, load_next_key, __movq);
+ round3(32+2, RR, RL, load_next_key, __movq);
+ round3(32+3, RL, RR, load_next_key, __movq);
+ round3(32+4, RR, RL, load_next_key, __movq);
+ round3(32+5, RL, RR, load_next_key, __movq);
+ round3(32+6, RR, RL, load_next_key, __movq);
+ round3(32+7, RL, RR, load_next_key, __movq);
+ round3(32+8, RR, RL, load_next_key, __movq);
+ round3(32+9, RL, RR, load_next_key, __movq);
+ round3(32+10, RR, RL, load_next_key, __movq);
+ round3(32+11, RL, RR, load_next_key, __movq);
+ round3(32+12, RR, RL, load_next_key, __movq);
+ round3(32+13, RL, RR, load_next_key, __movq);
+ round3(32+14, RR, RL, load_next_key, __movq);
+ round3(32+15, RL, RR, dummy2, dummy2);
+
+ final_permutation3(RR, RL);
+
+ ret;
+.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;
+
+.align 8
+.globl _gcry_3des_amd64_cbc_dec
+.type _gcry_3des_amd64_cbc_dec,@function;
+_gcry_3des_amd64_cbc_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (64bit)
+ */
+
+ pushq %rbp;
+ pushq %rbx;
+ pushq %r12;
+ pushq %r13;
+ pushq %r14;
+ pushq %r15;
+
+ pushq %rsi; /*dst*/
+ pushq %rdx; /*src*/
+ pushq %rcx; /*iv*/
+
+ /* load input */
+ movl 0 * 4(%rdx), RL0d;
+ movl 1 * 4(%rdx), RR0d;
+ movl 2 * 4(%rdx), RL1d;
+ movl 3 * 4(%rdx), RR1d;
+ movl 4 * 4(%rdx), RL2d;
+ movl 5 * 4(%rdx), RR2d;
+
+ bswapl RL0d;
+ bswapl RR0d;
+ bswapl RL1d;
+ bswapl RR1d;
+ bswapl RL2d;
+ bswapl RR2d;
+
+ call _gcry_3des_amd64_crypt_blk3;
+
+ popq %rcx; /*iv*/
+ popq %rdx; /*src*/
+ popq %rsi; /*dst*/
+
+ bswapl RR0d;
+ bswapl RL0d;
+ bswapl RR1d;
+ bswapl RL1d;
+ bswapl RR2d;
+ bswapl RL2d;
+
+ movq 2 * 8(%rdx), RT0;
+ xorl 0 * 4(%rcx), RR0d;
+ xorl 1 * 4(%rcx), RL0d;
+ xorl 0 * 4(%rdx), RR1d;
+ xorl 1 * 4(%rdx), RL1d;
+ xorl 2 * 4(%rdx), RR2d;
+ xorl 3 * 4(%rdx), RL2d;
+ movq RT0, (%rcx); /* store new IV */
+
+ movl RR0d, 0 * 4(%rsi);
+ movl RL0d, 1 * 4(%rsi);
+ movl RR1d, 2 * 4(%rsi);
+ movl RL1d, 3 * 4(%rsi);
+ movl RR2d, 4 * 4(%rsi);
+ movl RL2d, 5 * 4(%rsi);
+
+ popq %r15;
+ popq %r14;
+ popq %r13;
+ popq %r12;
+ popq %rbx;
+ popq %rbp;
+
+ ret;
+.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;
+
+.align 8
+.globl _gcry_3des_amd64_ctr_enc
+.type _gcry_3des_amd64_ctr_enc,@function;
+_gcry_3des_amd64_ctr_enc:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (64bit)
+ */
+
+ pushq %rbp;
+ pushq %rbx;
+ pushq %r12;
+ pushq %r13;
+ pushq %r14;
+ pushq %r15;
+
+ pushq %rsi; /*dst*/
+ pushq %rdx; /*src*/
+ movq %rcx, RW2;
+
+ /* load IV and byteswap */
+ movq (RW2), RT0;
+ bswapq RT0;
+ movq RT0, RR0;
+
+ /* construct IVs */
+ leaq 1(RT0), RR1;
+ leaq 2(RT0), RR2;
+ leaq 3(RT0), RT0;
+ movq RR0, RL0;
+ movq RR1, RL1;
+ movq RR2, RL2;
+ bswapq RT0;
+ shrq $32, RL0;
+ shrq $32, RL1;
+ shrq $32, RL2;
+
+ /* store new IV */
+ movq RT0, (RW2);
+
+ call _gcry_3des_amd64_crypt_blk3;
+
+ popq %rdx; /*src*/
+ popq %rsi; /*dst*/
+
+ bswapl RR0d;
+ bswapl RL0d;
+ bswapl RR1d;
+ bswapl RL1d;
+ bswapl RR2d;
+ bswapl RL2d;
+
+ xorl 0 * 4(%rdx), RR0d;
+ xorl 1 * 4(%rdx), RL0d;
+ xorl 2 * 4(%rdx), RR1d;
+ xorl 3 * 4(%rdx), RL1d;
+ xorl 4 * 4(%rdx), RR2d;
+ xorl 5 * 4(%rdx), RL2d;
+
+ movl RR0d, 0 * 4(%rsi);
+ movl RL0d, 1 * 4(%rsi);
+ movl RR1d, 2 * 4(%rsi);
+ movl RL1d, 3 * 4(%rsi);
+ movl RR2d, 4 * 4(%rsi);
+ movl RL2d, 5 * 4(%rsi);
+
+ popq %r15;
+ popq %r14;
+ popq %r13;
+ popq %r12;
+ popq %rbx;
+ popq %rbp;
+
+ ret;
+.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;
+
+.align 8
+.globl _gcry_3des_amd64_cfb_dec
+.type _gcry_3des_amd64_cfb_dec,@function;
+_gcry_3des_amd64_cfb_dec:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (3 blocks)
+ * %rdx: src (3 blocks)
+ * %rcx: iv (64bit)
+ */
+ pushq %rbp;
+ pushq %rbx;
+ pushq %r12;
+ pushq %r13;
+ pushq %r14;
+ pushq %r15;
+
+ pushq %rsi; /*dst*/
+ pushq %rdx; /*src*/
+ movq %rcx, RW2;
+
+ /* Load input */
+ movl 0 * 4(RW2), RL0d;
+ movl 1 * 4(RW2), RR0d;
+ movl 0 * 4(%rdx), RL1d;
+ movl 1 * 4(%rdx), RR1d;
+ movl 2 * 4(%rdx), RL2d;
+ movl 3 * 4(%rdx), RR2d;
+
+ bswapl RL0d;
+ bswapl RR0d;
+ bswapl RL1d;
+ bswapl RR1d;
+ bswapl RL2d;
+ bswapl RR2d;
+
+ /* Update IV */
+ movq 4 * 4(%rdx), RW0;
+ movq RW0, (RW2);
+
+ call _gcry_3des_amd64_crypt_blk3;
+
+ popq %rdx; /*src*/
+ popq %rsi; /*dst*/
+
+ bswapl RR0d;
+ bswapl RL0d;
+ bswapl RR1d;
+ bswapl RL1d;
+ bswapl RR2d;
+ bswapl RL2d;
+
+ xorl 0 * 4(%rdx), RR0d;
+ xorl 1 * 4(%rdx), RL0d;
+ xorl 2 * 4(%rdx), RR1d;
+ xorl 3 * 4(%rdx), RL1d;
+ xorl 4 * 4(%rdx), RR2d;
+ xorl 5 * 4(%rdx), RL2d;
+
+ movl RR0d, 0 * 4(%rsi);
+ movl RL0d, 1 * 4(%rsi);
+ movl RR1d, 2 * 4(%rsi);
+ movl RL1d, 3 * 4(%rsi);
+ movl RR2d, 4 * 4(%rsi);
+ movl RL2d, 5 * 4(%rsi);
+
+ popq %r15;
+ popq %r14;
+ popq %r13;
+ popq %r12;
+ popq %rbx;
+ popq %rbp;
+ ret;
+.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;
+
+.data
+.align 16
+.L_s1:
+ .quad 0x0010100001010400, 0x0000000000000000
+ .quad 0x0000100000010000, 0x0010100001010404
+ .quad 0x0010100001010004, 0x0000100000010404
+ .quad 0x0000000000000004, 0x0000100000010000
+ .quad 0x0000000000000400, 0x0010100001010400
+ .quad 0x0010100001010404, 0x0000000000000400
+ .quad 0x0010000001000404, 0x0010100001010004
+ .quad 0x0010000001000000, 0x0000000000000004
+ .quad 0x0000000000000404, 0x0010000001000400
+ .quad 0x0010000001000400, 0x0000100000010400
+ .quad 0x0000100000010400, 0x0010100001010000
+ .quad 0x0010100001010000, 0x0010000001000404
+ .quad 0x0000100000010004, 0x0010000001000004
+ .quad 0x0010000001000004, 0x0000100000010004
+ .quad 0x0000000000000000, 0x0000000000000404
+ .quad 0x0000100000010404, 0x0010000001000000
+ .quad 0x0000100000010000, 0x0010100001010404
+ .quad 0x0000000000000004, 0x0010100001010000
+ .quad 0x0010100001010400, 0x0010000001000000
+ .quad 0x0010000001000000, 0x0000000000000400
+ .quad 0x0010100001010004, 0x0000100000010000
+ .quad 0x0000100000010400, 0x0010000001000004
+ .quad 0x0000000000000400, 0x0000000000000004
+ .quad 0x0010000001000404, 0x0000100000010404
+ .quad 0x0010100001010404, 0x0000100000010004
+ .quad 0x0010100001010000, 0x0010000001000404
+ .quad 0x0010000001000004, 0x0000000000000404
+ .quad 0x0000100000010404, 0x0010100001010400
+ .quad 0x0000000000000404, 0x0010000001000400
+ .quad 0x0010000001000400, 0x0000000000000000
+ .quad 0x0000100000010004, 0x0000100000010400
+ .quad 0x0000000000000000, 0x0010100001010004
+.L_s2:
+ .quad 0x0801080200100020, 0x0800080000000000
+ .quad 0x0000080000000000, 0x0001080200100020
+ .quad 0x0001000000100000, 0x0000000200000020
+ .quad 0x0801000200100020, 0x0800080200000020
+ .quad 0x0800000200000020, 0x0801080200100020
+ .quad 0x0801080000100000, 0x0800000000000000
+ .quad 0x0800080000000000, 0x0001000000100000
+ .quad 0x0000000200000020, 0x0801000200100020
+ .quad 0x0001080000100000, 0x0001000200100020
+ .quad 0x0800080200000020, 0x0000000000000000
+ .quad 0x0800000000000000, 0x0000080000000000
+ .quad 0x0001080200100020, 0x0801000000100000
+ .quad 0x0001000200100020, 0x0800000200000020
+ .quad 0x0000000000000000, 0x0001080000100000
+ .quad 0x0000080200000020, 0x0801080000100000
+ .quad 0x0801000000100000, 0x0000080200000020
+ .quad 0x0000000000000000, 0x0001080200100020
+ .quad 0x0801000200100020, 0x0001000000100000
+ .quad 0x0800080200000020, 0x0801000000100000
+ .quad 0x0801080000100000, 0x0000080000000000
+ .quad 0x0801000000100000, 0x0800080000000000
+ .quad 0x0000000200000020, 0x0801080200100020
+ .quad 0x0001080200100020, 0x0000000200000020
+ .quad 0x0000080000000000, 0x0800000000000000
+ .quad 0x0000080200000020, 0x0801080000100000
+ .quad 0x0001000000100000, 0x0800000200000020
+ .quad 0x0001000200100020, 0x0800080200000020
+ .quad 0x0800000200000020, 0x0001000200100020
+ .quad 0x0001080000100000, 0x0000000000000000
+ .quad 0x0800080000000000, 0x0000080200000020
+ .quad 0x0800000000000000, 0x0801000200100020
+ .quad 0x0801080200100020, 0x0001080000100000
+.L_s3:
+ .quad 0x0000002000000208, 0x0000202008020200
+ .quad 0x0000000000000000, 0x0000200008020008
+ .quad 0x0000002008000200, 0x0000000000000000
+ .quad 0x0000202000020208, 0x0000002008000200
+ .quad 0x0000200000020008, 0x0000000008000008
+ .quad 0x0000000008000008, 0x0000200000020000
+ .quad 0x0000202008020208, 0x0000200000020008
+ .quad 0x0000200008020000, 0x0000002000000208
+ .quad 0x0000000008000000, 0x0000000000000008
+ .quad 0x0000202008020200, 0x0000002000000200
+ .quad 0x0000202000020200, 0x0000200008020000
+ .quad 0x0000200008020008, 0x0000202000020208
+ .quad 0x0000002008000208, 0x0000202000020200
+ .quad 0x0000200000020000, 0x0000002008000208
+ .quad 0x0000000000000008, 0x0000202008020208
+ .quad 0x0000002000000200, 0x0000000008000000
+ .quad 0x0000202008020200, 0x0000000008000000
+ .quad 0x0000200000020008, 0x0000002000000208
+ .quad 0x0000200000020000, 0x0000202008020200
+ .quad 0x0000002008000200, 0x0000000000000000
+ .quad 0x0000002000000200, 0x0000200000020008
+ .quad 0x0000202008020208, 0x0000002008000200
+ .quad 0x0000000008000008, 0x0000002000000200
+ .quad 0x0000000000000000, 0x0000200008020008
+ .quad 0x0000002008000208, 0x0000200000020000
+ .quad 0x0000000008000000, 0x0000202008020208
+ .quad 0x0000000000000008, 0x0000202000020208
+ .quad 0x0000202000020200, 0x0000000008000008
+ .quad 0x0000200008020000, 0x0000002008000208
+ .quad 0x0000002000000208, 0x0000200008020000
+ .quad 0x0000202000020208, 0x0000000000000008
+ .quad 0x0000200008020008, 0x0000202000020200
+.L_s4:
+ .quad 0x1008020000002001, 0x1000020800002001
+ .quad 0x1000020800002001, 0x0000000800000000
+ .quad 0x0008020800002000, 0x1008000800000001
+ .quad 0x1008000000000001, 0x1000020000002001
+ .quad 0x0000000000000000, 0x0008020000002000
+ .quad 0x0008020000002000, 0x1008020800002001
+ .quad 0x1000000800000001, 0x0000000000000000
+ .quad 0x0008000800000000, 0x1008000000000001
+ .quad 0x1000000000000001, 0x0000020000002000
+ .quad 0x0008000000000000, 0x1008020000002001
+ .quad 0x0000000800000000, 0x0008000000000000
+ .quad 0x1000020000002001, 0x0000020800002000
+ .quad 0x1008000800000001, 0x1000000000000001
+ .quad 0x0000020800002000, 0x0008000800000000
+ .quad 0x0000020000002000, 0x0008020800002000
+ .quad 0x1008020800002001, 0x1000000800000001
+ .quad 0x0008000800000000, 0x1008000000000001
+ .quad 0x0008020000002000, 0x1008020800002001
+ .quad 0x1000000800000001, 0x0000000000000000
+ .quad 0x0000000000000000, 0x0008020000002000
+ .quad 0x0000020800002000, 0x0008000800000000
+ .quad 0x1008000800000001, 0x1000000000000001
+ .quad 0x1008020000002001, 0x1000020800002001
+ .quad 0x1000020800002001, 0x0000000800000000
+ .quad 0x1008020800002001, 0x1000000800000001
+ .quad 0x1000000000000001, 0x0000020000002000
+ .quad 0x1008000000000001, 0x1000020000002001
+ .quad 0x0008020800002000, 0x1008000800000001
+ .quad 0x1000020000002001, 0x0000020800002000
+ .quad 0x0008000000000000, 0x1008020000002001
+ .quad 0x0000000800000000, 0x0008000000000000
+ .quad 0x0000020000002000, 0x0008020800002000
+.L_s5:
+ .quad 0x0000001000000100, 0x0020001002080100
+ .quad 0x0020000002080000, 0x0420001002000100
+ .quad 0x0000000000080000, 0x0000001000000100
+ .quad 0x0400000000000000, 0x0020000002080000
+ .quad 0x0400001000080100, 0x0000000000080000
+ .quad 0x0020001002000100, 0x0400001000080100
+ .quad 0x0420001002000100, 0x0420000002080000
+ .quad 0x0000001000080100, 0x0400000000000000
+ .quad 0x0020000002000000, 0x0400000000080000
+ .quad 0x0400000000080000, 0x0000000000000000
+ .quad 0x0400001000000100, 0x0420001002080100
+ .quad 0x0420001002080100, 0x0020001002000100
+ .quad 0x0420000002080000, 0x0400001000000100
+ .quad 0x0000000000000000, 0x0420000002000000
+ .quad 0x0020001002080100, 0x0020000002000000
+ .quad 0x0420000002000000, 0x0000001000080100
+ .quad 0x0000000000080000, 0x0420001002000100
+ .quad 0x0000001000000100, 0x0020000002000000
+ .quad 0x0400000000000000, 0x0020000002080000
+ .quad 0x0420001002000100, 0x0400001000080100
+ .quad 0x0020001002000100, 0x0400000000000000
+ .quad 0x0420000002080000, 0x0020001002080100
+ .quad 0x0400001000080100, 0x0000001000000100
+ .quad 0x0020000002000000, 0x0420000002080000
+ .quad 0x0420001002080100, 0x0000001000080100
+ .quad 0x0420000002000000, 0x0420001002080100
+ .quad 0x0020000002080000, 0x0000000000000000
+ .quad 0x0400000000080000, 0x0420000002000000
+ .quad 0x0000001000080100, 0x0020001002000100
+ .quad 0x0400001000000100, 0x0000000000080000
+ .quad 0x0000000000000000, 0x0400000000080000
+ .quad 0x0020001002080100, 0x0400001000000100
+.L_s6:
+ .quad 0x0200000120000010, 0x0204000020000000
+ .quad 0x0000040000000000, 0x0204040120000010
+ .quad 0x0204000020000000, 0x0000000100000010
+ .quad 0x0204040120000010, 0x0004000000000000
+ .quad 0x0200040020000000, 0x0004040100000010
+ .quad 0x0004000000000000, 0x0200000120000010
+ .quad 0x0004000100000010, 0x0200040020000000
+ .quad 0x0200000020000000, 0x0000040100000010
+ .quad 0x0000000000000000, 0x0004000100000010
+ .quad 0x0200040120000010, 0x0000040000000000
+ .quad 0x0004040000000000, 0x0200040120000010
+ .quad 0x0000000100000010, 0x0204000120000010
+ .quad 0x0204000120000010, 0x0000000000000000
+ .quad 0x0004040100000010, 0x0204040020000000
+ .quad 0x0000040100000010, 0x0004040000000000
+ .quad 0x0204040020000000, 0x0200000020000000
+ .quad 0x0200040020000000, 0x0000000100000010
+ .quad 0x0204000120000010, 0x0004040000000000
+ .quad 0x0204040120000010, 0x0004000000000000
+ .quad 0x0000040100000010, 0x0200000120000010
+ .quad 0x0004000000000000, 0x0200040020000000
+ .quad 0x0200000020000000, 0x0000040100000010
+ .quad 0x0200000120000010, 0x0204040120000010
+ .quad 0x0004040000000000, 0x0204000020000000
+ .quad 0x0004040100000010, 0x0204040020000000
+ .quad 0x0000000000000000, 0x0204000120000010
+ .quad 0x0000000100000010, 0x0000040000000000
+ .quad 0x0204000020000000, 0x0004040100000010
+ .quad 0x0000040000000000, 0x0004000100000010
+ .quad 0x0200040120000010, 0x0000000000000000
+ .quad 0x0204040020000000, 0x0200000020000000
+ .quad 0x0004000100000010, 0x0200040120000010
+.L_s7:
+ .quad 0x0002000000200000, 0x2002000004200002
+ .quad 0x2000000004000802, 0x0000000000000000
+ .quad 0x0000000000000800, 0x2000000004000802
+ .quad 0x2002000000200802, 0x0002000004200800
+ .quad 0x2002000004200802, 0x0002000000200000
+ .quad 0x0000000000000000, 0x2000000004000002
+ .quad 0x2000000000000002, 0x0000000004000000
+ .quad 0x2002000004200002, 0x2000000000000802
+ .quad 0x0000000004000800, 0x2002000000200802
+ .quad 0x2002000000200002, 0x0000000004000800
+ .quad 0x2000000004000002, 0x0002000004200000
+ .quad 0x0002000004200800, 0x2002000000200002
+ .quad 0x0002000004200000, 0x0000000000000800
+ .quad 0x2000000000000802, 0x2002000004200802
+ .quad 0x0002000000200800, 0x2000000000000002
+ .quad 0x0000000004000000, 0x0002000000200800
+ .quad 0x0000000004000000, 0x0002000000200800
+ .quad 0x0002000000200000, 0x2000000004000802
+ .quad 0x2000000004000802, 0x2002000004200002
+ .quad 0x2002000004200002, 0x2000000000000002
+ .quad 0x2002000000200002, 0x0000000004000000
+ .quad 0x0000000004000800, 0x0002000000200000
+ .quad 0x0002000004200800, 0x2000000000000802
+ .quad 0x2002000000200802, 0x0002000004200800
+ .quad 0x2000000000000802, 0x2000000004000002
+ .quad 0x2002000004200802, 0x0002000004200000
+ .quad 0x0002000000200800, 0x0000000000000000
+ .quad 0x2000000000000002, 0x2002000004200802
+ .quad 0x0000000000000000, 0x2002000000200802
+ .quad 0x0002000004200000, 0x0000000000000800
+ .quad 0x2000000004000002, 0x0000000004000800
+ .quad 0x0000000000000800, 0x2002000000200002
+.L_s8:
+ .quad 0x0100010410001000, 0x0000010000001000
+ .quad 0x0000000000040000, 0x0100010410041000
+ .quad 0x0100000010000000, 0x0100010410001000
+ .quad 0x0000000400000000, 0x0100000010000000
+ .quad 0x0000000400040000, 0x0100000010040000
+ .quad 0x0100010410041000, 0x0000010000041000
+ .quad 0x0100010010041000, 0x0000010400041000
+ .quad 0x0000010000001000, 0x0000000400000000
+ .quad 0x0100000010040000, 0x0100000410000000
+ .quad 0x0100010010001000, 0x0000010400001000
+ .quad 0x0000010000041000, 0x0000000400040000
+ .quad 0x0100000410040000, 0x0100010010041000
+ .quad 0x0000010400001000, 0x0000000000000000
+ .quad 0x0000000000000000, 0x0100000410040000
+ .quad 0x0100000410000000, 0x0100010010001000
+ .quad 0x0000010400041000, 0x0000000000040000
+ .quad 0x0000010400041000, 0x0000000000040000
+ .quad 0x0100010010041000, 0x0000010000001000
+ .quad 0x0000000400000000, 0x0100000410040000
+ .quad 0x0000010000001000, 0x0000010400041000
+ .quad 0x0100010010001000, 0x0000000400000000
+ .quad 0x0100000410000000, 0x0100000010040000
+ .quad 0x0100000410040000, 0x0100000010000000
+ .quad 0x0000000000040000, 0x0100010410001000
+ .quad 0x0000000000000000, 0x0100010410041000
+ .quad 0x0000000400040000, 0x0100000410000000
+ .quad 0x0100000010040000, 0x0100010010001000
+ .quad 0x0100010410001000, 0x0000000000000000
+ .quad 0x0100010410041000, 0x0000010000041000
+ .quad 0x0000010000041000, 0x0000010400001000
+ .quad 0x0000010400001000, 0x0000000400040000
+ .quad 0x0100000010000000, 0x0100010010041000
+
+#endif
+#endif
diff --git a/cipher/des.c b/cipher/des.c
index 6611fd3b..bc2a474d 100644
--- a/cipher/des.c
+++ b/cipher/des.c
@@ -119,9 +119,27 @@
#include "g10lib.h"
#include "cipher.h"
#include "bufhelp.h"
+#include "cipher-selftest.h"
+
+
+#define DES_BLOCKSIZE 8
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)
+# define USE_AMD64_ASM 1
+#endif
+
+/* Helper macro to force alignment to 16 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16 __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
#if defined(__GNUC__) && defined(__GNU_LIBRARY__)
-#define working_memcmp memcmp
+# define working_memcmp memcmp
#else
/*
* According to the SunOS man page, memcmp returns indeterminate sign
@@ -171,6 +189,12 @@ static int tripledes_ecb_crypt (struct _tripledes_ctx *,
const byte *, byte *, int);
static int is_weak_key ( const byte *key );
static const char *selftest (void);
+static unsigned int do_tripledes_encrypt(void *context, byte *outbuf,
+ const byte *inbuf );
+static unsigned int do_tripledes_decrypt(void *context, byte *outbuf,
+ const byte *inbuf );
+static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key,
+ unsigned keylen);
static int initialized;
@@ -727,6 +751,46 @@ tripledes_set3keys (struct _tripledes_ctx *ctx,
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementation of triple-DES. */
+extern void _gcry_3des_amd64_crypt_block(const void *keys, byte *out,
+ const byte *in);
+
+/* These assembly implementations process three blocks in parallel. */
+extern void _gcry_3des_amd64_ctr_enc(const void *keys, byte *out,
+ const byte *in, byte *ctr);
+
+extern void _gcry_3des_amd64_cbc_dec(const void *keys, byte *out,
+ const byte *in, byte *iv);
+
+extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out,
+ const byte *in, byte *iv);
+
+#define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *))
+
+/*
+ * Electronic Codebook Mode Triple-DES encryption/decryption of data
+ * according to 'mode'. Sometimes this mode is named 'EDE' mode
+ * (Encryption-Decryption-Encryption).
+ */
+static inline int
+tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
+ byte * to, int mode)
+{
+ u32 *keys;
+
+ keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
+
+ _gcry_3des_amd64_crypt_block(keys, to, from);
+
+ return 0;
+}
+
+#else /*USE_AMD64_ASM*/
+
+#define TRIPLEDES_ECB_BURN_STACK 32
+
/*
* Electronic Codebook Mode Triple-DES encryption/decryption of data
* according to 'mode'. Sometimes this mode is named 'EDE' mode
@@ -777,8 +841,158 @@ tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
return 0;
}
+#endif /*!USE_AMD64_ASM*/
+
+
+
+/* Bulk encryption of complete blocks in CTR mode. This function is only
+ intended for the bulk encryption feature of cipher.c. CTR is expected to be
+ of size DES_BLOCKSIZE. */
+void
+_gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ struct _tripledes_ctx *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char tmpbuf[DES_BLOCKSIZE];
+ int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+ int i;
+
+#ifdef USE_AMD64_ASM
+ {
+ int asm_burn_depth = 9 * sizeof(void *);
+
+ if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+ burn_stack_depth = asm_burn_depth;
+
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ _gcry_3des_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
+
+ nblocks -= 3;
+ outbuf += 3 * DES_BLOCKSIZE;
+ inbuf += 3 * DES_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* Encrypt the counter. */
+ tripledes_ecb_encrypt (ctx, ctr, tmpbuf);
+ /* XOR the input with the encrypted counter and store in output. */
+ buf_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE);
+ outbuf += DES_BLOCKSIZE;
+ inbuf += DES_BLOCKSIZE;
+ /* Increment the counter. */
+ for (i = DES_BLOCKSIZE; i > 0; i--)
+ {
+ ctr[i-1]++;
+ if (ctr[i-1])
+ break;
+ }
+ }
+
+ wipememory(tmpbuf, sizeof(tmpbuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+void
+_gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ struct _tripledes_ctx *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ unsigned char savebuf[DES_BLOCKSIZE];
+ int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+ {
+ int asm_burn_depth = 10 * sizeof(void *);
+
+ if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+ burn_stack_depth = asm_burn_depth;
+
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ _gcry_3des_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
+
+ nblocks -= 3;
+ outbuf += 3 * DES_BLOCKSIZE;
+ inbuf += 3 * DES_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ /* INBUF is needed later and it may be identical to OUTBUF, so store
+ the intermediate result to SAVEBUF. */
+ tripledes_ecb_decrypt (ctx, inbuf, savebuf);
+
+ buf_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
+ inbuf += DES_BLOCKSIZE;
+ outbuf += DES_BLOCKSIZE;
+ }
+
+ wipememory(savebuf, sizeof(savebuf));
+ _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+ intended for the bulk encryption feature of cipher.c. */
+void
+_gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks)
+{
+ struct _tripledes_ctx *ctx = context;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+ {
+ int asm_burn_depth = 9 * sizeof(void *);
+
+ if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+ burn_stack_depth = asm_burn_depth;
+ /* Process data in 3 block chunks. */
+ while (nblocks >= 3)
+ {
+ _gcry_3des_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
+ nblocks -= 3;
+ outbuf += 3 * DES_BLOCKSIZE;
+ inbuf += 3 * DES_BLOCKSIZE;
+ }
+
+ /* Use generic code to handle smaller chunks... */
+ }
+#endif
+
+ for ( ;nblocks; nblocks-- )
+ {
+ tripledes_ecb_encrypt (ctx, iv, iv);
+ buf_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
+ outbuf += DES_BLOCKSIZE;
+ inbuf += DES_BLOCKSIZE;
+ }
+
+ _gcry_burn_stack(burn_stack_depth);
+}
/*
@@ -815,6 +1029,67 @@ is_weak_key ( const byte *key )
}
+/* Alternative setkey for selftests; need larger key than default. */
+static gcry_err_code_t
+bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen)
+{
+ static const unsigned char key[24] ATTR_ALIGNED_16 = {
+ 0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+ 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22,
+ 0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82
+ };
+
+ (void)__key;
+ (void)__keylen;
+
+ return do_tripledes_setkey(context, key, sizeof(key));
+}
+
+
+/* Run the self-tests for DES-CTR, tests IV increment of bulk CTR
+ encryption. Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+ const int nblocks = 3+1;
+ const int blocksize = DES_BLOCKSIZE;
+ const int context_size = sizeof(struct _tripledes_ctx);
+
+ return _gcry_selftest_helper_ctr("3DES", &bulk_selftest_setkey,
+ &do_tripledes_encrypt, &_gcry_3des_ctr_enc, nblocks, blocksize,
+ context_size);
+}
+
+
+/* Run the self-tests for DES-CBC, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+ const int nblocks = 3+2;
+ const int blocksize = DES_BLOCKSIZE;
+ const int context_size = sizeof(struct _tripledes_ctx);
+
+ return _gcry_selftest_helper_cbc("3DES", &bulk_selftest_setkey,
+ &do_tripledes_encrypt, &_gcry_3des_cbc_dec, nblocks, blocksize,
+ context_size);
+}
+
+
+/* Run the self-tests for DES-CFB, tests bulk CBC decryption.
+ Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+ const int nblocks = 3+2;
+ const int blocksize = DES_BLOCKSIZE;
+ const int context_size = sizeof(struct _tripledes_ctx);
+
+ return _gcry_selftest_helper_cfb("3DES", &bulk_selftest_setkey,
+ &do_tripledes_encrypt, &_gcry_3des_cfb_dec, nblocks, blocksize,
+ context_size);
+}
+
/*
* Performs a selftest of this DES/Triple-DES implementation.
@@ -824,6 +1099,8 @@ is_weak_key ( const byte *key )
static const char *
selftest (void)
{
+ const char *r;
+
/*
* Check if 'u32' is really 32 bits wide. This DES / 3DES implementation
* need this.
@@ -1003,6 +1280,15 @@ selftest (void)
return "DES weak key detection failed";
}
+ if ( (r = selftest_cbc ()) )
+ return r;
+
+ if ( (r = selftest_cfb ()) )
+ return r;
+
+ if ( (r = selftest_ctr ()) )
+ return r;
+
return 0;
}
@@ -1060,7 +1346,7 @@ do_tripledes_encrypt( void *context, byte *outbuf, const byte *inbuf )
struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
tripledes_ecb_encrypt ( ctx, inbuf, outbuf );
- return /*burn_stack*/ (32);
+ return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK;
}
static unsigned int
@@ -1068,7 +1354,7 @@ do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf )
{
struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
tripledes_ecb_decrypt ( ctx, inbuf, outbuf );
- return /*burn_stack*/ (32);
+ return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK;
}
static gcry_err_code_t
diff --git a/configure.ac b/configure.ac
index a0f75a5d..79f79ef3 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1671,6 +1671,13 @@ LIST_MEMBER(des, $enabled_ciphers)
if test "$found" = "1" ; then
GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo"
AC_DEFINE(USE_DES, 1, [Defined if this module should be included])
+
+ case "${host}" in
+ x86_64-*-*)
+ # Build with the assembly implementation
+ GCRYPT_CIPHERS="$GCRYPT_CIPHERS des-amd64.lo"
+ ;;
+ esac
fi
LIST_MEMBER(aes, $enabled_ciphers)
diff --git a/src/cipher.h b/src/cipher.h
index cd981b3d..5d1b5f6f 100644
--- a/src/cipher.h
+++ b/src/cipher.h
@@ -173,6 +173,19 @@ void _gcry_camellia_cfb_dec (void *context, unsigned char *iv,
void *outbuf_arg, const void *inbuf_arg,
size_t nblocks);
+/*-- des.c --*/
+void _gcry_3des_ctr_enc (void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+
+void _gcry_3des_cbc_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+
+void _gcry_3des_cfb_dec (void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks);
+
/*-- serpent.c --*/
void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr,
void *outbuf_arg, const void *inbuf_arg,