path: root/cipher/serpent-avx2-amd64.S
author    Jussi Kivilinna <jussi.kivilinna@iki.fi>  2013-11-09 22:39:19 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>  2013-11-09 22:39:19 +0200
commit    df29831d008e32faf74091d080a415731418d158 (patch)
tree      384cc216ede33e8aacb2363de307b171c36ed1d3 /cipher/serpent-avx2-amd64.S
parent    51501b638546665163bbb85a14308fdb99211a28 (diff)
download  libgcrypt-df29831d008e32faf74091d080a415731418d158.tar.gz
Fix Serpent-AVX2 and Camellia-AVX2 counter modes
* cipher/camellia-aesni-avx2-amd64.S (_gcry_camellia_aesni_avx2_ctr_enc):
Byte-swap before checking for overflow handling.
* cipher/camellia-glue.c (selftest_ctr_128, selftest_cfb_128)
(selftest_cbc_128): Add 16 to nblocks.
* cipher/cipher-selftest.c (_gcry_selftest_helper_ctr): Add test with
non-overflowing IV and modify overflow IV to detect broken endianness
handling.
* cipher/serpent-avx2-amd64.S (_gcry_serpent_avx2_ctr_enc): Byte-swap
before checking for overflow handling; fix crazy-mixed-endian IV
construction to big-endian.
* cipher/serpent.c (selftest_ctr_128, selftest_cfb_128)
(selftest_cbc_128): Add 8 to nblocks.
--

The selftest for CTR was setting the counter-IV to all '0xff' except the
last byte.  This had the effect that even with broken endianness handling
Serpent-AVX2 and Camellia-AVX2 passed the tests.  Patch corrects the CTR
selftest and fixes the broken implementations.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
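The fast/slow-path decision that the patch fixes can be modelled in a few lines of C. The sketch below is illustrative only; load_be64 and ctr_needs_carry are hypothetical helpers, not libgcrypt API. It loads the low 64 bits of the big-endian counter, converts them to host order, and checks whether adding 16 blocks would carry into the high half, which is what the new movq/bswapq/cmpq sequence does. It also shows why the old all-0xff selftest IV masked the missing byte-swap: that IV forces the carry path regardless of byte order.

/* Illustrative model only; load_be64 and ctr_needs_carry are hypothetical
 * helpers, not libgcrypt API. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint64_t load_be64(const unsigned char *p)
{
  uint64_t v = 0;
  for (int i = 0; i < 8; i++)
    v = (v << 8) | p[i];          /* big-endian load == bswap of a LE load */
  return v;
}

/* Returns nonzero if adding nblks to the 128-bit big-endian counter carries
 * out of its low 64-bit half, i.e. the slow .Lhandle_ctr_carry path is
 * needed.  Mirrors the fixed movq/bswapq/cmpq/ja sequence. */
static int ctr_needs_carry(const unsigned char iv[16], uint64_t nblks)
{
  uint64_t low = load_be64(iv + 8);
  return low > UINT64_MAX - nblks;
}

int main(void)
{
  /* Old selftest IV: all 0xff except the last byte (the exact last-byte
   * value here is illustrative).  Its low qword is "nearly all ones"
   * whether or not it was byte-swapped first, so both correct and broken
   * code took the carry path and the bug stayed hidden. */
  unsigned char old_iv[16];
  memset(old_iv, 0xff, sizeof old_iv);
  old_iv[15] = 0xf0;
  printf("old selftest IV needs carry: %d\n", ctr_needs_carry(old_iv, 16));

  /* A non-overflowing IV (as added by the selftest fix) exercises the fast
   * path, where the missing byte-swap produced wrong counter values. */
  unsigned char new_iv[16] = { 0 };
  new_iv[15] = 1;
  printf("plain IV needs carry:        %d\n", ctr_needs_carry(new_iv, 16));
  return 0;
}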
Diffstat (limited to 'cipher/serpent-avx2-amd64.S')
-rw-r--r--  cipher/serpent-avx2-amd64.S  35
1 file changed, 19 insertions(+), 16 deletions(-)
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 532361df..31775746 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -598,6 +598,9 @@ _gcry_serpent_avx2_ctr_enc:
* %rcx: iv (big endian, 128bit)
*/
+ movq 8(%rcx), %rax;
+ bswapq %rax;
+
vzeroupper;
vbroadcasti128 .Lbswap128_mask RIP, RTMP3;
@@ -614,25 +617,25 @@ _gcry_serpent_avx2_ctr_enc:
vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
/* check need for handling 64-bit overflow and carry */
- cmpq $(0xffffffffffffffff - 16), (%rcx);
+ cmpq $(0xffffffffffffffff - 16), %rax;
ja .Lhandle_ctr_carry;
/* construct IVs */
- vpsubq RTMP2, RTMP0, RA1; /* +3 ; +2 */
- vpshufb RTMP3, RA1, RA1;
- vpsubq RTMP2, RA1, RA2; /* +5 ; +4 */
- vpshufb RTMP3, RA2, RA2;
- vpsubq RTMP2, RA2, RA3; /* +7 ; +6 */
- vpshufb RTMP3, RA3, RA3;
- vpsubq RTMP2, RA3, RB0; /* +9 ; +8 */
- vpshufb RTMP3, RB0, RB0;
- vpsubq RTMP2, RB0, RB1; /* +11 ; +10 */
- vpshufb RTMP3, RB1, RB1;
- vpsubq RTMP2, RB1, RB2; /* +13 ; +12 */
- vpshufb RTMP3, RB2, RB2;
- vpsubq RTMP2, RB2, RB3; /* +15 ; +14 */
- vpshufb RTMP3, RB3, RB3;
- vpsubq RTMP2, RB3, RTMP0; /* +16 */
+ vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+ vpshufb RTMP3, RTMP0, RA1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+ vpshufb RTMP3, RTMP0, RA2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+ vpshufb RTMP3, RTMP0, RA3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+ vpshufb RTMP3, RTMP0, RB0;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+ vpshufb RTMP3, RTMP0, RB1;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+ vpshufb RTMP3, RTMP0, RB2;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+ vpshufb RTMP3, RTMP0, RB3;
+ vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
vpshufb RTMP3x, RTMP0x, RTMP0x;
jmp .Lctr_carry_done;
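For reference, a scalar sketch of what the rewritten fast path computes, assuming no carry into the high qword: the high 64 bits of the IV are reused unchanged and the byte-swapped low half simply counts up, with each result stored back in big-endian order; the vpsubq/vpshufb chain above does the same for 16 blocks at a time. The helper names are again hypothetical, and load_be64 is the one from the earlier sketch.

/* Scalar model of the no-carry fast path; hypothetical helpers, reusing
 * load_be64 (and the includes) from the sketch above. */
static void store_be64(unsigned char *p, uint64_t v)
{
  for (int i = 7; i >= 0; i--)
    {
      p[i] = (unsigned char)(v & 0xff);
      v >>= 8;
    }
}

/* Emit n consecutive 128-bit big-endian counter blocks starting at iv,
 * assuming the low 64-bit half does not overflow (the caller has already
 * checked this, as the cmpq/ja above does). */
static void ctr_blocks_no_carry(unsigned char *out,
                                const unsigned char iv[16], unsigned int n)
{
  uint64_t low = load_be64(iv + 8);     /* same value as %rax after bswapq */
  for (unsigned int i = 0; i < n; i++, out += 16)
    {
      memcpy(out, iv, 8);               /* high half untouched on this path */
      store_be64(out + 8, low + i);     /* +0, +1, ..., +n-1, big-endian */
    }
}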