summaryrefslogtreecommitdiff
path: root/cipher/serpent-avx2-amd64.S
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2013-09-01 16:50:55 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2013-09-01 16:50:55 +0300
commitd12828cd821a4b4428eae19de5aee02cf536e536 (patch)
treed91b173619c4cc23260ccd1cd918ea7ecc419937 /cipher/serpent-avx2-amd64.S
parentfd6721c235a5bdcb332c8eb708fbd4f96e52e824 (diff)
downloadlibgcrypt-d12828cd821a4b4428eae19de5aee02cf536e536.tar.gz
serpent-avx2-amd64: Move register clearing to assembly
* cipher/serpent-avx2-amd64.S (_gcry_serpent_avx2_ctr_enc) (_gcry_serpent_avx2_cbc_dec, _gcry_serpent_avx2_cfb_dec): Change last 'vzeroupper' to 'vzeroall'. * cipher/serpent.c (_gcry_serpent_ctr_enc, _gcry_serpent_cbc_dec) (_gcry_serpent_avx2_cfb_dec) [USE_AVX2]: Remove register clearing with 'vzeroall'. -- AVX2 implementation was already clearing upper halves of YMM registers at end of assembly functions to prevent long SSE<->AVX transition stalls present on Intel CPUs. Patch changes these 'vzeroupper' instructions to 'vzeroall' to fully clear YMM registers. After this change register clearing in serpent.c is not needed. Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/serpent-avx2-amd64.S')
-rw-r--r--cipher/serpent-avx2-amd64.S6
1 files changed, 3 insertions, 3 deletions
diff --git a/cipher/serpent-avx2-amd64.S b/cipher/serpent-avx2-amd64.S
index 7586c0c5..c726e7ba 100644
--- a/cipher/serpent-avx2-amd64.S
+++ b/cipher/serpent-avx2-amd64.S
@@ -730,7 +730,7 @@ _gcry_serpent_avx2_ctr_enc:
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
- vzeroupper;
+ vzeroall;
ret
.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;
@@ -799,7 +799,7 @@ _gcry_serpent_avx2_cbc_dec:
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
- vzeroupper;
+ vzeroall;
ret
.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;
@@ -870,7 +870,7 @@ _gcry_serpent_avx2_cfb_dec:
vmovdqu RB2, (6 * 32)(%rsi);
vmovdqu RB3, (7 * 32)(%rsi);
- vzeroupper;
+ vzeroall;
ret
.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;