diff options
-rw-r--r-- | cipher/cipher.c | 1 | ||||
-rw-r--r-- | cipher/serpent-sse2-amd64.S | 66 | ||||
-rw-r--r-- | cipher/serpent.c | 88 | ||||
-rw-r--r-- | src/cipher.h | 3 |
4 files changed, 158 insertions, 0 deletions
diff --git a/cipher/cipher.c b/cipher/cipher.c index e9a652f2..652d7953 100644 --- a/cipher/cipher.c +++ b/cipher/cipher.c @@ -732,6 +732,7 @@ gcry_cipher_open (gcry_cipher_hd_t *handle, case GCRY_CIPHER_SERPENT192: case GCRY_CIPHER_SERPENT256: h->bulk.cbc_dec = _gcry_serpent_cbc_dec; + h->bulk.cfb_dec = _gcry_serpent_cfb_dec; h->bulk.ctr_enc = _gcry_serpent_ctr_enc; break; #endif /*USE_SERPENT*/ diff --git a/cipher/serpent-sse2-amd64.S b/cipher/serpent-sse2-amd64.S index 8d8c8dda..5f9e9d22 100644 --- a/cipher/serpent-sse2-amd64.S +++ b/cipher/serpent-sse2-amd64.S @@ -822,5 +822,71 @@ _gcry_serpent_sse2_cbc_dec: ret .size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec; +.align 8 +.global _gcry_serpent_sse2_cfb_dec +.type _gcry_serpent_sse2_cfb_dec,@function; +_gcry_serpent_sse2_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (8 blocks) + * %rdx: src (8 blocks) + * %rcx: iv + */ + + .set RA0, enc_in_a0 + .set RA1, enc_in_a1 + .set RA2, enc_in_a2 + .set RA3, enc_in_a3 + .set RB0, enc_in_b0 + .set RB1, enc_in_b1 + .set RB2, enc_in_b2 + .set RB3, enc_in_b3 + + /* Load input: IV, then src blocks 0..6 */ + movdqu (%rcx), RA0; + movdqu 0 * 16(%rdx), RA1; + movdqu 1 * 16(%rdx), RA2; + movdqu 2 * 16(%rdx), RA3; + movdqu 3 * 16(%rdx), RB0; + movdqu 4 * 16(%rdx), RB1; + movdqu 5 * 16(%rdx), RB2; + movdqu 6 * 16(%rdx), RB3; + + /* Update IV: store src block 7 to (%rcx) */ + movdqu 7 * 16(%rdx), RNOT; + movdqu RNOT, (%rcx); + + call __serpent_enc_blk8; + + .set RA0, enc_out_a0 + .set RA1, enc_out_a1 + .set RA2, enc_out_a2 + .set RA3, enc_out_a3 + .set RB0, enc_out_b0 + .set RB1, enc_out_b1 + .set RB2, enc_out_b2 + .set RB3, enc_out_b3 + + pxor_u((0 * 16)(%rdx), RA0, RTMP0); + pxor_u((1 * 16)(%rdx), RA1, RTMP0); + pxor_u((2 * 16)(%rdx), RA2, RTMP0); + pxor_u((3 * 16)(%rdx), RA3, RTMP0); + pxor_u((4 * 16)(%rdx), RB0, RTMP0); + pxor_u((5 * 16)(%rdx), RB1, RTMP0); + pxor_u((6 * 16)(%rdx), RB2, RTMP0); + pxor_u((7 * 16)(%rdx), RB3, RTMP0); + + movdqu RA0, (0 * 16)(%rsi); + movdqu RA1, (1 * 16)(%rsi); + movdqu RA2, (2 * 
16)(%rsi); + movdqu RA3, (3 * 16)(%rsi); + movdqu RB0, (4 * 16)(%rsi); + movdqu RB1, (5 * 16)(%rsi); + movdqu RB2, (6 * 16)(%rsi); + movdqu RB3, (7 * 16)(%rsi); + + ret +.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec; + #endif /*defined(USE_SERPENT)*/ #endif /*__x86_64*/ diff --git a/cipher/serpent.c b/cipher/serpent.c index 7b82b48c..95ac7c15 100644 --- a/cipher/serpent.c +++ b/cipher/serpent.c @@ -74,6 +74,11 @@ extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx, unsigned char *out, const unsigned char *in, unsigned char *iv); + +extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx, + unsigned char *out, + const unsigned char *in, + unsigned char *iv); #endif /* A prototype. */ @@ -916,6 +921,71 @@ _gcry_serpent_cbc_dec(void *context, unsigned char *iv, _gcry_burn_stack(burn_stack_depth); } +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +void +_gcry_serpent_cfb_dec(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + serpent_context_t *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = 2 * sizeof (serpent_block_t); + +#ifdef USE_SSE2 + { + int did_use_sse2 = 0; + + /* Process data in 8 block chunks. 
*/ + while (nblocks >= 8) + { + _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 8; + outbuf += 8 * sizeof(serpent_block_t); + inbuf += 8 * sizeof(serpent_block_t); + did_use_sse2 = 1; + } + + if (did_use_sse2) + { + /* clear SSE2 registers used by serpent-sse2 */ + asm volatile ( + "pxor %%xmm0, %%xmm0;\n" + "pxor %%xmm1, %%xmm1;\n" + "pxor %%xmm2, %%xmm2;\n" + "pxor %%xmm3, %%xmm3;\n" + "pxor %%xmm4, %%xmm4;\n" + "pxor %%xmm5, %%xmm5;\n" + "pxor %%xmm6, %%xmm6;\n" + "pxor %%xmm7, %%xmm7;\n" + "pxor %%xmm10, %%xmm10;\n" + "pxor %%xmm11, %%xmm11;\n" + "pxor %%xmm12, %%xmm12;\n" + "pxor %%xmm13, %%xmm13;\n" + :::); + + /* serpent-sse2 assembly code does not use stack, so request a burn depth of 0 when no blocks are left for the generic code */ + if (nblocks == 0) + burn_stack_depth = 0; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + serpent_encrypt_internal(ctx, iv, iv); + buf_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t)); + outbuf += sizeof(serpent_block_t); + inbuf += sizeof(serpent_block_t); + } + + _gcry_burn_stack(burn_stack_depth); +} + /* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR @@ -948,6 +1018,21 @@ selftest_cbc_128 (void) } +/* Run the self-tests for SERPENT-CFB-128, tests bulk CFB decryption. + Returns NULL on success. */ +static const char* +selftest_cfb_128 (void) +{ + const int nblocks = 8+2; + const int blocksize = sizeof(serpent_block_t); + const int context_size = sizeof(serpent_context_t); + + return _gcry_selftest_helper_cfb_128("SERPENT", &serpent_setkey, + &serpent_encrypt, &_gcry_serpent_cfb_dec, nblocks, blocksize, + context_size); +} + + /* Serpent test. 
*/ static const char * @@ -1034,6 +1119,9 @@ serpent_test (void) if ( (r = selftest_cbc_128 ()) ) return r; + if ( (r = selftest_cfb_128 ()) ) + return r; + return NULL; } diff --git a/src/cipher.h b/src/cipher.h index f28990d4..9d6cc015 100644 --- a/src/cipher.h +++ b/src/cipher.h @@ -113,6 +113,9 @@ void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr, void _gcry_serpent_cbc_dec (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, unsigned int nblocks); +void _gcry_serpent_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks); /*-- dsa.c --*/ void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void *cb_data); |