From 70f8669d792e3186d6c455d5e15fc6828508560e Mon Sep 17 00:00:00 2001
From: Werner Koch
Date: Fri, 16 Jul 2004 17:25:30 +0000
Subject: (do_encrypt): Fix alignment problem.  Bugs found by Matthias
 Urlichs.
 (do_decrypt): Ditto.
 (keySched, keySched2): Use 2 macros along with unions in the key
 schedule context.

---
 THANKS            |   1 +
 cipher/ChangeLog  |  13 ++++
 cipher/random.c   |  13 +++-
 cipher/rijndael.c | 227 ++++++++++++++++++++++++++++++++----------------------
 4 files changed, 162 insertions(+), 92 deletions(-)

diff --git a/THANKS b/THANKS
index b5f54189..2fe1adc9 100644
--- a/THANKS
+++ b/THANKS
@@ -63,6 +63,7 @@ Marco d'Itri md@linux.it
 Mark Adler madler@alumni.caltech.edu
 Mark Elbrecht snowball3@bigfoot.com
 Markus Friedl Markus.Friedl@informatik.uni-erlangen.de
+Matthias Urlichs smurf@smurf.noris.de
 Martin Kahlert martin.kahlert@provi.de
 Martin Hamilton
 Martin Schulte schulte@thp.uni-koeln.de

diff --git a/cipher/ChangeLog b/cipher/ChangeLog
index 378e07a7..e015e6e0 100644
--- a/cipher/ChangeLog
+++ b/cipher/ChangeLog
@@ -1,8 +1,21 @@
+2004-07-16  Werner Koch
+
+	* rijndael.c (do_encrypt): Fix alignment problem.  Bugs found by
+	Matthias Urlichs.
+	(do_decrypt): Ditto.
+	(keySched, keySched2): Use 2 macros along with unions in the key
+	schedule context.
+
 2004-07-14  Moritz Schulte
 
 	* rsa.c (_gcry_rsa_decrypt): Don't forget to free "a".  Thanks to
 	Nikos Mavroyanopoulos.
 
+2004-05-09  Werner Koch
+
+	* random.c (read_pool): Mix the PID in to better protect after a
+	fork.
+
 2004-07-04  Moritz Schulte
 
 	* serpent.c: Use "u32_t" instead of "unsigned long", do not

diff --git a/cipher/random.c b/cipher/random.c
index 9e2878bf..1b60ade9 100644
--- a/cipher/random.c
+++ b/cipher/random.c
@@ -760,6 +760,10 @@ read_pool (byte *buffer, size_t length, int level)
     /* Always do a fast random poll (we have to use the unlocked
        version). */
     do_fast_random_poll();
+
+    /* Mix the pid in so that we for sure won't deliver the same random
+       after a fork. */
+    add_randomness (&my_pid, sizeof (my_pid), 0);
 
     /* Mix the pool (if add_randomness() didn't it). */
     if (!just_mixed)
@@ -777,7 +781,7 @@ read_pool (byte *buffer, size_t length, int level)
     mix_pool(rndpool); rndstats.mixrnd++;
     mix_pool(keypool); rndstats.mixkey++;
 
-    /* Read the required data. We use a readpoiter to read from a
+    /* Read the required data.  We use a readpointer to read from a
        different position each time */
     while (length--)
       {
@@ -802,7 +806,12 @@ read_pool (byte *buffer, size_t length, int level)
        faults, though. */
     if ( getpid () != my_pid )
-      goto retry;
+      {
+        pid_t x = getpid();
+        add_randomness (&x, sizeof(x), 0);
+        just_mixed = 0;  /* Make sure it will get mixed. */
+        goto retry;
+      }
 }
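
A note on the random.c change above: after a fork() the parent and the child
continue with bit-identical copies of the entropy pool, so without further
distinguishing input both processes could hand out the same "random" bytes.
The patch therefore mixes the current PID into the pool on every read_pool()
call and, when getpid() no longer matches the recorded my_pid, stirs the new
PID in and forces a re-mix before retrying.  The sketch below restates that
idea outside of GnuPG; pool, pool_pid, absorb(), stir_pool() and
fork_protect() are hypothetical stand-ins for the real rndpool machinery,
not actual GnuPG interfaces.

    #include <unistd.h>
    #include <sys/types.h>

    #define POOLSIZE 600

    static unsigned char pool[POOLSIZE];   /* Inherited across fork(). */
    static pid_t pool_pid;                 /* PID that last owned the pool. */

    /* Stand-in for the real pool mixer (a hash over the whole pool). */
    static void stir_pool (void) { /* ... */ }

    /* Fold LEN bytes of BUF into the pool. */
    static void absorb (const void *buf, size_t len)
    {
      const unsigned char *p = buf;
      size_t i;

      for (i = 0; i < len; i++)
        pool[i % POOLSIZE] ^= p[i];
    }

    /* Call this before extracting output bytes. */
    void fork_protect (void)
    {
      pid_t pid = getpid ();

      absorb (&pid, sizeof pid);   /* Parent and child now diverge. */
      if (pid != pool_pid)         /* We are a freshly forked child, */
        {
          pool_pid = pid;
          stir_pool ();            /* ... so make the fresh PID spread. */
        }
    }
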
diff --git a/cipher/rijndael.c b/cipher/rijndael.c
index e53880f3..633bad36 100644
--- a/cipher/rijndael.c
+++ b/cipher/rijndael.c
@@ -48,13 +48,25 @@
 
 static const char *selftest(void);
 
-typedef struct {
-  int ROUNDS;                 /* key-length-dependent number of rounds */
-  int decryption_prepared;
-  byte keySched[MAXROUNDS+1][4][4];   /* key schedule */
-  byte keySched2[MAXROUNDS+1][4][4];  /* key schedule */
+typedef struct
+{
+  int ROUNDS;                 /* key-length-dependent number of rounds */
+  int decryption_prepared;
+  union
+  {
+    PROPERLY_ALIGNED_TYPE dummy;
+    byte keyschedule[MAXROUNDS+1][4][4];
+  } u1;
+  union
+  {
+    PROPERLY_ALIGNED_TYPE dummy;
+    byte keyschedule[MAXROUNDS+1][4][4];
+  } u2;
 } RIJNDAEL_context;
 
+#define keySched  u1.keyschedule
+#define keySched2 u2.keyschedule
+
 static const byte S[256] = {
    99, 124, 119, 123, 242, 107, 111, 197,
@@ -1881,90 +1893,108 @@ prepare_decryption( RIJNDAEL_context *ctx )
 
 /* Encrypt one block.  A and B may be the same. */
 static void
-do_encrypt (const RIJNDAEL_context *ctx, byte *b, const byte *a)
+do_encrypt (const RIJNDAEL_context *ctx, byte *bx, const byte *ax)
 {
   /* FIXME: Ugly code, replace by straighter implementaion and use
      optimized assembler for common CPUs. */
   int r;
-  union {
+  union
+  {
     u32  tempu32[4];  /* Force correct alignment. */
     byte temp[4][4];
   } u;
   int ROUNDS = ctx->ROUNDS;
 #define rk (ctx->keySched)
 
-  *((u32*)u.temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[0][0]);
-  *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[0][1]);
-  *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[0][2]);
-  *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[0][3]);
-  *((u32*)(b    )) = (*((u32*)T1[u.temp[0][0]])
+  /* BX and AX are not necessary correctly aligned.  Thus we need to
+     copy them here. */
+  union
+  {
+    u32 dummy[4];
+    byte a[16];
+  } a;
+  union
+  {
+    u32 dummy[4];
+    byte b[16];
+  } b;
+
+  memcpy (a.a, ax, 16);
+
+  *((u32*)u.temp[0]) = *((u32*)(a.a   )) ^ *((u32*)rk[0][0]);
+  *((u32*)u.temp[1]) = *((u32*)(a.a+ 4)) ^ *((u32*)rk[0][1]);
+  *((u32*)u.temp[2]) = *((u32*)(a.a+ 8)) ^ *((u32*)rk[0][2]);
+  *((u32*)u.temp[3]) = *((u32*)(a.a+12)) ^ *((u32*)rk[0][3]);
+  *((u32*)(b.b    )) = (*((u32*)T1[u.temp[0][0]])
                         ^ *((u32*)T2[u.temp[1][1]])
                         ^ *((u32*)T3[u.temp[2][2]])
                         ^ *((u32*)T4[u.temp[3][3]]));
-  *((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
+  *((u32*)(b.b + 4)) = (*((u32*)T1[u.temp[1][0]])
                         ^ *((u32*)T2[u.temp[2][1]])
                         ^ *((u32*)T3[u.temp[3][2]])
                         ^ *((u32*)T4[u.temp[0][3]]));
-  *((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
+  *((u32*)(b.b + 8)) = (*((u32*)T1[u.temp[2][0]])
                         ^ *((u32*)T2[u.temp[3][1]])
                         ^ *((u32*)T3[u.temp[0][2]])
                         ^ *((u32*)T4[u.temp[1][3]]));
-  *((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
+  *((u32*)(b.b +12)) = (*((u32*)T1[u.temp[3][0]])
                         ^ *((u32*)T2[u.temp[0][1]])
                         ^ *((u32*)T3[u.temp[1][2]])
                         ^ *((u32*)T4[u.temp[2][3]]));
 
   for (r = 1; r < ROUNDS-1; r++)
     {
-      *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[r][0]);
-      *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
-      *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
-      *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
+      *((u32*)u.temp[0]) = *((u32*)(b.b   )) ^ *((u32*)rk[r][0]);
+      *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[r][1]);
+      *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[r][2]);
+      *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[r][3]);
 
-      *((u32*)(b    )) = (*((u32*)T1[u.temp[0][0]])
+      *((u32*)(b.b    )) = (*((u32*)T1[u.temp[0][0]])
                             ^ *((u32*)T2[u.temp[1][1]])
                             ^ *((u32*)T3[u.temp[2][2]])
                             ^ *((u32*)T4[u.temp[3][3]]));
-      *((u32*)(b + 4)) = (*((u32*)T1[u.temp[1][0]])
+      *((u32*)(b.b + 4)) = (*((u32*)T1[u.temp[1][0]])
                             ^ *((u32*)T2[u.temp[2][1]])
                             ^ *((u32*)T3[u.temp[3][2]])
                             ^ *((u32*)T4[u.temp[0][3]]));
-      *((u32*)(b + 8)) = (*((u32*)T1[u.temp[2][0]])
+      *((u32*)(b.b + 8)) = (*((u32*)T1[u.temp[2][0]])
                             ^ *((u32*)T2[u.temp[3][1]])
                             ^ *((u32*)T3[u.temp[0][2]])
                             ^ *((u32*)T4[u.temp[1][3]]));
-      *((u32*)(b +12)) = (*((u32*)T1[u.temp[3][0]])
+      *((u32*)(b.b +12)) = (*((u32*)T1[u.temp[3][0]])
                             ^ *((u32*)T2[u.temp[0][1]])
                             ^ *((u32*)T3[u.temp[1][2]])
                             ^ *((u32*)T4[u.temp[2][3]]));
     }
 
   /* Last round is special. */
-  *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[ROUNDS-1][0]);
-  *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
-  *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
-  *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
-  b[ 0] = T1[u.temp[0][0]][1];
-  b[ 1] = T1[u.temp[1][1]][1];
-  b[ 2] = T1[u.temp[2][2]][1];
-  b[ 3] = T1[u.temp[3][3]][1];
-  b[ 4] = T1[u.temp[1][0]][1];
-  b[ 5] = T1[u.temp[2][1]][1];
-  b[ 6] = T1[u.temp[3][2]][1];
-  b[ 7] = T1[u.temp[0][3]][1];
-  b[ 8] = T1[u.temp[2][0]][1];
-  b[ 9] = T1[u.temp[3][1]][1];
-  b[10] = T1[u.temp[0][2]][1];
-  b[11] = T1[u.temp[1][3]][1];
-  b[12] = T1[u.temp[3][0]][1];
-  b[13] = T1[u.temp[0][1]][1];
-  b[14] = T1[u.temp[1][2]][1];
-  b[15] = T1[u.temp[2][3]][1];
-  *((u32*)(b   )) ^= *((u32*)rk[ROUNDS][0]);
-  *((u32*)(b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
-  *((u32*)(b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
-  *((u32*)(b+12)) ^= *((u32*)rk[ROUNDS][3]);
+  *((u32*)u.temp[0]) = *((u32*)(b.b   )) ^ *((u32*)rk[ROUNDS-1][0]);
+  *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[ROUNDS-1][1]);
+  *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[ROUNDS-1][2]);
+  *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[ROUNDS-1][3]);
+  b.b[ 0] = T1[u.temp[0][0]][1];
+  b.b[ 1] = T1[u.temp[1][1]][1];
+  b.b[ 2] = T1[u.temp[2][2]][1];
+  b.b[ 3] = T1[u.temp[3][3]][1];
+  b.b[ 4] = T1[u.temp[1][0]][1];
+  b.b[ 5] = T1[u.temp[2][1]][1];
+  b.b[ 6] = T1[u.temp[3][2]][1];
+  b.b[ 7] = T1[u.temp[0][3]][1];
+  b.b[ 8] = T1[u.temp[2][0]][1];
+  b.b[ 9] = T1[u.temp[3][1]][1];
+  b.b[10] = T1[u.temp[0][2]][1];
+  b.b[11] = T1[u.temp[1][3]][1];
+  b.b[12] = T1[u.temp[3][0]][1];
+  b.b[13] = T1[u.temp[0][1]][1];
+  b.b[14] = T1[u.temp[1][2]][1];
+  b.b[15] = T1[u.temp[2][3]][1];
+  *((u32*)(b.b   )) ^= *((u32*)rk[ROUNDS][0]);
+  *((u32*)(b.b+ 4)) ^= *((u32*)rk[ROUNDS][1]);
+  *((u32*)(b.b+ 8)) ^= *((u32*)rk[ROUNDS][2]);
+  *((u32*)(b.b+12)) ^= *((u32*)rk[ROUNDS][3]);
+
+  memcpy (bx, b.b, 16);
 #undef rk
 }
 
@@ -1974,14 +2004,14 @@ rijndael_encrypt (void *context, byte *b, const byte *a)
   RIJNDAEL_context *ctx = context;
 
   do_encrypt (ctx, b, a);
-  _gcry_burn_stack (16 + 2*sizeof(int));
+  _gcry_burn_stack (48 + 2*sizeof(int));
 }
 
 
 /* Decrypt one block.  a and b may be the same. */
 static void
-do_decrypt (RIJNDAEL_context *ctx, byte *b, const byte *a)
+do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
 {
 #define rk (ctx->keySched2)
   int ROUNDS = ctx->ROUNDS;
@@ -1991,6 +2021,21 @@ do_decrypt (RIJNDAEL_context *ctx, byte *b, const byte *a)
     byte temp[4][4];
   } u;
 
+  /* BX and AX are not necessary correctly aligned.  Thus we need to
+     copy them here. */
+  union
+  {
+    u32 dummy[4];
+    byte a[16];
+  } a;
+  union
+  {
+    u32 dummy[4];
+    byte b[16];
+  } b;
+
+  memcpy (a.a, ax, 16);
+
   if ( !ctx->decryption_prepared )
     {
       prepare_decryption ( ctx );
@@ -1998,77 +2043,79 @@ do_decrypt (RIJNDAEL_context *ctx, byte *b, const byte *a)
       ctx->decryption_prepared = 1;
     }
 
-  *((u32*)u.temp[0]) = *((u32*)(a   )) ^ *((u32*)rk[ROUNDS][0]);
-  *((u32*)u.temp[1]) = *((u32*)(a+ 4)) ^ *((u32*)rk[ROUNDS][1]);
-  *((u32*)u.temp[2]) = *((u32*)(a+ 8)) ^ *((u32*)rk[ROUNDS][2]);
-  *((u32*)u.temp[3]) = *((u32*)(a+12)) ^ *((u32*)rk[ROUNDS][3]);
+  *((u32*)u.temp[0]) = *((u32*)(a.a   )) ^ *((u32*)rk[ROUNDS][0]);
+  *((u32*)u.temp[1]) = *((u32*)(a.a+ 4)) ^ *((u32*)rk[ROUNDS][1]);
+  *((u32*)u.temp[2]) = *((u32*)(a.a+ 8)) ^ *((u32*)rk[ROUNDS][2]);
+  *((u32*)u.temp[3]) = *((u32*)(a.a+12)) ^ *((u32*)rk[ROUNDS][3]);
 
-  *((u32*)(b    )) = (*((u32*)T5[u.temp[0][0]])
+  *((u32*)(b.b    )) = (*((u32*)T5[u.temp[0][0]])
                       ^ *((u32*)T6[u.temp[3][1]])
                       ^ *((u32*)T7[u.temp[2][2]])
                       ^ *((u32*)T8[u.temp[1][3]]));
-  *((u32*)(b+ 4)) = (*((u32*)T5[u.temp[1][0]])
+  *((u32*)(b.b+ 4)) = (*((u32*)T5[u.temp[1][0]])
                       ^ *((u32*)T6[u.temp[0][1]])
                       ^ *((u32*)T7[u.temp[3][2]])
                       ^ *((u32*)T8[u.temp[2][3]]));
-  *((u32*)(b+ 8)) = (*((u32*)T5[u.temp[2][0]])
+  *((u32*)(b.b+ 8)) = (*((u32*)T5[u.temp[2][0]])
                       ^ *((u32*)T6[u.temp[1][1]])
                       ^ *((u32*)T7[u.temp[0][2]])
                       ^ *((u32*)T8[u.temp[3][3]]));
-  *((u32*)(b+12)) = (*((u32*)T5[u.temp[3][0]])
+  *((u32*)(b.b+12)) = (*((u32*)T5[u.temp[3][0]])
                       ^ *((u32*)T6[u.temp[2][1]])
                       ^ *((u32*)T7[u.temp[1][2]])
                       ^ *((u32*)T8[u.temp[0][3]]));
 
   for (r = ROUNDS-1; r > 1; r--)
     {
-      *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[r][0]);
-      *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[r][1]);
-      *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[r][2]);
-      *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[r][3]);
-      *((u32*)(b    )) = (*((u32*)T5[u.temp[0][0]])
+      *((u32*)u.temp[0]) = *((u32*)(b.b   )) ^ *((u32*)rk[r][0]);
+      *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[r][1]);
+      *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[r][2]);
+      *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[r][3]);
+      *((u32*)(b.b    )) = (*((u32*)T5[u.temp[0][0]])
                           ^ *((u32*)T6[u.temp[3][1]])
                           ^ *((u32*)T7[u.temp[2][2]])
                           ^ *((u32*)T8[u.temp[1][3]]));
-      *((u32*)(b+ 4)) = (*((u32*)T5[u.temp[1][0]])
+      *((u32*)(b.b+ 4)) = (*((u32*)T5[u.temp[1][0]])
                           ^ *((u32*)T6[u.temp[0][1]])
                           ^ *((u32*)T7[u.temp[3][2]])
                           ^ *((u32*)T8[u.temp[2][3]]));
-      *((u32*)(b+ 8)) = (*((u32*)T5[u.temp[2][0]])
+      *((u32*)(b.b+ 8)) = (*((u32*)T5[u.temp[2][0]])
                           ^ *((u32*)T6[u.temp[1][1]])
                           ^ *((u32*)T7[u.temp[0][2]])
                           ^ *((u32*)T8[u.temp[3][3]]));
-      *((u32*)(b+12)) = (*((u32*)T5[u.temp[3][0]])
+      *((u32*)(b.b+12)) = (*((u32*)T5[u.temp[3][0]])
                           ^ *((u32*)T6[u.temp[2][1]])
                           ^ *((u32*)T7[u.temp[1][2]])
                           ^ *((u32*)T8[u.temp[0][3]]));
     }
 
   /* Last round is special. */
-  *((u32*)u.temp[0]) = *((u32*)(b   )) ^ *((u32*)rk[1][0]);
-  *((u32*)u.temp[1]) = *((u32*)(b+ 4)) ^ *((u32*)rk[1][1]);
-  *((u32*)u.temp[2]) = *((u32*)(b+ 8)) ^ *((u32*)rk[1][2]);
-  *((u32*)u.temp[3]) = *((u32*)(b+12)) ^ *((u32*)rk[1][3]);
-  b[ 0] = S5[u.temp[0][0]];
-  b[ 1] = S5[u.temp[3][1]];
-  b[ 2] = S5[u.temp[2][2]];
-  b[ 3] = S5[u.temp[1][3]];
-  b[ 4] = S5[u.temp[1][0]];
-  b[ 5] = S5[u.temp[0][1]];
-  b[ 6] = S5[u.temp[3][2]];
-  b[ 7] = S5[u.temp[2][3]];
-  b[ 8] = S5[u.temp[2][0]];
-  b[ 9] = S5[u.temp[1][1]];
-  b[10] = S5[u.temp[0][2]];
-  b[11] = S5[u.temp[3][3]];
-  b[12] = S5[u.temp[3][0]];
-  b[13] = S5[u.temp[2][1]];
-  b[14] = S5[u.temp[1][2]];
-  b[15] = S5[u.temp[0][3]];
-  *((u32*)(b   )) ^= *((u32*)rk[0][0]);
-  *((u32*)(b+ 4)) ^= *((u32*)rk[0][1]);
-  *((u32*)(b+ 8)) ^= *((u32*)rk[0][2]);
-  *((u32*)(b+12)) ^= *((u32*)rk[0][3]);
+  *((u32*)u.temp[0]) = *((u32*)(b.b   )) ^ *((u32*)rk[1][0]);
+  *((u32*)u.temp[1]) = *((u32*)(b.b+ 4)) ^ *((u32*)rk[1][1]);
+  *((u32*)u.temp[2]) = *((u32*)(b.b+ 8)) ^ *((u32*)rk[1][2]);
+  *((u32*)u.temp[3]) = *((u32*)(b.b+12)) ^ *((u32*)rk[1][3]);
+  b.b[ 0] = S5[u.temp[0][0]];
+  b.b[ 1] = S5[u.temp[3][1]];
+  b.b[ 2] = S5[u.temp[2][2]];
+  b.b[ 3] = S5[u.temp[1][3]];
+  b.b[ 4] = S5[u.temp[1][0]];
+  b.b[ 5] = S5[u.temp[0][1]];
+  b.b[ 6] = S5[u.temp[3][2]];
+  b.b[ 7] = S5[u.temp[2][3]];
+  b.b[ 8] = S5[u.temp[2][0]];
+  b.b[ 9] = S5[u.temp[1][1]];
+  b.b[10] = S5[u.temp[0][2]];
+  b.b[11] = S5[u.temp[3][3]];
+  b.b[12] = S5[u.temp[3][0]];
+  b.b[13] = S5[u.temp[2][1]];
+  b.b[14] = S5[u.temp[1][2]];
+  b.b[15] = S5[u.temp[0][3]];
+  *((u32*)(b.b   )) ^= *((u32*)rk[0][0]);
+  *((u32*)(b.b+ 4)) ^= *((u32*)rk[0][1]);
+  *((u32*)(b.b+ 8)) ^= *((u32*)rk[0][2]);
+  *((u32*)(b.b+12)) ^= *((u32*)rk[0][3]);
+
+  memcpy (bx, b.b, 16);
 #undef rk
 }
 
@@ -2078,7 +2125,7 @@ rijndael_decrypt (void *context, byte *b, const byte *a)
   RIJNDAEL_context *ctx = context;
 
   do_decrypt (ctx, b, a);
-  _gcry_burn_stack (16+2*sizeof(int));
+  _gcry_burn_stack (48+2*sizeof(int));
 }
--
cgit v1.2.1
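
The heart of the rijndael.c change is one idiom applied twice.  First, the
key schedule, previously a plain byte array inside the context, was only
guaranteed byte alignment even though the round functions dereference it
through u32 pointers; wrapping it in a union with a PROPERLY_ALIGNED_TYPE
member forces the needed alignment, and the #define keySched/keySched2
macros keep the old field names working so the bulk of the code is
untouched.  Second, the caller-supplied input and output blocks (ax, bx)
may sit at odd addresses, so they are staged through aligned local unions
with memcpy before any word-wide access.  Below is a self-contained sketch
of that idiom, not GnuPG code: PROPERLY_ALIGNED_TYPE is approximated here
by a union of basic C types (the real definition comes from GnuPG's
headers), and aligned16_t and xor_block are made-up names for illustration.

    #include <string.h>

    typedef unsigned int u32;   /* Assumed to be 32 bits, as in the patch. */

    /* Approximation of PROPERLY_ALIGNED_TYPE: a union of basic types, so
       anything sharing its storage gets worst-case alignment. */
    typedef union { int i; long l; void *p; double d; } PROPERLY_ALIGNED_TYPE;

    typedef union
    {
      PROPERLY_ALIGNED_TYPE dummy;   /* Forces the alignment ... */
      unsigned char bytes[16];       /* ... of this array. */
    } aligned16_t;

    /* XOR the 16-byte block SRC into DST using word-wide accesses.  DST
       and SRC may be arbitrarily aligned: both are staged through aligned
       unions, so the u32 loads and stores below cannot fault on
       strict-alignment CPUs (SPARC, older ARM, ...). */
    void xor_block (unsigned char *dst, const unsigned char *src)
    {
      aligned16_t a, b;
      int i;

      memcpy (a.bytes, dst, 16);     /* Possibly unaligned -> aligned. */
      memcpy (b.bytes, src, 16);
      for (i = 0; i < 4; i++)
        ((u32 *)a.bytes)[i] ^= ((u32 *)b.bytes)[i];
      memcpy (dst, a.bytes, 16);     /* Aligned -> possibly unaligned. */
    }

The two memcpy calls cost a little, but they run once per block, keep the
hot round loop word-aligned, and preserve the documented guarantee that
the A and B arguments may alias.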
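
The bump of the _gcry_burn_stack argument from 16 to 48 bytes follows
directly from the fix: do_encrypt and do_decrypt now leave three 16-byte
scratch objects on the stack (the temp union u plus the new a and b staging
unions), all holding key- or plaintext-dependent data that must be wiped
after use.  The helper works roughly like the sketch below; this body is a
paraphrase for illustration, not the exact GnuPG source.

    /* Overwrite roughly BYTES bytes of the stack below the caller by
       recursing, clearing one 64-byte frame at each level. */
    void burn_stack (int bytes)
    {
      volatile char buf[64];   /* volatile: keep the stores from being
                                  optimized away. */
      int i;

      for (i = 0; i < (int)sizeof buf; i++)
        buf[i] = 0;
      bytes -= (int)sizeof buf;
      if (bytes > 0)
        burn_stack (bytes);    /* Recurse to reach deeper addresses. */
    }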