1 /* $OpenBSD: rijndael.c,v 1.9 2001/08/22 09:46:42 heko Exp $ */ 2 3 /* This is an independent implementation of the encryption algorithm: */ 4 /* */ 5 /* RIJNDAEL by Joan Daemen and Vincent Rijmen */ 6 /* */ 7 /* which is a candidate algorithm in the Advanced Encryption Standard */ 8 /* programme of the US National Institute of Standards and Technology. */ 9 10 /* 11 ----------------------------------------------------------------------- 12 Copyright (c) 2001 Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK 13 14 TERMS 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions 18 are met: 19 1. Redistributions of source code must retain the above copyright 20 notice, this list of conditions and the following disclaimer. 21 2. Redistributions in binary form must reproduce the above copyright 22 notice, this list of conditions and the following disclaimer in the 23 documentation and/or other materials provided with the distribution. 24 25 This software is provided 'as is' with no guarantees of correctness or 26 fitness for purpose. 27 ----------------------------------------------------------------------- 28 */ 29 30 /* Timing data for Rijndael (rijndael.c) 31 32 Algorithm: rijndael (rijndael.c) 33 34 128 bit key: 35 Key Setup: 305/1389 cycles (encrypt/decrypt) 36 Encrypt: 374 cycles = 68.4 mbits/sec 37 Decrypt: 352 cycles = 72.7 mbits/sec 38 Mean: 363 cycles = 70.5 mbits/sec 39 40 192 bit key: 41 Key Setup: 277/1595 cycles (encrypt/decrypt) 42 Encrypt: 439 cycles = 58.3 mbits/sec 43 Decrypt: 425 cycles = 60.2 mbits/sec 44 Mean: 432 cycles = 59.3 mbits/sec 45 46 256 bit key: 47 Key Setup: 374/1960 cycles (encrypt/decrypt) 48 Encrypt: 502 cycles = 51.0 mbits/sec 49 Decrypt: 498 cycles = 51.4 mbits/sec 50 Mean: 500 cycles = 51.2 mbits/sec 51 52 */ 53 54 #include <sys/types.h> 55 #include "rijndael.h" 56 57 void gen_tabs __P((void)); 58 59 /* 3. Basic macros for speeding up generic operations */ 60 61 /* Circular rotate of 32 bit values */ 62 63 #define rotr(x,n) (((x) >> ((int)(n))) | ((x) << (32 - (int)(n)))) 64 #define rotl(x,n) (((x) << ((int)(n))) | ((x) >> (32 - (int)(n)))) 65 66 /* Invert byte order in a 32 bit variable */ 67 68 #define bswap(x) ((rotl(x, 8) & 0x00ff00ff) | (rotr(x, 8) & 0xff00ff00)) 69 70 /* Extract byte from a 32 bit quantity (little endian notation) */ 71 72 #define byte(x,n) ((u1byte)((x) >> (8 * n))) 73 74 #if BYTE_ORDER != LITTLE_ENDIAN 75 #define BYTE_SWAP 76 #endif 77 78 #ifdef BYTE_SWAP 79 #define io_swap(x) bswap(x) 80 #else 81 #define io_swap(x) (x) 82 #endif 83 84 #define LARGE_TABLES 85 86 u1byte pow_tab[256]; 87 u1byte log_tab[256]; 88 u1byte sbx_tab[256]; 89 u1byte isb_tab[256]; 90 u4byte rco_tab[ 10]; 91 u4byte ft_tab[4][256]; 92 u4byte it_tab[4][256]; 93 94 #ifdef LARGE_TABLES 95 u4byte fl_tab[4][256]; 96 u4byte il_tab[4][256]; 97 #endif 98 99 u4byte tab_gen = 0; 100 101 #define ff_mult(a,b) (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0) 102 103 #define f_rn(bo, bi, n, k) \ 104 bo[n] = ft_tab[0][byte(bi[n],0)] ^ \ 105 ft_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ 106 ft_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 107 ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) 108 109 #define i_rn(bo, bi, n, k) \ 110 bo[n] = it_tab[0][byte(bi[n],0)] ^ \ 111 it_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ 112 it_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 113 it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) 114 115 #ifdef LARGE_TABLES 116 117 #define ls_box(x) \ 118 ( fl_tab[0][byte(x, 0)] ^ \ 119 fl_tab[1][byte(x, 1)] ^ \ 120 fl_tab[2][byte(x, 2)] ^ \ 121 fl_tab[3][byte(x, 3)] ) 122 123 #define f_rl(bo, bi, n, k) \ 124 bo[n] = fl_tab[0][byte(bi[n],0)] ^ \ 125 fl_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ 126 fl_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 127 fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) 128 129 #define i_rl(bo, bi, n, k) \ 130 bo[n] = il_tab[0][byte(bi[n],0)] ^ \ 131 il_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ 132 il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 133 il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) 134 135 #else 136 137 #define ls_box(x) \ 138 ((u4byte)sbx_tab[byte(x, 0)] << 0) ^ \ 139 ((u4byte)sbx_tab[byte(x, 1)] << 8) ^ \ 140 ((u4byte)sbx_tab[byte(x, 2)] << 16) ^ \ 141 ((u4byte)sbx_tab[byte(x, 3)] << 24) 142 143 #define f_rl(bo, bi, n, k) \ 144 bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^ \ 145 rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]), 8) ^ \ 146 rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ 147 rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n) 148 149 #define i_rl(bo, bi, n, k) \ 150 bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^ \ 151 rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]), 8) ^ \ 152 rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ 153 rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n) 154 155 #endif 156 157 void 158 gen_tabs(void) 159 { 160 u4byte i, t; 161 u1byte p, q; 162 163 /* log and power tables for GF(2**8) finite field with */ 164 /* 0x11b as modular polynomial - the simplest prmitive */ 165 /* root is 0x11, used here to generate the tables */ 166 167 for(i = 0,p = 1; i < 256; ++i) { 168 pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i; 169 170 p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0); 171 } 172 173 log_tab[1] = 0; p = 1; 174 175 for(i = 0; i < 10; ++i) { 176 rco_tab[i] = p; 177 178 p = (p << 1) ^ (p & 0x80 ? 0x1b : 0); 179 } 180 181 /* note that the affine byte transformation matrix in */ 182 /* rijndael specification is in big endian format with */ 183 /* bit 0 as the most significant bit. In the remainder */ 184 /* of the specification the bits are numbered from the */ 185 /* least significant end of a byte. */ 186 187 for(i = 0; i < 256; ++i) { 188 p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p; 189 q = (q >> 7) | (q << 1); p ^= q; 190 q = (q >> 7) | (q << 1); p ^= q; 191 q = (q >> 7) | (q << 1); p ^= q; 192 q = (q >> 7) | (q << 1); p ^= q ^ 0x63; 193 sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i; 194 } 195 196 for(i = 0; i < 256; ++i) { 197 p = sbx_tab[i]; 198 199 #ifdef LARGE_TABLES 200 201 t = p; fl_tab[0][i] = t; 202 fl_tab[1][i] = rotl(t, 8); 203 fl_tab[2][i] = rotl(t, 16); 204 fl_tab[3][i] = rotl(t, 24); 205 #endif 206 t = ((u4byte)ff_mult(2, p)) | 207 ((u4byte)p << 8) | 208 ((u4byte)p << 16) | 209 ((u4byte)ff_mult(3, p) << 24); 210 211 ft_tab[0][i] = t; 212 ft_tab[1][i] = rotl(t, 8); 213 ft_tab[2][i] = rotl(t, 16); 214 ft_tab[3][i] = rotl(t, 24); 215 216 p = isb_tab[i]; 217 218 #ifdef LARGE_TABLES 219 220 t = p; il_tab[0][i] = t; 221 il_tab[1][i] = rotl(t, 8); 222 il_tab[2][i] = rotl(t, 16); 223 il_tab[3][i] = rotl(t, 24); 224 #endif 225 t = ((u4byte)ff_mult(14, p)) | 226 ((u4byte)ff_mult( 9, p) << 8) | 227 ((u4byte)ff_mult(13, p) << 16) | 228 ((u4byte)ff_mult(11, p) << 24); 229 230 it_tab[0][i] = t; 231 it_tab[1][i] = rotl(t, 8); 232 it_tab[2][i] = rotl(t, 16); 233 it_tab[3][i] = rotl(t, 24); 234 } 235 236 tab_gen = 1; 237 } 238 239 #define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) 240 241 #define imix_col(y,x) \ 242 u = star_x(x); \ 243 v = star_x(u); \ 244 w = star_x(v); \ 245 t = w ^ (x); \ 246 (y) = u ^ v ^ w; \ 247 (y) ^= rotr(u ^ t, 8) ^ \ 248 rotr(v ^ t, 16) ^ \ 249 rotr(t,24) 250 251 /* initialise the key schedule from the user supplied key */ 252 253 #define loop4(i) \ 254 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ 255 t ^= e_key[4 * i]; e_key[4 * i + 4] = t; \ 256 t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t; \ 257 t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t; \ 258 t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t; \ 259 } 260 261 #define loop6(i) \ 262 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ 263 t ^= e_key[6 * i]; e_key[6 * i + 6] = t; \ 264 t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t; \ 265 t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t; \ 266 t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t; \ 267 t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t; \ 268 t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t; \ 269 } 270 271 #define loop8(i) \ 272 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ 273 t ^= e_key[8 * i]; e_key[8 * i + 8] = t; \ 274 t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t; \ 275 t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t; \ 276 t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t; \ 277 t = e_key[8 * i + 4] ^ ls_box(t); \ 278 e_key[8 * i + 12] = t; \ 279 t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t; \ 280 t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t; \ 281 t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t; \ 282 } 283 284 rijndael_ctx * 285 rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len, 286 int encrypt) 287 { 288 u4byte i, t, u, v, w; 289 u4byte *e_key = ctx->e_key; 290 u4byte *d_key = ctx->d_key; 291 292 ctx->decrypt = !encrypt; 293 294 if(!tab_gen) 295 gen_tabs(); 296 297 ctx->k_len = (key_len + 31) / 32; 298 299 e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]); 300 e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]); 301 302 switch(ctx->k_len) { 303 case 4: t = e_key[3]; 304 for(i = 0; i < 10; ++i) 305 loop4(i); 306 break; 307 308 case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]); 309 for(i = 0; i < 8; ++i) 310 loop6(i); 311 break; 312 313 case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]); 314 e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]); 315 for(i = 0; i < 7; ++i) 316 loop8(i); 317 break; 318 } 319 320 if (!encrypt) { 321 d_key[0] = e_key[0]; d_key[1] = e_key[1]; 322 d_key[2] = e_key[2]; d_key[3] = e_key[3]; 323 324 for(i = 4; i < 4 * ctx->k_len + 24; ++i) { 325 imix_col(d_key[i], e_key[i]); 326 } 327 } 328 329 return ctx; 330 } 331 332 /* encrypt a block of text */ 333 334 #define f_nround(bo, bi, k) \ 335 f_rn(bo, bi, 0, k); \ 336 f_rn(bo, bi, 1, k); \ 337 f_rn(bo, bi, 2, k); \ 338 f_rn(bo, bi, 3, k); \ 339 k += 4 340 341 #define f_lround(bo, bi, k) \ 342 f_rl(bo, bi, 0, k); \ 343 f_rl(bo, bi, 1, k); \ 344 f_rl(bo, bi, 2, k); \ 345 f_rl(bo, bi, 3, k) 346 347 void 348 rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) 349 { 350 u4byte k_len = ctx->k_len; 351 u4byte *e_key = ctx->e_key; 352 u4byte b0[4], b1[4], *kp; 353 u4byte tbuf[4]; 354 355 if ((u_long)in_blk & 3) { 356 bcopy(in_blk, tbuf, sizeof(tbuf)); 357 b0[0] = io_swap(tbuf[0]) ^ e_key[0]; 358 b0[1] = io_swap(tbuf[1]) ^ e_key[1]; 359 b0[2] = io_swap(tbuf[2]) ^ e_key[2]; 360 b0[3] = io_swap(tbuf[3]) ^ e_key[3]; 361 } else { 362 b0[0] = io_swap(in_blk[0]) ^ e_key[0]; 363 b0[1] = io_swap(in_blk[1]) ^ e_key[1]; 364 b0[2] = io_swap(in_blk[2]) ^ e_key[2]; 365 b0[3] = io_swap(in_blk[3]) ^ e_key[3]; 366 } 367 368 kp = e_key + 4; 369 370 if(k_len > 6) { 371 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 372 } 373 374 if(k_len > 4) { 375 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 376 } 377 378 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 379 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 380 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 381 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 382 f_nround(b1, b0, kp); f_lround(b0, b1, kp); 383 384 if ((u_long)out_blk & 3) { 385 tbuf[0] = io_swap(b0[0]); 386 tbuf[1] = io_swap(b0[1]); 387 tbuf[2] = io_swap(b0[2]); 388 tbuf[3] = io_swap(b0[3]); 389 bcopy(tbuf, out_blk, sizeof(tbuf)); 390 } else { 391 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); 392 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); 393 } 394 } 395 396 /* decrypt a block of text */ 397 398 #define i_nround(bo, bi, k) \ 399 i_rn(bo, bi, 0, k); \ 400 i_rn(bo, bi, 1, k); \ 401 i_rn(bo, bi, 2, k); \ 402 i_rn(bo, bi, 3, k); \ 403 k -= 4 404 405 #define i_lround(bo, bi, k) \ 406 i_rl(bo, bi, 0, k); \ 407 i_rl(bo, bi, 1, k); \ 408 i_rl(bo, bi, 2, k); \ 409 i_rl(bo, bi, 3, k) 410 411 void 412 rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) 413 { 414 u4byte b0[4], b1[4], *kp; 415 u4byte k_len = ctx->k_len; 416 u4byte *e_key = ctx->e_key; 417 u4byte *d_key = ctx->d_key; 418 u4byte tbuf[4]; 419 420 if ((u_long)in_blk & 3) { 421 bcopy(in_blk, tbuf, sizeof(b0)); 422 b0[0] = io_swap(tbuf[0]) ^ e_key[4 * k_len + 24]; 423 b0[1] = io_swap(tbuf[1]) ^ e_key[4 * k_len + 25]; 424 b0[2] = io_swap(tbuf[2]) ^ e_key[4 * k_len + 26]; 425 b0[3] = io_swap(tbuf[3]) ^ e_key[4 * k_len + 27]; 426 } else { 427 b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24]; 428 b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25]; 429 b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26]; 430 b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27]; 431 } 432 433 kp = d_key + 4 * (k_len + 5); 434 435 if(k_len > 6) { 436 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 437 } 438 439 if(k_len > 4) { 440 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 441 } 442 443 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 444 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 445 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 446 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 447 i_nround(b1, b0, kp); i_lround(b0, b1, kp); 448 449 if ((u_long)out_blk & 3) { 450 tbuf[0] = io_swap(b0[0]); 451 tbuf[1] = io_swap(b0[1]); 452 tbuf[2] = io_swap(b0[2]); 453 tbuf[3] = io_swap(b0[3]); 454 bcopy(tbuf, out_blk, sizeof(tbuf)); 455 } else { 456 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); 457 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); 458 } 459 } 460