1 /* $OpenBSD: rijndael.c,v 1.8 2001/07/30 16:23:30 stevesk Exp $ */ 2 3 /* This is an independent implementation of the encryption algorithm: */ 4 /* */ 5 /* RIJNDAEL by Joan Daemen and Vincent Rijmen */ 6 /* */ 7 /* which is a candidate algorithm in the Advanced Encryption Standard */ 8 /* programme of the US National Institute of Standards and Technology. */ 9 10 /* 11 ----------------------------------------------------------------------- 12 Copyright (c) 2001 Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK 13 14 TERMS 15 16 Redistribution and use in source and binary forms, with or without 17 modification, are permitted provided that the following conditions 18 are met: 19 1. Redistributions of source code must retain the above copyright 20 notice, this list of conditions and the following disclaimer. 21 2. Redistributions in binary form must reproduce the above copyright 22 notice, this list of conditions and the following disclaimer in the 23 documentation and/or other materials provided with the distribution. 24 25 This software is provided 'as is' with no guarantees of correctness or 26 fitness for purpose. 27 ----------------------------------------------------------------------- 28 */ 29 30 /* Timing data for Rijndael (rijndael.c) 31 32 Algorithm: rijndael (rijndael.c) 33 34 128 bit key: 35 Key Setup: 305/1389 cycles (encrypt/decrypt) 36 Encrypt: 374 cycles = 68.4 mbits/sec 37 Decrypt: 352 cycles = 72.7 mbits/sec 38 Mean: 363 cycles = 70.5 mbits/sec 39 40 192 bit key: 41 Key Setup: 277/1595 cycles (encrypt/decrypt) 42 Encrypt: 439 cycles = 58.3 mbits/sec 43 Decrypt: 425 cycles = 60.2 mbits/sec 44 Mean: 432 cycles = 59.3 mbits/sec 45 46 256 bit key: 47 Key Setup: 374/1960 cycles (encrypt/decrypt) 48 Encrypt: 502 cycles = 51.0 mbits/sec 49 Decrypt: 498 cycles = 51.4 mbits/sec 50 Mean: 500 cycles = 51.2 mbits/sec 51 52 */ 53 54 #include <sys/types.h> 55 #include "rijndael.h" 56 57 void gen_tabs __P((void)); 58 59 /* 3. Basic macros for speeding up generic operations */ 60 61 /* Circular rotate of 32 bit values */ 62 63 #define rotr(x,n) (((x) >> ((int)(n))) | ((x) << (32 - (int)(n)))) 64 #define rotl(x,n) (((x) << ((int)(n))) | ((x) >> (32 - (int)(n)))) 65 66 /* Invert byte order in a 32 bit variable */ 67 68 #define bswap(x) ((rotl(x, 8) & 0x00ff00ff) | (rotr(x, 8) & 0xff00ff00)) 69 70 /* Extract byte from a 32 bit quantity (little endian notation) */ 71 72 #define byte(x,n) ((u1byte)((x) >> (8 * n))) 73 74 #if BYTE_ORDER != LITTLE_ENDIAN 75 #define BYTE_SWAP 76 #endif 77 78 #ifdef BYTE_SWAP 79 #define io_swap(x) bswap(x) 80 #else 81 #define io_swap(x) (x) 82 #endif 83 84 #define LARGE_TABLES 85 86 u1byte pow_tab[256]; 87 u1byte log_tab[256]; 88 u1byte sbx_tab[256]; 89 u1byte isb_tab[256]; 90 u4byte rco_tab[ 10]; 91 u4byte ft_tab[4][256]; 92 u4byte it_tab[4][256]; 93 94 #ifdef LARGE_TABLES 95 u4byte fl_tab[4][256]; 96 u4byte il_tab[4][256]; 97 #endif 98 99 u4byte tab_gen = 0; 100 101 #define ff_mult(a,b) (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0) 102 103 #define f_rn(bo, bi, n, k) \ 104 bo[n] = ft_tab[0][byte(bi[n],0)] ^ \ 105 ft_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ 106 ft_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 107 ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) 108 109 #define i_rn(bo, bi, n, k) \ 110 bo[n] = it_tab[0][byte(bi[n],0)] ^ \ 111 it_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ 112 it_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 113 it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) 114 115 #ifdef LARGE_TABLES 116 117 #define ls_box(x) \ 118 ( fl_tab[0][byte(x, 0)] ^ \ 119 fl_tab[1][byte(x, 1)] ^ \ 120 fl_tab[2][byte(x, 2)] ^ \ 121 fl_tab[3][byte(x, 3)] ) 122 123 #define f_rl(bo, bi, n, k) \ 124 bo[n] = fl_tab[0][byte(bi[n],0)] ^ \ 125 fl_tab[1][byte(bi[(n + 1) & 3],1)] ^ \ 126 fl_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 127 fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n) 128 129 #define i_rl(bo, bi, n, k) \ 130 bo[n] = il_tab[0][byte(bi[n],0)] ^ \ 131 il_tab[1][byte(bi[(n + 3) & 3],1)] ^ \ 132 il_tab[2][byte(bi[(n + 2) & 3],2)] ^ \ 133 il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n) 134 135 #else 136 137 #define ls_box(x) \ 138 ((u4byte)sbx_tab[byte(x, 0)] << 0) ^ \ 139 ((u4byte)sbx_tab[byte(x, 1)] << 8) ^ \ 140 ((u4byte)sbx_tab[byte(x, 2)] << 16) ^ \ 141 ((u4byte)sbx_tab[byte(x, 3)] << 24) 142 143 #define f_rl(bo, bi, n, k) \ 144 bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^ \ 145 rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]), 8) ^ \ 146 rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ 147 rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n) 148 149 #define i_rl(bo, bi, n, k) \ 150 bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^ \ 151 rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]), 8) ^ \ 152 rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^ \ 153 rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n) 154 155 #endif 156 157 void 158 gen_tabs(void) 159 { 160 u4byte i, t; 161 u1byte p, q; 162 163 /* log and power tables for GF(2**8) finite field with */ 164 /* 0x11b as modular polynomial - the simplest prmitive */ 165 /* root is 0x11, used here to generate the tables */ 166 167 for(i = 0,p = 1; i < 256; ++i) { 168 pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i; 169 170 p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0); 171 } 172 173 log_tab[1] = 0; p = 1; 174 175 for(i = 0; i < 10; ++i) { 176 rco_tab[i] = p; 177 178 p = (p << 1) ^ (p & 0x80 ? 0x1b : 0); 179 } 180 181 /* note that the affine byte transformation matrix in */ 182 /* rijndael specification is in big endian format with */ 183 /* bit 0 as the most significant bit. In the remainder */ 184 /* of the specification the bits are numbered from the */ 185 /* least significant end of a byte. */ 186 187 for(i = 0; i < 256; ++i) { 188 p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p; 189 q = (q >> 7) | (q << 1); p ^= q; 190 q = (q >> 7) | (q << 1); p ^= q; 191 q = (q >> 7) | (q << 1); p ^= q; 192 q = (q >> 7) | (q << 1); p ^= q ^ 0x63; 193 sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i; 194 } 195 196 for(i = 0; i < 256; ++i) { 197 p = sbx_tab[i]; 198 199 #ifdef LARGE_TABLES 200 201 t = p; fl_tab[0][i] = t; 202 fl_tab[1][i] = rotl(t, 8); 203 fl_tab[2][i] = rotl(t, 16); 204 fl_tab[3][i] = rotl(t, 24); 205 #endif 206 t = ((u4byte)ff_mult(2, p)) | 207 ((u4byte)p << 8) | 208 ((u4byte)p << 16) | 209 ((u4byte)ff_mult(3, p) << 24); 210 211 ft_tab[0][i] = t; 212 ft_tab[1][i] = rotl(t, 8); 213 ft_tab[2][i] = rotl(t, 16); 214 ft_tab[3][i] = rotl(t, 24); 215 216 p = isb_tab[i]; 217 218 #ifdef LARGE_TABLES 219 220 t = p; il_tab[0][i] = t; 221 il_tab[1][i] = rotl(t, 8); 222 il_tab[2][i] = rotl(t, 16); 223 il_tab[3][i] = rotl(t, 24); 224 #endif 225 t = ((u4byte)ff_mult(14, p)) | 226 ((u4byte)ff_mult( 9, p) << 8) | 227 ((u4byte)ff_mult(13, p) << 16) | 228 ((u4byte)ff_mult(11, p) << 24); 229 230 it_tab[0][i] = t; 231 it_tab[1][i] = rotl(t, 8); 232 it_tab[2][i] = rotl(t, 16); 233 it_tab[3][i] = rotl(t, 24); 234 } 235 236 tab_gen = 1; 237 } 238 239 #define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) 240 241 #define imix_col(y,x) \ 242 u = star_x(x); \ 243 v = star_x(u); \ 244 w = star_x(v); \ 245 t = w ^ (x); \ 246 (y) = u ^ v ^ w; \ 247 (y) ^= rotr(u ^ t, 8) ^ \ 248 rotr(v ^ t, 16) ^ \ 249 rotr(t,24) 250 251 /* initialise the key schedule from the user supplied key */ 252 253 #define loop4(i) \ 254 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ 255 t ^= e_key[4 * i]; e_key[4 * i + 4] = t; \ 256 t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t; \ 257 t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t; \ 258 t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t; \ 259 } 260 261 #define loop6(i) \ 262 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ 263 t ^= e_key[6 * i]; e_key[6 * i + 6] = t; \ 264 t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t; \ 265 t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t; \ 266 t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t; \ 267 t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t; \ 268 t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t; \ 269 } 270 271 #define loop8(i) \ 272 { t = ls_box(rotr(t, 8)) ^ rco_tab[i]; \ 273 t ^= e_key[8 * i]; e_key[8 * i + 8] = t; \ 274 t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t; \ 275 t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t; \ 276 t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t; \ 277 t = e_key[8 * i + 4] ^ ls_box(t); \ 278 e_key[8 * i + 12] = t; \ 279 t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t; \ 280 t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t; \ 281 t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t; \ 282 } 283 284 rijndael_ctx * 285 rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len, 286 int encrypt) 287 { 288 u4byte i, t, u, v, w; 289 u4byte *e_key = ctx->e_key; 290 u4byte *d_key = ctx->d_key; 291 292 ctx->decrypt = !encrypt; 293 294 if(!tab_gen) 295 gen_tabs(); 296 297 ctx->k_len = (key_len + 31) / 32; 298 299 e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]); 300 e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]); 301 302 switch(ctx->k_len) { 303 case 4: t = e_key[3]; 304 for(i = 0; i < 10; ++i) 305 loop4(i); 306 break; 307 308 case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]); 309 for(i = 0; i < 8; ++i) 310 loop6(i); 311 break; 312 313 case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]); 314 e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]); 315 for(i = 0; i < 7; ++i) 316 loop8(i); 317 break; 318 } 319 320 if (!encrypt) { 321 d_key[0] = e_key[0]; d_key[1] = e_key[1]; 322 d_key[2] = e_key[2]; d_key[3] = e_key[3]; 323 324 for(i = 4; i < 4 * ctx->k_len + 24; ++i) { 325 imix_col(d_key[i], e_key[i]); 326 } 327 } 328 329 return ctx; 330 } 331 332 /* encrypt a block of text */ 333 334 #define f_nround(bo, bi, k) \ 335 f_rn(bo, bi, 0, k); \ 336 f_rn(bo, bi, 1, k); \ 337 f_rn(bo, bi, 2, k); \ 338 f_rn(bo, bi, 3, k); \ 339 k += 4 340 341 #define f_lround(bo, bi, k) \ 342 f_rl(bo, bi, 0, k); \ 343 f_rl(bo, bi, 1, k); \ 344 f_rl(bo, bi, 2, k); \ 345 f_rl(bo, bi, 3, k) 346 347 void 348 rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) 349 { 350 u4byte k_len = ctx->k_len; 351 u4byte *e_key = ctx->e_key; 352 u4byte b0[4], b1[4], *kp; 353 354 b0[0] = io_swap(in_blk[0]) ^ e_key[0]; 355 b0[1] = io_swap(in_blk[1]) ^ e_key[1]; 356 b0[2] = io_swap(in_blk[2]) ^ e_key[2]; 357 b0[3] = io_swap(in_blk[3]) ^ e_key[3]; 358 359 kp = e_key + 4; 360 361 if(k_len > 6) { 362 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 363 } 364 365 if(k_len > 4) { 366 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 367 } 368 369 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 370 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 371 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 372 f_nround(b1, b0, kp); f_nround(b0, b1, kp); 373 f_nround(b1, b0, kp); f_lround(b0, b1, kp); 374 375 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); 376 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); 377 } 378 379 /* decrypt a block of text */ 380 381 #define i_nround(bo, bi, k) \ 382 i_rn(bo, bi, 0, k); \ 383 i_rn(bo, bi, 1, k); \ 384 i_rn(bo, bi, 2, k); \ 385 i_rn(bo, bi, 3, k); \ 386 k -= 4 387 388 #define i_lround(bo, bi, k) \ 389 i_rl(bo, bi, 0, k); \ 390 i_rl(bo, bi, 1, k); \ 391 i_rl(bo, bi, 2, k); \ 392 i_rl(bo, bi, 3, k) 393 394 void 395 rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk) 396 { 397 u4byte b0[4], b1[4], *kp; 398 u4byte k_len = ctx->k_len; 399 u4byte *e_key = ctx->e_key; 400 u4byte *d_key = ctx->d_key; 401 402 b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24]; 403 b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25]; 404 b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26]; 405 b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27]; 406 407 kp = d_key + 4 * (k_len + 5); 408 409 if(k_len > 6) { 410 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 411 } 412 413 if(k_len > 4) { 414 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 415 } 416 417 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 418 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 419 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 420 i_nround(b1, b0, kp); i_nround(b0, b1, kp); 421 i_nround(b1, b0, kp); i_lround(b0, b1, kp); 422 423 out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]); 424 out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]); 425 } 426