xref: /openbsd-src/usr.bin/ssh/rijndael.c (revision 2badd5e3f47d2d4252969cd98d7042b4e701b5ac)
1 /*	$OpenBSD: rijndael.c,v 1.9 2001/08/22 09:46:42 heko Exp $	*/
2 
3 /* This is an independent implementation of the encryption algorithm:   */
4 /*                                                                      */
5 /*         RIJNDAEL by Joan Daemen and Vincent Rijmen                   */
6 /*                                                                      */
7 /* which is a candidate algorithm in the Advanced Encryption Standard   */
8 /* programme of the US National Institute of Standards and Technology.  */
9 
10 /*
11    -----------------------------------------------------------------------
12    Copyright (c) 2001 Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK
13 
14    TERMS
15 
16    Redistribution and use in source and binary forms, with or without
17    modification, are permitted provided that the following conditions
18    are met:
19    1. Redistributions of source code must retain the above copyright
20       notice, this list of conditions and the following disclaimer.
21    2. Redistributions in binary form must reproduce the above copyright
22       notice, this list of conditions and the following disclaimer in the
23       documentation and/or other materials provided with the distribution.
24 
25    This software is provided 'as is' with no guarantees of correctness or
26    fitness for purpose.
27    -----------------------------------------------------------------------
28 */
29 
30 /* Timing data for Rijndael (rijndael.c)
31 
32 Algorithm: rijndael (rijndael.c)
33 
34 128 bit key:
35 Key Setup:    305/1389 cycles (encrypt/decrypt)
36 Encrypt:       374 cycles =    68.4 mbits/sec
37 Decrypt:       352 cycles =    72.7 mbits/sec
38 Mean:          363 cycles =    70.5 mbits/sec
39 
40 192 bit key:
41 Key Setup:    277/1595 cycles (encrypt/decrypt)
42 Encrypt:       439 cycles =    58.3 mbits/sec
43 Decrypt:       425 cycles =    60.2 mbits/sec
44 Mean:          432 cycles =    59.3 mbits/sec
45 
46 256 bit key:
47 Key Setup:    374/1960 cycles (encrypt/decrypt)
48 Encrypt:       502 cycles =    51.0 mbits/sec
49 Decrypt:       498 cycles =    51.4 mbits/sec
50 Mean:          500 cycles =    51.2 mbits/sec
51 
52 */
53 
54 #include <sys/types.h>
55 #include "rijndael.h"
56 
57 void gen_tabs	__P((void));
58 
59 /* 3. Basic macros for speeding up generic operations               */
60 
61 /* Circular rotate of 32 bit values                                 */
62 
63 #define rotr(x,n)   (((x) >> ((int)(n))) | ((x) << (32 - (int)(n))))
64 #define rotl(x,n)   (((x) << ((int)(n))) | ((x) >> (32 - (int)(n))))
65 
66 /* Invert byte order in a 32 bit variable                           */
67 
68 #define bswap(x)    ((rotl(x, 8) & 0x00ff00ff) | (rotr(x, 8) & 0xff00ff00))
69 
70 /* Extract byte from a 32 bit quantity (little endian notation)     */
71 
72 #define byte(x,n)   ((u1byte)((x) >> (8 * n)))
73 
74 #if BYTE_ORDER != LITTLE_ENDIAN
75 #define BYTE_SWAP
76 #endif
77 
78 #ifdef  BYTE_SWAP
79 #define io_swap(x)  bswap(x)
80 #else
81 #define io_swap(x)  (x)
82 #endif
83 
84 #define LARGE_TABLES
85 
86 u1byte  pow_tab[256];
87 u1byte  log_tab[256];
88 u1byte  sbx_tab[256];
89 u1byte  isb_tab[256];
90 u4byte  rco_tab[ 10];
91 u4byte  ft_tab[4][256];
92 u4byte  it_tab[4][256];
93 
94 #ifdef  LARGE_TABLES
95   u4byte  fl_tab[4][256];
96   u4byte  il_tab[4][256];
97 #endif
98 
99 u4byte  tab_gen = 0;
100 
101 #define ff_mult(a,b)    (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0)
102 
103 #define f_rn(bo, bi, n, k)                          \
104     bo[n] =  ft_tab[0][byte(bi[n],0)] ^             \
105 	     ft_tab[1][byte(bi[(n + 1) & 3],1)] ^   \
106 	     ft_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
107 	     ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
108 
109 #define i_rn(bo, bi, n, k)                          \
110     bo[n] =  it_tab[0][byte(bi[n],0)] ^             \
111 	     it_tab[1][byte(bi[(n + 3) & 3],1)] ^   \
112 	     it_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
113 	     it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
114 
115 #ifdef LARGE_TABLES
116 
117 #define ls_box(x)                \
118     ( fl_tab[0][byte(x, 0)] ^    \
119       fl_tab[1][byte(x, 1)] ^    \
120       fl_tab[2][byte(x, 2)] ^    \
121       fl_tab[3][byte(x, 3)] )
122 
123 #define f_rl(bo, bi, n, k)                          \
124     bo[n] =  fl_tab[0][byte(bi[n],0)] ^             \
125 	     fl_tab[1][byte(bi[(n + 1) & 3],1)] ^   \
126 	     fl_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
127 	     fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
128 
129 #define i_rl(bo, bi, n, k)                          \
130     bo[n] =  il_tab[0][byte(bi[n],0)] ^             \
131 	     il_tab[1][byte(bi[(n + 3) & 3],1)] ^   \
132 	     il_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
133 	     il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
134 
135 #else
136 
137 #define ls_box(x)                            \
138     ((u4byte)sbx_tab[byte(x, 0)] <<  0) ^    \
139     ((u4byte)sbx_tab[byte(x, 1)] <<  8) ^    \
140     ((u4byte)sbx_tab[byte(x, 2)] << 16) ^    \
141     ((u4byte)sbx_tab[byte(x, 3)] << 24)
142 
143 #define f_rl(bo, bi, n, k)                                      \
144     bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^                    \
145 	rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]),  8) ^  \
146 	rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^  \
147 	rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n)
148 
149 #define i_rl(bo, bi, n, k)                                      \
150     bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^                    \
151 	rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]),  8) ^  \
152 	rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^  \
153 	rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n)
154 
155 #endif
156 
157 void
158 gen_tabs(void)
159 {
160 	u4byte  i, t;
161 	u1byte  p, q;
162 
163 	/* log and power tables for GF(2**8) finite field with  */
164 	/* 0x11b as modular polynomial - the simplest prmitive  */
165 	/* root is 0x11, used here to generate the tables       */
166 
167 	for(i = 0,p = 1; i < 256; ++i) {
168 		pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i;
169 
170 		p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0);
171 	}
172 
173 	log_tab[1] = 0; p = 1;
174 
175 	for(i = 0; i < 10; ++i) {
176 		rco_tab[i] = p;
177 
178 		p = (p << 1) ^ (p & 0x80 ? 0x1b : 0);
179 	}
180 
181 	/* note that the affine byte transformation matrix in   */
182 	/* rijndael specification is in big endian format with  */
183 	/* bit 0 as the most significant bit. In the remainder  */
184 	/* of the specification the bits are numbered from the  */
185 	/* least significant end of a byte.                     */
186 
187 	for(i = 0; i < 256; ++i) {
188 		p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p;
189 		q = (q >> 7) | (q << 1); p ^= q;
190 		q = (q >> 7) | (q << 1); p ^= q;
191 		q = (q >> 7) | (q << 1); p ^= q;
192 		q = (q >> 7) | (q << 1); p ^= q ^ 0x63;
193 		sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i;
194 	}
195 
196 	for(i = 0; i < 256; ++i) {
197 		p = sbx_tab[i];
198 
199 #ifdef  LARGE_TABLES
200 
201 		t = p; fl_tab[0][i] = t;
202 		fl_tab[1][i] = rotl(t,  8);
203 		fl_tab[2][i] = rotl(t, 16);
204 		fl_tab[3][i] = rotl(t, 24);
205 #endif
206 		t = ((u4byte)ff_mult(2, p)) |
207 			((u4byte)p <<  8) |
208 			((u4byte)p << 16) |
209 			((u4byte)ff_mult(3, p) << 24);
210 
211 		ft_tab[0][i] = t;
212 		ft_tab[1][i] = rotl(t,  8);
213 		ft_tab[2][i] = rotl(t, 16);
214 		ft_tab[3][i] = rotl(t, 24);
215 
216 		p = isb_tab[i];
217 
218 #ifdef  LARGE_TABLES
219 
220 		t = p; il_tab[0][i] = t;
221 		il_tab[1][i] = rotl(t,  8);
222 		il_tab[2][i] = rotl(t, 16);
223 		il_tab[3][i] = rotl(t, 24);
224 #endif
225 		t = ((u4byte)ff_mult(14, p)) |
226 			((u4byte)ff_mult( 9, p) <<  8) |
227 			((u4byte)ff_mult(13, p) << 16) |
228 			((u4byte)ff_mult(11, p) << 24);
229 
230 		it_tab[0][i] = t;
231 		it_tab[1][i] = rotl(t,  8);
232 		it_tab[2][i] = rotl(t, 16);
233 		it_tab[3][i] = rotl(t, 24);
234 	}
235 
236 	tab_gen = 1;
237 }
238 
239 #define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
240 
241 #define imix_col(y,x)       \
242     u   = star_x(x);        \
243     v   = star_x(u);        \
244     w   = star_x(v);        \
245     t   = w ^ (x);          \
246    (y)  = u ^ v ^ w;        \
247    (y) ^= rotr(u ^ t,  8) ^ \
248 	  rotr(v ^ t, 16) ^ \
249 	  rotr(t,24)
250 
251 /* initialise the key schedule from the user supplied key   */
252 
253 #define loop4(i)                                    \
254 {   t = ls_box(rotr(t,  8)) ^ rco_tab[i];           \
255     t ^= e_key[4 * i];     e_key[4 * i + 4] = t;    \
256     t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t;    \
257     t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t;    \
258     t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t;    \
259 }
260 
261 #define loop6(i)                                    \
262 {   t = ls_box(rotr(t,  8)) ^ rco_tab[i];           \
263     t ^= e_key[6 * i];     e_key[6 * i + 6] = t;    \
264     t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t;    \
265     t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t;    \
266     t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t;    \
267     t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t;   \
268     t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t;   \
269 }
270 
271 #define loop8(i)                                    \
272 {   t = ls_box(rotr(t,  8)) ^ rco_tab[i];           \
273     t ^= e_key[8 * i];     e_key[8 * i + 8] = t;    \
274     t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t;    \
275     t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t;   \
276     t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t;   \
277     t  = e_key[8 * i + 4] ^ ls_box(t);              \
278     e_key[8 * i + 12] = t;                          \
279     t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t;   \
280     t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t;   \
281     t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t;   \
282 }
283 
284 rijndael_ctx *
285 rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len,
286 		 int encrypt)
287 {
288 	u4byte  i, t, u, v, w;
289 	u4byte *e_key = ctx->e_key;
290 	u4byte *d_key = ctx->d_key;
291 
292 	ctx->decrypt = !encrypt;
293 
294 	if(!tab_gen)
295 		gen_tabs();
296 
297 	ctx->k_len = (key_len + 31) / 32;
298 
299 	e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]);
300 	e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]);
301 
302 	switch(ctx->k_len) {
303 	case 4: t = e_key[3];
304 		for(i = 0; i < 10; ++i)
305 			loop4(i);
306 		break;
307 
308 	case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]);
309 		for(i = 0; i < 8; ++i)
310 			loop6(i);
311 		break;
312 
313 	case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]);
314 		e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]);
315 		for(i = 0; i < 7; ++i)
316 			loop8(i);
317 		break;
318 	}
319 
320 	if (!encrypt) {
321 		d_key[0] = e_key[0]; d_key[1] = e_key[1];
322 		d_key[2] = e_key[2]; d_key[3] = e_key[3];
323 
324 		for(i = 4; i < 4 * ctx->k_len + 24; ++i) {
325 			imix_col(d_key[i], e_key[i]);
326 		}
327 	}
328 
329 	return ctx;
330 }
331 
332 /* encrypt a block of text  */
333 
334 #define f_nround(bo, bi, k) \
335     f_rn(bo, bi, 0, k);     \
336     f_rn(bo, bi, 1, k);     \
337     f_rn(bo, bi, 2, k);     \
338     f_rn(bo, bi, 3, k);     \
339     k += 4
340 
341 #define f_lround(bo, bi, k) \
342     f_rl(bo, bi, 0, k);     \
343     f_rl(bo, bi, 1, k);     \
344     f_rl(bo, bi, 2, k);     \
345     f_rl(bo, bi, 3, k)
346 
347 void
348 rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
349 {
350 	u4byte k_len = ctx->k_len;
351 	u4byte *e_key = ctx->e_key;
352 	u4byte  b0[4], b1[4], *kp;
353 	u4byte tbuf[4];
354 
355 	if ((u_long)in_blk & 3) {
356 		bcopy(in_blk, tbuf, sizeof(tbuf));
357 		b0[0] = io_swap(tbuf[0]) ^ e_key[0];
358 		b0[1] = io_swap(tbuf[1]) ^ e_key[1];
359 		b0[2] = io_swap(tbuf[2]) ^ e_key[2];
360 		b0[3] = io_swap(tbuf[3]) ^ e_key[3];
361 	} else {
362 	b0[0] = io_swap(in_blk[0]) ^ e_key[0];
363 	b0[1] = io_swap(in_blk[1]) ^ e_key[1];
364 	b0[2] = io_swap(in_blk[2]) ^ e_key[2];
365 	b0[3] = io_swap(in_blk[3]) ^ e_key[3];
366 	}
367 
368 	kp = e_key + 4;
369 
370 	if(k_len > 6) {
371 		f_nround(b1, b0, kp); f_nround(b0, b1, kp);
372 	}
373 
374 	if(k_len > 4) {
375 		f_nround(b1, b0, kp); f_nround(b0, b1, kp);
376 	}
377 
378 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
379 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
380 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
381 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
382 	f_nround(b1, b0, kp); f_lround(b0, b1, kp);
383 
384 	if ((u_long)out_blk & 3) {
385 		tbuf[0] = io_swap(b0[0]);
386 		tbuf[1] = io_swap(b0[1]);
387 		tbuf[2] = io_swap(b0[2]);
388 		tbuf[3] = io_swap(b0[3]);
389 		bcopy(tbuf, out_blk, sizeof(tbuf));
390 	} else {
391 	out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
392 	out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);
393 	}
394 }
395 
396 /* decrypt a block of text  */
397 
398 #define i_nround(bo, bi, k) \
399     i_rn(bo, bi, 0, k);     \
400     i_rn(bo, bi, 1, k);     \
401     i_rn(bo, bi, 2, k);     \
402     i_rn(bo, bi, 3, k);     \
403     k -= 4
404 
405 #define i_lround(bo, bi, k) \
406     i_rl(bo, bi, 0, k);     \
407     i_rl(bo, bi, 1, k);     \
408     i_rl(bo, bi, 2, k);     \
409     i_rl(bo, bi, 3, k)
410 
411 void
412 rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
413 {
414 	u4byte  b0[4], b1[4], *kp;
415 	u4byte k_len = ctx->k_len;
416 	u4byte *e_key = ctx->e_key;
417 	u4byte *d_key = ctx->d_key;
418 	u4byte tbuf[4];
419 
420 	if ((u_long)in_blk & 3) {
421 		bcopy(in_blk, tbuf, sizeof(b0));
422 		b0[0] = io_swap(tbuf[0]) ^ e_key[4 * k_len + 24];
423 		b0[1] = io_swap(tbuf[1]) ^ e_key[4 * k_len + 25];
424 		b0[2] = io_swap(tbuf[2]) ^ e_key[4 * k_len + 26];
425 		b0[3] = io_swap(tbuf[3]) ^ e_key[4 * k_len + 27];
426 	} else {
427 	b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24];
428 	b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25];
429 	b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26];
430 	b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27];
431 	}
432 
433 	kp = d_key + 4 * (k_len + 5);
434 
435 	if(k_len > 6) {
436 		i_nround(b1, b0, kp); i_nround(b0, b1, kp);
437 	}
438 
439 	if(k_len > 4) {
440 		i_nround(b1, b0, kp); i_nround(b0, b1, kp);
441 	}
442 
443 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
444 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
445 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
446 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
447 	i_nround(b1, b0, kp); i_lround(b0, b1, kp);
448 
449 	if ((u_long)out_blk & 3) {
450 		tbuf[0] = io_swap(b0[0]);
451 		tbuf[1] = io_swap(b0[1]);
452 		tbuf[2] = io_swap(b0[2]);
453 		tbuf[3] = io_swap(b0[3]);
454 		bcopy(tbuf, out_blk, sizeof(tbuf));
455 	} else {
456 	out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
457 	out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);
458 	}
459 }
460