xref: /openbsd-src/usr.bin/ssh/rijndael.c (revision b2ea75c1b17e1a9a339660e7ed45cd24946b230e)
1 /*	$OpenBSD: rijndael.c,v 1.8 2001/07/30 16:23:30 stevesk Exp $	*/
2 
3 /* This is an independent implementation of the encryption algorithm:   */
4 /*                                                                      */
5 /*         RIJNDAEL by Joan Daemen and Vincent Rijmen                   */
6 /*                                                                      */
7 /* which is a candidate algorithm in the Advanced Encryption Standard   */
8 /* programme of the US National Institute of Standards and Technology.  */
9 
10 /*
11    -----------------------------------------------------------------------
12    Copyright (c) 2001 Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK
13 
14    TERMS
15 
16    Redistribution and use in source and binary forms, with or without
17    modification, are permitted provided that the following conditions
18    are met:
19    1. Redistributions of source code must retain the above copyright
20       notice, this list of conditions and the following disclaimer.
21    2. Redistributions in binary form must reproduce the above copyright
22       notice, this list of conditions and the following disclaimer in the
23       documentation and/or other materials provided with the distribution.
24 
25    This software is provided 'as is' with no guarantees of correctness or
26    fitness for purpose.
27    -----------------------------------------------------------------------
28 */
29 
30 /* Timing data for Rijndael (rijndael.c)
31 
32 Algorithm: rijndael (rijndael.c)
33 
34 128 bit key:
35 Key Setup:    305/1389 cycles (encrypt/decrypt)
36 Encrypt:       374 cycles =    68.4 mbits/sec
37 Decrypt:       352 cycles =    72.7 mbits/sec
38 Mean:          363 cycles =    70.5 mbits/sec
39 
40 192 bit key:
41 Key Setup:    277/1595 cycles (encrypt/decrypt)
42 Encrypt:       439 cycles =    58.3 mbits/sec
43 Decrypt:       425 cycles =    60.2 mbits/sec
44 Mean:          432 cycles =    59.3 mbits/sec
45 
46 256 bit key:
47 Key Setup:    374/1960 cycles (encrypt/decrypt)
48 Encrypt:       502 cycles =    51.0 mbits/sec
49 Decrypt:       498 cycles =    51.4 mbits/sec
50 Mean:          500 cycles =    51.2 mbits/sec
51 
52 */
53 
54 #include <sys/types.h>
55 #include "rijndael.h"
56 
57 void gen_tabs	__P((void));
58 
59 /* 3. Basic macros for speeding up generic operations               */
60 
61 /* Circular rotate of 32 bit values                                 */
62 
63 #define rotr(x,n)   (((x) >> ((int)(n))) | ((x) << (32 - (int)(n))))
64 #define rotl(x,n)   (((x) << ((int)(n))) | ((x) >> (32 - (int)(n))))
65 
66 /* Invert byte order in a 32 bit variable                           */
67 
68 #define bswap(x)    ((rotl(x, 8) & 0x00ff00ff) | (rotr(x, 8) & 0xff00ff00))
69 
70 /* Extract byte from a 32 bit quantity (little endian notation)     */
71 
72 #define byte(x,n)   ((u1byte)((x) >> (8 * n)))
73 
74 #if BYTE_ORDER != LITTLE_ENDIAN
75 #define BYTE_SWAP
76 #endif
77 
78 #ifdef  BYTE_SWAP
79 #define io_swap(x)  bswap(x)
80 #else
81 #define io_swap(x)  (x)
82 #endif
83 
84 #define LARGE_TABLES
85 
86 u1byte  pow_tab[256];
87 u1byte  log_tab[256];
88 u1byte  sbx_tab[256];
89 u1byte  isb_tab[256];
90 u4byte  rco_tab[ 10];
91 u4byte  ft_tab[4][256];
92 u4byte  it_tab[4][256];
93 
94 #ifdef  LARGE_TABLES
95   u4byte  fl_tab[4][256];
96   u4byte  il_tab[4][256];
97 #endif
98 
99 u4byte  tab_gen = 0;
100 
101 #define ff_mult(a,b)    (a && b ? pow_tab[(log_tab[a] + log_tab[b]) % 255] : 0)
102 
103 #define f_rn(bo, bi, n, k)                          \
104     bo[n] =  ft_tab[0][byte(bi[n],0)] ^             \
105 	     ft_tab[1][byte(bi[(n + 1) & 3],1)] ^   \
106 	     ft_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
107 	     ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
108 
109 #define i_rn(bo, bi, n, k)                          \
110     bo[n] =  it_tab[0][byte(bi[n],0)] ^             \
111 	     it_tab[1][byte(bi[(n + 3) & 3],1)] ^   \
112 	     it_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
113 	     it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
114 
115 #ifdef LARGE_TABLES
116 
117 #define ls_box(x)                \
118     ( fl_tab[0][byte(x, 0)] ^    \
119       fl_tab[1][byte(x, 1)] ^    \
120       fl_tab[2][byte(x, 2)] ^    \
121       fl_tab[3][byte(x, 3)] )
122 
123 #define f_rl(bo, bi, n, k)                          \
124     bo[n] =  fl_tab[0][byte(bi[n],0)] ^             \
125 	     fl_tab[1][byte(bi[(n + 1) & 3],1)] ^   \
126 	     fl_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
127 	     fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)
128 
129 #define i_rl(bo, bi, n, k)                          \
130     bo[n] =  il_tab[0][byte(bi[n],0)] ^             \
131 	     il_tab[1][byte(bi[(n + 3) & 3],1)] ^   \
132 	     il_tab[2][byte(bi[(n + 2) & 3],2)] ^   \
133 	     il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
134 
135 #else
136 
137 #define ls_box(x)                            \
138     ((u4byte)sbx_tab[byte(x, 0)] <<  0) ^    \
139     ((u4byte)sbx_tab[byte(x, 1)] <<  8) ^    \
140     ((u4byte)sbx_tab[byte(x, 2)] << 16) ^    \
141     ((u4byte)sbx_tab[byte(x, 3)] << 24)
142 
143 #define f_rl(bo, bi, n, k)                                      \
144     bo[n] = (u4byte)sbx_tab[byte(bi[n],0)] ^                    \
145 	rotl(((u4byte)sbx_tab[byte(bi[(n + 1) & 3],1)]),  8) ^  \
146 	rotl(((u4byte)sbx_tab[byte(bi[(n + 2) & 3],2)]), 16) ^  \
147 	rotl(((u4byte)sbx_tab[byte(bi[(n + 3) & 3],3)]), 24) ^ *(k + n)
148 
149 #define i_rl(bo, bi, n, k)                                      \
150     bo[n] = (u4byte)isb_tab[byte(bi[n],0)] ^                    \
151 	rotl(((u4byte)isb_tab[byte(bi[(n + 3) & 3],1)]),  8) ^  \
152 	rotl(((u4byte)isb_tab[byte(bi[(n + 2) & 3],2)]), 16) ^  \
153 	rotl(((u4byte)isb_tab[byte(bi[(n + 1) & 3],3)]), 24) ^ *(k + n)
154 
155 #endif
156 
157 void
158 gen_tabs(void)
159 {
160 	u4byte  i, t;
161 	u1byte  p, q;
162 
163 	/* log and power tables for GF(2**8) finite field with  */
164 	/* 0x11b as modular polynomial - the simplest prmitive  */
165 	/* root is 0x11, used here to generate the tables       */
166 
167 	for(i = 0,p = 1; i < 256; ++i) {
168 		pow_tab[i] = (u1byte)p; log_tab[p] = (u1byte)i;
169 
170 		p = p ^ (p << 1) ^ (p & 0x80 ? 0x01b : 0);
171 	}
172 
173 	log_tab[1] = 0; p = 1;
174 
175 	for(i = 0; i < 10; ++i) {
176 		rco_tab[i] = p;
177 
178 		p = (p << 1) ^ (p & 0x80 ? 0x1b : 0);
179 	}
180 
181 	/* note that the affine byte transformation matrix in   */
182 	/* rijndael specification is in big endian format with  */
183 	/* bit 0 as the most significant bit. In the remainder  */
184 	/* of the specification the bits are numbered from the  */
185 	/* least significant end of a byte.                     */
186 
187 	for(i = 0; i < 256; ++i) {
188 		p = (i ? pow_tab[255 - log_tab[i]] : 0); q = p;
189 		q = (q >> 7) | (q << 1); p ^= q;
190 		q = (q >> 7) | (q << 1); p ^= q;
191 		q = (q >> 7) | (q << 1); p ^= q;
192 		q = (q >> 7) | (q << 1); p ^= q ^ 0x63;
193 		sbx_tab[i] = (u1byte)p; isb_tab[p] = (u1byte)i;
194 	}
195 
196 	for(i = 0; i < 256; ++i) {
197 		p = sbx_tab[i];
198 
199 #ifdef  LARGE_TABLES
200 
201 		t = p; fl_tab[0][i] = t;
202 		fl_tab[1][i] = rotl(t,  8);
203 		fl_tab[2][i] = rotl(t, 16);
204 		fl_tab[3][i] = rotl(t, 24);
205 #endif
206 		t = ((u4byte)ff_mult(2, p)) |
207 			((u4byte)p <<  8) |
208 			((u4byte)p << 16) |
209 			((u4byte)ff_mult(3, p) << 24);
210 
211 		ft_tab[0][i] = t;
212 		ft_tab[1][i] = rotl(t,  8);
213 		ft_tab[2][i] = rotl(t, 16);
214 		ft_tab[3][i] = rotl(t, 24);
215 
216 		p = isb_tab[i];
217 
218 #ifdef  LARGE_TABLES
219 
220 		t = p; il_tab[0][i] = t;
221 		il_tab[1][i] = rotl(t,  8);
222 		il_tab[2][i] = rotl(t, 16);
223 		il_tab[3][i] = rotl(t, 24);
224 #endif
225 		t = ((u4byte)ff_mult(14, p)) |
226 			((u4byte)ff_mult( 9, p) <<  8) |
227 			((u4byte)ff_mult(13, p) << 16) |
228 			((u4byte)ff_mult(11, p) << 24);
229 
230 		it_tab[0][i] = t;
231 		it_tab[1][i] = rotl(t,  8);
232 		it_tab[2][i] = rotl(t, 16);
233 		it_tab[3][i] = rotl(t, 24);
234 	}
235 
236 	tab_gen = 1;
237 }
238 
239 #define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
240 
241 #define imix_col(y,x)       \
242     u   = star_x(x);        \
243     v   = star_x(u);        \
244     w   = star_x(v);        \
245     t   = w ^ (x);          \
246    (y)  = u ^ v ^ w;        \
247    (y) ^= rotr(u ^ t,  8) ^ \
248 	  rotr(v ^ t, 16) ^ \
249 	  rotr(t,24)
250 
251 /* initialise the key schedule from the user supplied key   */
252 
253 #define loop4(i)                                    \
254 {   t = ls_box(rotr(t,  8)) ^ rco_tab[i];           \
255     t ^= e_key[4 * i];     e_key[4 * i + 4] = t;    \
256     t ^= e_key[4 * i + 1]; e_key[4 * i + 5] = t;    \
257     t ^= e_key[4 * i + 2]; e_key[4 * i + 6] = t;    \
258     t ^= e_key[4 * i + 3]; e_key[4 * i + 7] = t;    \
259 }
260 
261 #define loop6(i)                                    \
262 {   t = ls_box(rotr(t,  8)) ^ rco_tab[i];           \
263     t ^= e_key[6 * i];     e_key[6 * i + 6] = t;    \
264     t ^= e_key[6 * i + 1]; e_key[6 * i + 7] = t;    \
265     t ^= e_key[6 * i + 2]; e_key[6 * i + 8] = t;    \
266     t ^= e_key[6 * i + 3]; e_key[6 * i + 9] = t;    \
267     t ^= e_key[6 * i + 4]; e_key[6 * i + 10] = t;   \
268     t ^= e_key[6 * i + 5]; e_key[6 * i + 11] = t;   \
269 }
270 
271 #define loop8(i)                                    \
272 {   t = ls_box(rotr(t,  8)) ^ rco_tab[i];           \
273     t ^= e_key[8 * i];     e_key[8 * i + 8] = t;    \
274     t ^= e_key[8 * i + 1]; e_key[8 * i + 9] = t;    \
275     t ^= e_key[8 * i + 2]; e_key[8 * i + 10] = t;   \
276     t ^= e_key[8 * i + 3]; e_key[8 * i + 11] = t;   \
277     t  = e_key[8 * i + 4] ^ ls_box(t);              \
278     e_key[8 * i + 12] = t;                          \
279     t ^= e_key[8 * i + 5]; e_key[8 * i + 13] = t;   \
280     t ^= e_key[8 * i + 6]; e_key[8 * i + 14] = t;   \
281     t ^= e_key[8 * i + 7]; e_key[8 * i + 15] = t;   \
282 }
283 
284 rijndael_ctx *
285 rijndael_set_key(rijndael_ctx *ctx, const u4byte *in_key, const u4byte key_len,
286 		 int encrypt)
287 {
288 	u4byte  i, t, u, v, w;
289 	u4byte *e_key = ctx->e_key;
290 	u4byte *d_key = ctx->d_key;
291 
292 	ctx->decrypt = !encrypt;
293 
294 	if(!tab_gen)
295 		gen_tabs();
296 
297 	ctx->k_len = (key_len + 31) / 32;
298 
299 	e_key[0] = io_swap(in_key[0]); e_key[1] = io_swap(in_key[1]);
300 	e_key[2] = io_swap(in_key[2]); e_key[3] = io_swap(in_key[3]);
301 
302 	switch(ctx->k_len) {
303 	case 4: t = e_key[3];
304 		for(i = 0; i < 10; ++i)
305 			loop4(i);
306 		break;
307 
308 	case 6: e_key[4] = io_swap(in_key[4]); t = e_key[5] = io_swap(in_key[5]);
309 		for(i = 0; i < 8; ++i)
310 			loop6(i);
311 		break;
312 
313 	case 8: e_key[4] = io_swap(in_key[4]); e_key[5] = io_swap(in_key[5]);
314 		e_key[6] = io_swap(in_key[6]); t = e_key[7] = io_swap(in_key[7]);
315 		for(i = 0; i < 7; ++i)
316 			loop8(i);
317 		break;
318 	}
319 
320 	if (!encrypt) {
321 		d_key[0] = e_key[0]; d_key[1] = e_key[1];
322 		d_key[2] = e_key[2]; d_key[3] = e_key[3];
323 
324 		for(i = 4; i < 4 * ctx->k_len + 24; ++i) {
325 			imix_col(d_key[i], e_key[i]);
326 		}
327 	}
328 
329 	return ctx;
330 }
331 
332 /* encrypt a block of text  */
333 
334 #define f_nround(bo, bi, k) \
335     f_rn(bo, bi, 0, k);     \
336     f_rn(bo, bi, 1, k);     \
337     f_rn(bo, bi, 2, k);     \
338     f_rn(bo, bi, 3, k);     \
339     k += 4
340 
341 #define f_lround(bo, bi, k) \
342     f_rl(bo, bi, 0, k);     \
343     f_rl(bo, bi, 1, k);     \
344     f_rl(bo, bi, 2, k);     \
345     f_rl(bo, bi, 3, k)
346 
347 void
348 rijndael_encrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
349 {
350 	u4byte k_len = ctx->k_len;
351 	u4byte *e_key = ctx->e_key;
352 	u4byte  b0[4], b1[4], *kp;
353 
354 	b0[0] = io_swap(in_blk[0]) ^ e_key[0];
355 	b0[1] = io_swap(in_blk[1]) ^ e_key[1];
356 	b0[2] = io_swap(in_blk[2]) ^ e_key[2];
357 	b0[3] = io_swap(in_blk[3]) ^ e_key[3];
358 
359 	kp = e_key + 4;
360 
361 	if(k_len > 6) {
362 		f_nround(b1, b0, kp); f_nround(b0, b1, kp);
363 	}
364 
365 	if(k_len > 4) {
366 		f_nround(b1, b0, kp); f_nround(b0, b1, kp);
367 	}
368 
369 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
370 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
371 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
372 	f_nround(b1, b0, kp); f_nround(b0, b1, kp);
373 	f_nround(b1, b0, kp); f_lround(b0, b1, kp);
374 
375 	out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
376 	out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);
377 }
378 
379 /* decrypt a block of text  */
380 
381 #define i_nround(bo, bi, k) \
382     i_rn(bo, bi, 0, k);     \
383     i_rn(bo, bi, 1, k);     \
384     i_rn(bo, bi, 2, k);     \
385     i_rn(bo, bi, 3, k);     \
386     k -= 4
387 
388 #define i_lround(bo, bi, k) \
389     i_rl(bo, bi, 0, k);     \
390     i_rl(bo, bi, 1, k);     \
391     i_rl(bo, bi, 2, k);     \
392     i_rl(bo, bi, 3, k)
393 
394 void
395 rijndael_decrypt(rijndael_ctx *ctx, const u4byte *in_blk, u4byte *out_blk)
396 {
397 	u4byte  b0[4], b1[4], *kp;
398 	u4byte k_len = ctx->k_len;
399 	u4byte *e_key = ctx->e_key;
400 	u4byte *d_key = ctx->d_key;
401 
402 	b0[0] = io_swap(in_blk[0]) ^ e_key[4 * k_len + 24];
403 	b0[1] = io_swap(in_blk[1]) ^ e_key[4 * k_len + 25];
404 	b0[2] = io_swap(in_blk[2]) ^ e_key[4 * k_len + 26];
405 	b0[3] = io_swap(in_blk[3]) ^ e_key[4 * k_len + 27];
406 
407 	kp = d_key + 4 * (k_len + 5);
408 
409 	if(k_len > 6) {
410 		i_nround(b1, b0, kp); i_nround(b0, b1, kp);
411 	}
412 
413 	if(k_len > 4) {
414 		i_nround(b1, b0, kp); i_nround(b0, b1, kp);
415 	}
416 
417 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
418 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
419 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
420 	i_nround(b1, b0, kp); i_nround(b0, b1, kp);
421 	i_nround(b1, b0, kp); i_lround(b0, b1, kp);
422 
423 	out_blk[0] = io_swap(b0[0]); out_blk[1] = io_swap(b0[1]);
424 	out_blk[2] = io_swap(b0[2]); out_blk[3] = io_swap(b0[3]);
425 }
426