xref: /plan9-contrib/sys/src/libsec/port/aes.c (revision c77838d532b0fe1e231546e9aada64c47acaab0b)
1 /*
2  * this code is derived from the following source,
3  * and modified to fit into the plan 9 libsec interface.
4  * most of the changes are confined to the top section,
5  * with the exception of converting Te4 and Td4 into u8 rather than u32 arrays.
6  *
7  * rijndael-alg-fst.c
8  *
9  * @version 3.0 (December 2000)
10  *
11  * Optimised ANSI C code for the Rijndael cipher (now AES)
12  *
13  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
14  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
15  * @author Paulo Barreto <paulo.barreto@terra.com.br>
16  *
17  * This code is hereby placed in the public domain.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 #include <u.h>
32 #include <libc.h>
33 #include <mp.h>
34 #include <libsec.h>
35 
/* short integer aliases used by the table-driven code in this file */
typedef uchar	u8;
typedef ulong	u32;

/* NOTE(review): presumably selects unrolled round code further down the file -- not visible here, confirm */
#define FULL_UNROLL
/* from here on every `const' in this file expands to nothing */
#define const

/*
 * Forward declarations of lookup tables that are used (by aes_setup)
 * before their initialised definitions appear later in the file.
 */
static const u32 Td0[256];
static const u32 Td1[256];
static const u32 Td2[256];
static const u32 Td3[256];
static const u8  Te4[256];
/*
 * Three constant 16-byte blocks (all 0x01, all 0x02, all 0x03).
 * setupAESXCBCstate encrypts each of them under the user's key to
 * fill the three 16-byte slots of s->mackey.
 */
static uchar basekey[3][16] = {
	{
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
	},
	{
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
	},
	{
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
	},
};

/* key-schedule helpers; each returns a round count (see aes_setup) */
static int aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
		int keyBits);
static int aes_setupDec(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
		int keyBits);
static int aes_setup(ulong erk[/*4*(Nr + 1)*/], ulong drk[/*4*(Nr + 1)*/],
		const uchar cipherKey[], int keyBits);

/* single-block primitives: rk is a round-key array, Nr the round count */
void	aes_encrypt(const ulong rk[], int Nr, const uchar pt[16], uchar ct[16]);
void	aes_decrypt(const ulong rk[], int Nr, const uchar ct[16], uchar pt[16]);
71 
72 void
setupAESstate(AESstate * s,uchar key[],int keybytes,uchar * ivec)73 setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
74 {
75 	memset(s, 0, sizeof(*s));
76 	if(keybytes > AESmaxkey)
77 		keybytes = AESmaxkey;
78 	memmove(s->key, key, keybytes);
79 	s->keybytes = keybytes;
80 	s->ctrsz = 4;	/* default counter size from rfc3686 */
81 	s->rounds = aes_setup(s->ekey, s->dkey, s->key, keybytes * 8);
82 	if(ivec != nil)
83 		memmove(s->ivec, ivec, AESbsize);
84 	if(keybytes==16 || keybytes==24 || keybytes==32)
85 		s->setup = 0xcafebabe;
86 	/* else aes_setup was invalid */
87 }
88 
89 /*
90  * AES-XCBC-MAC-96 message authentication, per rfc3566.
91  */
92 
93 void
setupAESXCBCstate(AESstate * s)94 setupAESXCBCstate(AESstate *s)		/* was setupmac96 */
95 {
96 	int i, j;
97 	uint q[16 / sizeof(uint)];
98 	uchar *p;
99 
100 	assert(s->keybytes == 16);
101 	for(i = 0; i < 3; i++)
102 		aes_encrypt(s->ekey, s->rounds, basekey[i],
103 			s->mackey + AESbsize*i);
104 
105 	p = s->mackey;
106 	memset(q, 0, AESbsize);
107 
108 	/*
109 	 * put the in the right endian.  once figured, probably better
110 	 * to use some fcall macros.
111 	 * keys for encryption in local endianness for the algorithm...
112 	 * only key1 is used for encryption;
113 	 * BUG!!: I think this is what I got wrong.
114 	 */
115 	for(i = 0; i < 16 / sizeof(uint); i ++){
116 		for(j = 0; j < sizeof(uint); j++)
117 			q[i] |= p[sizeof(uint)-j-1] << 8*j;
118 		p += sizeof(uint);
119 	}
120 	memmove(s->mackey, q, 16);
121 }
122 
123 /*
124  * Not dealing with > 128-bit keys, not dealing with strange corner cases like
125  * empty message.  Should be fine for AES-XCBC-MAC-96.
126  */
127 uchar*
aesXCBCmac(uchar * p,int len,AESstate * s)128 aesXCBCmac(uchar *p, int len, AESstate *s)
129 {
130 	uchar *p2, *ip, *eip, *mackey;
131 	uchar q[AESbsize];
132 
133 	assert(s->keybytes == 16);	/* more complicated for bigger */
134 	memset(s->ivec, 0, AESbsize);	/* E[0] is 0+ */
135 
136 	for(; len > AESbsize; len -= AESbsize){
137 		memmove(q, p, AESbsize);
138 		p2 = q;
139 		ip = s->ivec;
140 		for(eip = ip + AESbsize; ip < eip; )
141 			*p2++ ^= *ip++;
142 		aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
143 		p += AESbsize;
144 	}
145 	/* the last one */
146 
147 	memmove(q, p, len);
148 	p2 = q+len;
149 	if(len == AESbsize)
150 		mackey = s->mackey + AESbsize;	/* k2 */
151 	else{
152 		mackey = s->mackey+2*AESbsize;	/* k3 */
153 		*p2++ = 1 << 7;			/* padding */
154 		len = AESbsize - len - 1;
155 		memset(p2, 0, len);
156 	}
157 
158 	ip = s->ivec;
159 	p2 = q;
160 	for(eip = ip + AESbsize; ip < eip; )
161 		*p2++ ^= *ip++ ^ *mackey++;
162 	aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
163 	return s->ivec;			/* only the 12 bytes leftmost */
164 }
165 
166 /*
167  * Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
168  * Because of the way that non-multiple-of-16 buffers are handled,
169  * the decryptor must be fed buffers of the same size as the encryptor.
170  */
171 void
aesCBCencrypt(uchar * p,int len,AESstate * s)172 aesCBCencrypt(uchar *p, int len, AESstate *s)
173 {
174 	uchar *p2, *ip, *eip;
175 	uchar q[AESbsize];
176 
177 	for(; len >= AESbsize; len -= AESbsize){
178 		p2 = p;
179 		ip = s->ivec;
180 		for(eip = ip+AESbsize; ip < eip; )
181 			*p2++ ^= *ip++;
182 		aes_encrypt(s->ekey, s->rounds, p, q);
183 		memmove(s->ivec, q, AESbsize);
184 		memmove(p, q, AESbsize);
185 		p += AESbsize;
186 	}
187 
188 	if(len > 0){
189 		ip = s->ivec;
190 		aes_encrypt(s->ekey, s->rounds, ip, q);
191 		memmove(s->ivec, q, AESbsize);
192 		for(eip = ip+len; ip < eip; )
193 			*p++ ^= *ip++;
194 	}
195 }
196 
197 void
aesCBCdecrypt(uchar * p,int len,AESstate * s)198 aesCBCdecrypt(uchar *p, int len, AESstate *s)
199 {
200 	uchar *ip, *eip, *tp;
201 	uchar tmp[AESbsize], q[AESbsize];
202 
203 	for(; len >= AESbsize; len -= AESbsize){
204 		memmove(tmp, p, AESbsize);
205 		aes_decrypt(s->dkey, s->rounds, p, q);
206 		memmove(p, q, AESbsize);
207 		tp = tmp;
208 		ip = s->ivec;
209 		for(eip = ip+AESbsize; ip < eip; ){
210 			*p++ ^= *ip;
211 			*ip++ = *tp++;
212 		}
213 	}
214 
215 	if(len > 0){
216 		ip = s->ivec;
217 		aes_encrypt(s->ekey, s->rounds, ip, q);
218 		memmove(s->ivec, q, AESbsize);
219 		for(eip = ip+len; ip < eip; )
220 			*p++ ^= *ip++;
221 	}
222 }
223 
/*
 * AES-CTR mode, per rfc3686.
 * CTRs could be precalculated for efficiency.
 */

/*
 * Add one to the big-endian counter held in the last ctrsz bytes of
 * the AESbsize-byte block at p, wrapping to zero on overflow.  Bytes
 * in front of the counter are never touched.
 *
 * p      counter block of AESbsize bytes.
 * ctrsz  width of the counter in bytes; values larger than AESbsize
 *        are treated as AESbsize, and 0 leaves the block unchanged.
 *
 * The increment is a plain byte-wise ripple carry, so it needs no
 * allocation and is correct for every counter width.
 */
static void
incrementCTR(uchar *p, uint ctrsz)
{
	uint i;
	uchar *ctr;

	if(ctrsz > AESbsize)
		ctrsz = AESbsize;
	ctr = p + AESbsize - ctrsz;

	/* start at the least significant byte; stop once a byte does not wrap */
	for(i = ctrsz; i > 0; i--)
		if(++ctr[i-1] != 0)
			break;
}
258 
259 void
aesCTRencrypt(uchar * p,int len,AESstate * s)260 aesCTRencrypt(uchar *p, int len, AESstate *s)
261 {
262 	uchar q[AESbsize];
263 	uchar *ip, *eip, *ctr;
264 
265 	ctr = s->ivec;
266 	for(; len >= AESbsize; len -= AESbsize){
267 		ip = q;
268 		aes_encrypt(s->ekey, s->rounds, ctr, q);
269 		for(eip = p + AESbsize; p < eip; )
270 			*p++ ^= *ip++;
271 		incrementCTR(ctr, s->ctrsz);
272 	}
273 
274 	if(len > 0){
275 		ip = q;
276 		aes_encrypt(s->ekey, s->rounds, ctr, q);
277 		for(eip = p + len; p < eip; )
278 			*p++ ^= *ip++;
279 		incrementCTR(ctr, s->ctrsz);
280 	}
281 }
282 
/*
 * CTR-mode decrypt p[0..len-1] in place.  This is a straight call to
 * aesCTRencrypt with the same arguments, which xors a keystream into
 * the buffer.
 */
void
aesCTRdecrypt(uchar *p, int len, AESstate *s)
{
	aesCTRencrypt(p, len, s);
}
288 
289 
290 /* taken from sha1; TODO: verify suitability (esp. byte order) for aes */
291 /*
292  *	encodes input (ulong) into output (uchar). Assumes len is
293  *	a multiple of 4.
294  */
295 static void
encode(uchar * output,ulong * input,ulong len)296 encode(uchar *output, ulong *input, ulong len)
297 {
298 	ulong x;
299 	uchar *e;
300 
301 	for(e = output + len; output < e;) {
302 		x = *input++;
303 		*output++ = x >> 24;
304 		*output++ = x >> 16;
305 		*output++ = x >> 8;
306 		*output++ = x;
307 	}
308 }
309 
/*
 * Incremental digest built on the sha1 skeleton, with the sha1 block
 * function swapped for calls to aes_encrypt.
 *
 * p, len  next piece of input.
 * digest  0 while more input is still to come; otherwise the buffer
 *         that receives AESdlen bytes of result.
 * s       running state, or nil to have one allocated here.
 *
 * Returns s (possibly newly allocated) when digest is 0.  On the final
 * call it writes the digest, frees s if this function allocated it,
 * and returns nil.  Also returns nil if the allocation fails.
 *
 * TODO: verify use of aes_encrypt here.
 * NOTE(review): every aes_encrypt call below passes s->buf as the
 * round-key array with Nr == 1, and aes_encrypt's prototype takes a
 * single 16-byte block, whereas the replaced _sha1block calls were
 * given the full byte count -- confirm this is intended.
 */
AEShstate*
aes(uchar *p, ulong len, uchar *digest, AEShstate *s)
{
	uchar buf[128];
	ulong x[16];
	int i;
	uchar *e;

	/* no state supplied: make a zeroed one and remember to free it at the end */
	if(s == nil){
		s = malloc(sizeof(*s));
		if(s == nil)
			return nil;
		memset(s, 0, sizeof(*s));
		s->malloced = 1;
	}

	if(s->seeded == 0){
		/* seed the state, these constants would look nicer big-endian */
		s->state[0] = 0x67452301;
		s->state[1] = 0xefcdab89;
		s->state[2] = 0x98badcfe;
		s->state[3] = 0x10325476;
		/* in sha1 (20-byte digest), but not md5 (16 bytes)*/
		s->state[4] = 0xc3d2e1f0;
		s->seeded = 1;
	}

	/* fill out the partial 64 byte block from previous calls */
	if(s->blen){
		i = 64 - s->blen;
		if(len < i)
			i = len;
		memmove(s->buf + s->blen, p, i);
		len -= i;
		s->blen += i;
		p += i;
		if(s->blen == 64){
			/* encrypt s->buf into s->state */
			/* sha1 original: _sha1block(s->buf, s->blen, s->state); */
			aes_encrypt((ulong *)s->buf, 1, s->buf, (uchar *)s->state);
			s->len += s->blen;
			s->blen = 0;
		}
	}

	/* do 64 byte blocks */
	i = len & ~0x3f;	/* len rounded down to a multiple of 64 */
	if(i){
		/* encrypt p into s->state */
		/* sha1 original: _sha1block(p, i, s->state); */
		aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
		s->len += i;
		len -= i;
		p += i;
	}

	/* save the left overs if not last call */
	if(digest == 0){
		if(len){
			memmove(s->buf, p, len);
			s->blen += len;
		}
		return s;
	}

	/*
	 *  this is the last time through, pad what's left with 0x80,
	 *  0's, and the input count to create a multiple of 64 bytes
	 */
	if(s->blen){
		/* the tail is already sitting in s->buf; pad it there */
		p = s->buf;
		len = s->blen;
	} else {
		/* copy the tail to the local buffer so the caller's data is not modified */
		memmove(buf, p, len);
		p = buf;
	}
	s->len += len;
	e = p + len;
	/* leave room for the 8-byte count: pad to 56, or to 120 if the tail is too long */
	if(len < 56)
		i = 56 - len;
	else
		i = 120 - len;
	memset(e, 0, i);
	*e = 0x80;
	len += i;

	/* append the count */
	x[0] = s->len>>29;		/* byte-order dependent */
	x[1] = s->len<<3;
	encode(p+len, x, 8);

	/* digest the last part */
	/* encrypt p into s->state */
	/* sha1 original: _sha1block(p, len+8, s->state); */
	aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
	s->len += len+8;		/* sha1: +8 */

	/* return result and free state */
	encode((uchar *)digest, (ulong *)s->state, AESdlen);
	if(s->malloced == 1)
		free(s);
	return nil;
}
414 
/*
 * HMAC wrapper: forwards all arguments to hmac_x, naming aes as the
 * digest function and AESdlen as the digest length, and returns
 * whatever hmac_x returns.
 * NOTE(review): aes is declared to take and return AEShstate*, while
 * this interface uses DigestState* -- presumably the same type under a
 * typedef; confirm in libsec.h.
 */
DigestState*
hmac_aes(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest,
	DigestState *s)
{
	return hmac_x(p, len, key, klen, digest, s, aes, AESdlen);
}
421 
422 
423 
424 /*
425  * this function has been changed for plan 9.
426  * Expand the cipher key into the encryption and decryption key schedules.
427  *
428  * @return	the number of rounds for the given cipher key size.
429  */
430 static int
aes_setup(ulong erk[],ulong drk[],const uchar cipherKey[],int keyBits)431 aes_setup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */],
432 	const uchar cipherKey[], int keyBits)
433 {
434 	int Nr, i;
435 
436 	/* expand the cipher key: */
437 	Nr = aes_setupEnc(erk, cipherKey, keyBits);
438 
439 	/*
440 	 * invert the order of the round keys and apply the inverse MixColumn
441 	 * transform to all round keys but the first and the last
442 	 */
443 	drk[0       ] = erk[4*Nr    ];
444 	drk[1       ] = erk[4*Nr + 1];
445 	drk[2       ] = erk[4*Nr + 2];
446 	drk[3       ] = erk[4*Nr + 3];
447 	drk[4*Nr    ] = erk[0       ];
448 	drk[4*Nr + 1] = erk[1       ];
449 	drk[4*Nr + 2] = erk[2       ];
450 	drk[4*Nr + 3] = erk[3       ];
451 	erk += 4 * Nr;
452 	for (i = 1; i < Nr; i++) {
453 		drk += 4;
454 		erk -= 4;
455 		drk[0] =
456 		    Td0[Te4[(erk[0] >> 24)       ]] ^
457 		    Td1[Te4[(erk[0] >> 16) & 0xff]] ^
458 		    Td2[Te4[(erk[0] >>  8) & 0xff]] ^
459 		    Td3[Te4[(erk[0]      ) & 0xff]];
460 		drk[1] =
461 		    Td0[Te4[(erk[1] >> 24)       ]] ^
462 		    Td1[Te4[(erk[1] >> 16) & 0xff]] ^
463 		    Td2[Te4[(erk[1] >>  8) & 0xff]] ^
464 		    Td3[Te4[(erk[1]      ) & 0xff]];
465 		drk[2] =
466 		    Td0[Te4[(erk[2] >> 24)       ]] ^
467 		    Td1[Te4[(erk[2] >> 16) & 0xff]] ^
468 		    Td2[Te4[(erk[2] >>  8) & 0xff]] ^
469 		    Td3[Te4[(erk[2]      ) & 0xff]];
470 		drk[3] =
471 		    Td0[Te4[(erk[3] >> 24)       ]] ^
472 		    Td1[Te4[(erk[3] >> 16) & 0xff]] ^
473 		    Td2[Te4[(erk[3] >>  8) & 0xff]] ^
474 		    Td3[Te4[(erk[3]      ) & 0xff]];
475 	}
476 	return Nr;
477 }
478 
479 
480 /*
481 Te0[x] = S [x].[02, 01, 01, 03];
482 Te1[x] = S [x].[03, 02, 01, 01];
483 Te2[x] = S [x].[01, 03, 02, 01];
484 Te3[x] = S [x].[01, 01, 03, 02];
485 Te4[x] = S [x]
486 
487 Td0[x] = Si[x].[0e, 09, 0d, 0b];
488 Td1[x] = Si[x].[0b, 0e, 09, 0d];
489 Td2[x] = Si[x].[0d, 0b, 0e, 09];
490 Td3[x] = Si[x].[09, 0d, 0b, 0e];
491 Td4[x] = Si[x]
492 */
493 
494 static const u32 Te0[256] = {
495     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
496     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
497     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
498     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
499     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
500     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
501     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
502     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
503     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
504     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
505     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
506     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
507     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
508     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
509     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
510     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
511     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
512     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
513     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
514     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
515     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
516     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
517     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
518     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
519     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
520     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
521     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
522     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
523     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
524     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
525     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
526     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
527     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
528     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
529     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
530     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
531     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
532     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
533     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
534     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
535     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
536     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
537     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
538     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
539     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
540     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
541     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
542     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
543     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
544     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
545     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
546     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
547     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
548     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
549     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
550     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
551     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
552     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
553     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
554     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
555     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
556     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
557     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
558     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
559 };
560 static const u32 Te1[256] = {
561     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
562     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
563     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
564     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
565     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
566     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
567     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
568     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
569     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
570     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
571     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
572     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
573     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
574     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
575     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
576     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
577     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
578     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
579     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
580     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
581     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
582     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
583     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
584     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
585     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
586     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
587     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
588     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
589     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
590     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
591     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
592     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
593     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
594     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
595     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
596     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
597     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
598     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
599     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
600     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
601     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
602     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
603     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
604     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
605     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
606     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
607     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
608     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
609     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
610     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
611     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
612     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
613     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
614     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
615     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
616     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
617     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
618     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
619     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
620     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
621     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
622     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
623     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
624     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
625 };
626 static const u32 Te2[256] = {
627     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
628     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
629     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
630     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
631     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
632     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
633     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
634     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
635     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
636     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
637     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
638     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
639     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
640     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
641     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
642     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
643     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
644     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
645     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
646     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
647     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
648     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
649     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
650     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
651     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
652     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
653     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
654     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
655     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
656     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
657     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
658     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
659     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
660     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
661     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
662     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
663     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
664     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
665     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
666     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
667     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
668     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
669     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
670     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
671     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
672     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
673     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
674     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
675     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
676     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
677     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
678     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
679     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
680     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
681     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
682     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
683     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
684     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
685     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
686     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
687     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
688     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
689     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
690     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
691 };
692 static const u32 Te3[256] = {
693 
694     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
695     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
696     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
697     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
698     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
699     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
700     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
701     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
702     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
703     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
704     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
705     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
706     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
707     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
708     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
709     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
710     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
711     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
712     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
713     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
714     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
715     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
716     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
717     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
718     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
719     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
720     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
721     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
722     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
723     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
724     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
725     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
726     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
727     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
728     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
729     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
730     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
731     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
732     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
733     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
734     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
735     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
736     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
737     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
738     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
739     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
740     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
741     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
742     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
743     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
744     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
745     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
746     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
747     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
748     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
749     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
750     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
751     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
752     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
753     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
754     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
755     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
756     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
757     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
758 };
759 static const u8 Te4[256] = {
760     0x63U, 0x7cU, 0x77U, 0x7bU,
761     0xf2U, 0x6bU, 0x6fU, 0xc5U,
762     0x30U, 0x01U, 0x67U, 0x2bU,
763     0xfeU, 0xd7U, 0xabU, 0x76U,
764     0xcaU, 0x82U, 0xc9U, 0x7dU,
765     0xfaU, 0x59U, 0x47U, 0xf0U,
766     0xadU, 0xd4U, 0xa2U, 0xafU,
767     0x9cU, 0xa4U, 0x72U, 0xc0U,
768     0xb7U, 0xfdU, 0x93U, 0x26U,
769     0x36U, 0x3fU, 0xf7U, 0xccU,
770     0x34U, 0xa5U, 0xe5U, 0xf1U,
771     0x71U, 0xd8U, 0x31U, 0x15U,
772     0x04U, 0xc7U, 0x23U, 0xc3U,
773     0x18U, 0x96U, 0x05U, 0x9aU,
774     0x07U, 0x12U, 0x80U, 0xe2U,
775     0xebU, 0x27U, 0xb2U, 0x75U,
776     0x09U, 0x83U, 0x2cU, 0x1aU,
777     0x1bU, 0x6eU, 0x5aU, 0xa0U,
778     0x52U, 0x3bU, 0xd6U, 0xb3U,
779     0x29U, 0xe3U, 0x2fU, 0x84U,
780     0x53U, 0xd1U, 0x00U, 0xedU,
781     0x20U, 0xfcU, 0xb1U, 0x5bU,
782     0x6aU, 0xcbU, 0xbeU, 0x39U,
783     0x4aU, 0x4cU, 0x58U, 0xcfU,
784     0xd0U, 0xefU, 0xaaU, 0xfbU,
785     0x43U, 0x4dU, 0x33U, 0x85U,
786     0x45U, 0xf9U, 0x02U, 0x7fU,
787     0x50U, 0x3cU, 0x9fU, 0xa8U,
788     0x51U, 0xa3U, 0x40U, 0x8fU,
789     0x92U, 0x9dU, 0x38U, 0xf5U,
790     0xbcU, 0xb6U, 0xdaU, 0x21U,
791     0x10U, 0xffU, 0xf3U, 0xd2U,
792     0xcdU, 0x0cU, 0x13U, 0xecU,
793     0x5fU, 0x97U, 0x44U, 0x17U,
794     0xc4U, 0xa7U, 0x7eU, 0x3dU,
795     0x64U, 0x5dU, 0x19U, 0x73U,
796     0x60U, 0x81U, 0x4fU, 0xdcU,
797     0x22U, 0x2aU, 0x90U, 0x88U,
798     0x46U, 0xeeU, 0xb8U, 0x14U,
799     0xdeU, 0x5eU, 0x0bU, 0xdbU,
800     0xe0U, 0x32U, 0x3aU, 0x0aU,
801     0x49U, 0x06U, 0x24U, 0x5cU,
802     0xc2U, 0xd3U, 0xacU, 0x62U,
803     0x91U, 0x95U, 0xe4U, 0x79U,
804     0xe7U, 0xc8U, 0x37U, 0x6dU,
805     0x8dU, 0xd5U, 0x4eU, 0xa9U,
806     0x6cU, 0x56U, 0xf4U, 0xeaU,
807     0x65U, 0x7aU, 0xaeU, 0x08U,
808     0xbaU, 0x78U, 0x25U, 0x2eU,
809     0x1cU, 0xa6U, 0xb4U, 0xc6U,
810     0xe8U, 0xddU, 0x74U, 0x1fU,
811     0x4bU, 0xbdU, 0x8bU, 0x8aU,
812     0x70U, 0x3eU, 0xb5U, 0x66U,
813     0x48U, 0x03U, 0xf6U, 0x0eU,
814     0x61U, 0x35U, 0x57U, 0xb9U,
815     0x86U, 0xc1U, 0x1dU, 0x9eU,
816     0xe1U, 0xf8U, 0x98U, 0x11U,
817     0x69U, 0xd9U, 0x8eU, 0x94U,
818     0x9bU, 0x1eU, 0x87U, 0xe9U,
819     0xceU, 0x55U, 0x28U, 0xdfU,
820     0x8cU, 0xa1U, 0x89U, 0x0dU,
821     0xbfU, 0xe6U, 0x42U, 0x68U,
822     0x41U, 0x99U, 0x2dU, 0x0fU,
823     0xb0U, 0x54U, 0xbbU, 0x16U,
824 };
825 static const u32 Td0[256] = {
826     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
827     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
828     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
829     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
830     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
831     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
832     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
833     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
834     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
835     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
836     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
837     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
838     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
839     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
840     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
841     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
842     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
843     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
844     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
845     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
846     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
847     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
848     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
849     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
850     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
851     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
852     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
853     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
854     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
855     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
856     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
857     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
858     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
859     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
860     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
861     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
862     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
863     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
864     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
865     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
866     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
867     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
868     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
869     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
870     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
871     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
872     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
873     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
874     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
875     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
876     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
877     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
878     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
879     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
880     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
881     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
882     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
883     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
884     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
885     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
886     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
887     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
888     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
889     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
890 };
891 static const u32 Td1[256] = {
892     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
893     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
894     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
895     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
896     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
897     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
898     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
899     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
900     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
901     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
902     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
903     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
904     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
905     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
906     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
907     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
908     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
909     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
910     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
911     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
912     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
913     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
914     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
915     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
916     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
917     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
918     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
919     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
920     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
921     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
922     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
923     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
924     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
925     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
926     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
927     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
928     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
929     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
930     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
931     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
932     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
933     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
934     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
935     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
936     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
937     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
938     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
939     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
940     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
941     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
942     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
943     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
944     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
945     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
946     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
947     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
948     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
949     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
950     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
951     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
952     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
953     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
954     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
955     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
956 };
957 static const u32 Td2[256] = {
958     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
959     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
960     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
961     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
962     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
963     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
964     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
965     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
966     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
967     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
968     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
969     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
970     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
971     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
972     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
973     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
974     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
975     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
976     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
977     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
978 
979     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
980     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
981     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
982     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
983     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
984     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
985     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
986     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
987     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
988     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
989     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
990     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
991     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
992     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
993     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
994     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
995     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
996     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
997     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
998     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
999     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
1000     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
1001     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
1002     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
1003     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
1004     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
1005     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
1006     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
1007     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
1008     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
1009     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
1010     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
1011     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
1012     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
1013     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
1014     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
1015     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
1016     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
1017     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
1018     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
1019     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
1020     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
1021     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
1022     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
1023 };
1024 static const u32 Td3[256] = {
1025     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
1026     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
1027     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
1028     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
1029     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
1030     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
1031     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
1032     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
1033     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
1034     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
1035     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
1036     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
1037     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
1038     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
1039     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
1040     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
1041     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
1042     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
1043     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
1044     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
1045     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
1046     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
1047     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
1048     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
1049     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
1050     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
1051     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
1052     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
1053     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
1054     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
1055     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
1056     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
1057     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
1058     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
1059     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
1060     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
1061     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
1062     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
1063     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
1064     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
1065     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
1066     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
1067     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
1068     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
1069     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
1070     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
1071     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
1072     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
1073     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
1074     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
1075     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
1076     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
1077     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
1078     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
1079     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
1080     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
1081     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
1082     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
1083     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
1084     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
1085     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
1086     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
1087     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
1088     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
1089 };
1090 static const u8 Td4[256] = {
1091     0x52U, 0x09U, 0x6aU, 0xd5U,
1092     0x30U, 0x36U, 0xa5U, 0x38U,
1093     0xbfU, 0x40U, 0xa3U, 0x9eU,
1094     0x81U, 0xf3U, 0xd7U, 0xfbU,
1095     0x7cU, 0xe3U, 0x39U, 0x82U,
1096     0x9bU, 0x2fU, 0xffU, 0x87U,
1097     0x34U, 0x8eU, 0x43U, 0x44U,
1098     0xc4U, 0xdeU, 0xe9U, 0xcbU,
1099     0x54U, 0x7bU, 0x94U, 0x32U,
1100     0xa6U, 0xc2U, 0x23U, 0x3dU,
1101     0xeeU, 0x4cU, 0x95U, 0x0bU,
1102     0x42U, 0xfaU, 0xc3U, 0x4eU,
1103     0x08U, 0x2eU, 0xa1U, 0x66U,
1104     0x28U, 0xd9U, 0x24U, 0xb2U,
1105     0x76U, 0x5bU, 0xa2U, 0x49U,
1106     0x6dU, 0x8bU, 0xd1U, 0x25U,
1107     0x72U, 0xf8U, 0xf6U, 0x64U,
1108     0x86U, 0x68U, 0x98U, 0x16U,
1109     0xd4U, 0xa4U, 0x5cU, 0xccU,
1110     0x5dU, 0x65U, 0xb6U, 0x92U,
1111     0x6cU, 0x70U, 0x48U, 0x50U,
1112     0xfdU, 0xedU, 0xb9U, 0xdaU,
1113     0x5eU, 0x15U, 0x46U, 0x57U,
1114     0xa7U, 0x8dU, 0x9dU, 0x84U,
1115     0x90U, 0xd8U, 0xabU, 0x00U,
1116     0x8cU, 0xbcU, 0xd3U, 0x0aU,
1117     0xf7U, 0xe4U, 0x58U, 0x05U,
1118     0xb8U, 0xb3U, 0x45U, 0x06U,
1119     0xd0U, 0x2cU, 0x1eU, 0x8fU,
1120     0xcaU, 0x3fU, 0x0fU, 0x02U,
1121     0xc1U, 0xafU, 0xbdU, 0x03U,
1122     0x01U, 0x13U, 0x8aU, 0x6bU,
1123     0x3aU, 0x91U, 0x11U, 0x41U,
1124     0x4fU, 0x67U, 0xdcU, 0xeaU,
1125     0x97U, 0xf2U, 0xcfU, 0xceU,
1126     0xf0U, 0xb4U, 0xe6U, 0x73U,
1127     0x96U, 0xacU, 0x74U, 0x22U,
1128     0xe7U, 0xadU, 0x35U, 0x85U,
1129     0xe2U, 0xf9U, 0x37U, 0xe8U,
1130     0x1cU, 0x75U, 0xdfU, 0x6eU,
1131     0x47U, 0xf1U, 0x1aU, 0x71U,
1132     0x1dU, 0x29U, 0xc5U, 0x89U,
1133     0x6fU, 0xb7U, 0x62U, 0x0eU,
1134     0xaaU, 0x18U, 0xbeU, 0x1bU,
1135     0xfcU, 0x56U, 0x3eU, 0x4bU,
1136     0xc6U, 0xd2U, 0x79U, 0x20U,
1137     0x9aU, 0xdbU, 0xc0U, 0xfeU,
1138     0x78U, 0xcdU, 0x5aU, 0xf4U,
1139     0x1fU, 0xddU, 0xa8U, 0x33U,
1140     0x88U, 0x07U, 0xc7U, 0x31U,
1141     0xb1U, 0x12U, 0x10U, 0x59U,
1142     0x27U, 0x80U, 0xecU, 0x5fU,
1143     0x60U, 0x51U, 0x7fU, 0xa9U,
1144     0x19U, 0xb5U, 0x4aU, 0x0dU,
1145     0x2dU, 0xe5U, 0x7aU, 0x9fU,
1146     0x93U, 0xc9U, 0x9cU, 0xefU,
1147     0xa0U, 0xe0U, 0x3bU, 0x4dU,
1148     0xaeU, 0x2aU, 0xf5U, 0xb0U,
1149     0xc8U, 0xebU, 0xbbU, 0x3cU,
1150     0x83U, 0x53U, 0x99U, 0x61U,
1151     0x17U, 0x2bU, 0x04U, 0x7eU,
1152     0xbaU, 0x77U, 0xd6U, 0x26U,
1153     0xe1U, 0x69U, 0x14U, 0x63U,
1154     0x55U, 0x21U, 0x0cU, 0x7dU,
1155 };
1156 static const u32 rcon[] = {
1157 	0x01000000, 0x02000000, 0x04000000, 0x08000000,
1158 	0x10000000, 0x20000000, 0x40000000, 0x80000000,
1159 	0x1B000000, 0x36000000,
1160 	/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1161 };
1162 
/*
 * GETU32 assembles a big-endian 32-bit word from the four bytes at pt;
 * PUTU32 stores the low 32 bits of st at ct in the same byte order.
 * Both evaluate their arguments more than once, so pass only
 * side-effect-free expressions.  PUTU32 is wrapped in do/while(0) so
 * that `PUTU32(a, b);' is exactly one statement, even before an else.
 */
#define GETU32(pt) (((u32)(pt)[0]<<24) ^ ((u32)(pt)[1]<<16) ^ \
		    ((u32)(pt)[2]<< 8) ^ ((u32)(pt)[3]))
#define PUTU32(ct, st) do { \
		(ct)[0] = (u8)((st)>>24); (ct)[1] = (u8)((st)>>16); \
		(ct)[2] = (u8)((st)>> 8); (ct)[3] = (u8)(st); \
	} while (0)
1167 
1168 /*
1169  * Expand the cipher key into the encryption key schedule.
1170  *
1171  * @return	the number of rounds for the given cipher key size.
1172  */
1173 static int
aes_setupEnc(ulong rk[],const uchar cipherKey[],int keyBits)1174 aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
1175 {
1176 	int i = 0;
1177 	u32 temp;
1178 
1179 	rk[0] = GETU32(cipherKey     );
1180 	rk[1] = GETU32(cipherKey +  4);
1181 	rk[2] = GETU32(cipherKey +  8);
1182 	rk[3] = GETU32(cipherKey + 12);
1183 	if (keyBits == 128) {
1184 		for (;;) {
1185 			temp  = rk[3];
1186 			rk[4] = rk[0] ^
1187 				(Te4[(temp >> 16) & 0xff] << 24) ^
1188 				(Te4[(temp >>  8) & 0xff] << 16) ^
1189 				(Te4[(temp      ) & 0xff] <<  8) ^
1190 				(Te4[(temp >> 24)       ]      ) ^
1191 				rcon[i];
1192 			rk[5] = rk[1] ^ rk[4];
1193 			rk[6] = rk[2] ^ rk[5];
1194 			rk[7] = rk[3] ^ rk[6];
1195 			if (++i == 10) {
1196 				return 10;
1197 			}
1198 			rk += 4;
1199 		}
1200 	}
1201 	rk[4] = GETU32(cipherKey + 16);
1202 	rk[5] = GETU32(cipherKey + 20);
1203 	if (keyBits == 192) {
1204 		for (;;) {
1205 			temp = rk[ 5];
1206 			rk[ 6] = rk[ 0] ^
1207 				(Te4[(temp >> 16) & 0xff] << 24) ^
1208 				(Te4[(temp >>  8) & 0xff] << 16) ^
1209 				(Te4[(temp      ) & 0xff] <<  8) ^
1210 				(Te4[(temp >> 24)       ]      ) ^
1211 				rcon[i];
1212 			rk[ 7] = rk[ 1] ^ rk[ 6];
1213 			rk[ 8] = rk[ 2] ^ rk[ 7];
1214 			rk[ 9] = rk[ 3] ^ rk[ 8];
1215 			if (++i == 8) {
1216 				return 12;
1217 			}
1218 			rk[10] = rk[ 4] ^ rk[ 9];
1219 			rk[11] = rk[ 5] ^ rk[10];
1220 			rk += 6;
1221 		}
1222 	}
1223 	rk[6] = GETU32(cipherKey + 24);
1224 	rk[7] = GETU32(cipherKey + 28);
1225 	if (keyBits == 256) {
1226 	        for (;;) {
1227 	        	temp = rk[ 7];
1228 	        	rk[ 8] = rk[ 0] ^
1229 	        		(Te4[(temp >> 16) & 0xff] << 24) ^
1230 	        		(Te4[(temp >>  8) & 0xff] << 16) ^
1231 	        		(Te4[(temp      ) & 0xff] <<  8) ^
1232 	        		(Te4[(temp >> 24)       ]      ) ^
1233 	        		rcon[i];
1234 	        	rk[ 9] = rk[ 1] ^ rk[ 8];
1235 	        	rk[10] = rk[ 2] ^ rk[ 9];
1236 	        	rk[11] = rk[ 3] ^ rk[10];
1237 			if (++i == 7) {
1238 				return 14;
1239 			}
1240 	        	temp = rk[11];
1241 	        	rk[12] = rk[ 4] ^
1242 	        		(Te4[(temp >> 24)       ] << 24) ^
1243 	        		(Te4[(temp >> 16) & 0xff] << 16) ^
1244 	        		(Te4[(temp >>  8) & 0xff] <<  8) ^
1245 	        		(Te4[(temp      ) & 0xff]      );
1246 	        	rk[13] = rk[ 5] ^ rk[12];
1247 	        	rk[14] = rk[ 6] ^ rk[13];
1248 	        	rk[15] = rk[ 7] ^ rk[14];
1249 			rk += 8;
1250 	        }
1251 	}
1252 	return 0;
1253 }
1254 
1255 /**
1256  * Expand the cipher key into the decryption key schedule.
1257  *
1258  * @return	the number of rounds for the given cipher key size.
1259  */
1260 static int
aes_setupDec(ulong rk[],const uchar cipherKey[],int keyBits)1261 aes_setupDec(ulong rk[/* 4*(Nr + 1) */], const uchar cipherKey[], int keyBits)
1262 {
1263 	int Nr, i, j;
1264 	ulong temp;
1265 
1266 	/* expand the cipher key: */
1267 	Nr = aes_setupEnc(rk, cipherKey, keyBits);
1268 	/* invert the order of the round keys: */
1269 	for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
1270 		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1271 		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1272 		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1273 		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1274 	}
1275 	/*
1276 	 * apply the inverse MixColumn transform to all round keys
1277 	 * but the first and the last:
1278 	 */
1279 	for (i = 1; i < Nr; i++) {
1280 		rk += 4;
1281 		rk[0] =
1282 			Td0[Te4[(rk[0] >> 24)       ]] ^
1283 			Td1[Te4[(rk[0] >> 16) & 0xff]] ^
1284 			Td2[Te4[(rk[0] >>  8) & 0xff]] ^
1285 			Td3[Te4[(rk[0]      ) & 0xff]];
1286 		rk[1] =
1287 			Td0[Te4[(rk[1] >> 24)       ]] ^
1288 			Td1[Te4[(rk[1] >> 16) & 0xff]] ^
1289 			Td2[Te4[(rk[1] >>  8) & 0xff]] ^
1290 			Td3[Te4[(rk[1]      ) & 0xff]];
1291 		rk[2] =
1292 			Td0[Te4[(rk[2] >> 24)       ]] ^
1293 			Td1[Te4[(rk[2] >> 16) & 0xff]] ^
1294 			Td2[Te4[(rk[2] >>  8) & 0xff]] ^
1295 			Td3[Te4[(rk[2]      ) & 0xff]];
1296 		rk[3] =
1297 			Td0[Te4[(rk[3] >> 24)       ]] ^
1298 			Td1[Te4[(rk[3] >> 16) & 0xff]] ^
1299 			Td2[Te4[(rk[3] >>  8) & 0xff]] ^
1300 			Td3[Te4[(rk[3]      ) & 0xff]];
1301 	}
1302 	return Nr;
1303 }
1304 
1305 /* using round keys in rk, perform Nr rounds of encrypting pt into ct */
1306 void
aes_encrypt(const ulong rk[],int Nr,const uchar pt[16],uchar ct[16])1307 aes_encrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar pt[16],
1308 	uchar ct[16])
1309 {
1310 	ulong s0, s1, s2, s3, t0, t1, t2, t3;
1311 #ifndef FULL_UNROLL
1312 	int r;
1313 #endif /* ?FULL_UNROLL */
1314 
1315 	/*
1316 	 * map byte array block to cipher state
1317 	 * and add initial round key:
1318 	 */
1319 	s0 = GETU32(pt     ) ^ rk[0];
1320 	s1 = GETU32(pt +  4) ^ rk[1];
1321 	s2 = GETU32(pt +  8) ^ rk[2];
1322 	s3 = GETU32(pt + 12) ^ rk[3];
1323 #ifdef FULL_UNROLL
1324 	/* round 1: */
1325    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
1326    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
1327    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
1328    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
1329    	/* round 2: */
1330    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
1331    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
1332    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
1333    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
1334 	/* round 3: */
1335    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
1336    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
1337    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
1338    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
1339    	/* round 4: */
1340    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
1341    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
1342    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
1343    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
1344 	/* round 5: */
1345    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
1346    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
1347    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
1348    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
1349    	/* round 6: */
1350    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
1351    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
1352    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
1353    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
1354 	/* round 7: */
1355    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
1356    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
1357    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
1358    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
1359    	/* round 8: */
1360    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
1361    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
1362    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
1363    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
1364 	/* round 9: */
1365    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
1366    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
1367    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
1368    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
1369 	if (Nr > 10) {
1370 		/* round 10: */
1371 		s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
1372 		s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
1373 		s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
1374 		s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
1375 		/* round 11: */
1376 		t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
1377 		t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
1378 		t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
1379 		t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
1380 		if (Nr > 12) {
1381 			/* round 12: */
1382 			s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
1383 			s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
1384 			s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
1385 			s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
1386 			/* round 13: */
1387 			t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
1388 			t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
1389 			t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
1390 			t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
1391 		}
1392 	}
1393 	rk += Nr << 2;
1394 #else					/* !FULL_UNROLL */
1395 	/*
1396 	 * Nr - 1 full rounds:
1397 	 */
1398 	r = Nr >> 1;
1399 	for (;;) {
1400 	        t0 =
1401 	            Te0[(s0 >> 24)       ] ^
1402 	            Te1[(s1 >> 16) & 0xff] ^
1403 	            Te2[(s2 >>  8) & 0xff] ^
1404 	            Te3[(s3      ) & 0xff] ^
1405 	            rk[4];
1406 	        t1 =
1407 	            Te0[(s1 >> 24)       ] ^
1408 	            Te1[(s2 >> 16) & 0xff] ^
1409 	            Te2[(s3 >>  8) & 0xff] ^
1410 	            Te3[(s0      ) & 0xff] ^
1411 	            rk[5];
1412 	        t2 =
1413 	            Te0[(s2 >> 24)       ] ^
1414 	            Te1[(s3 >> 16) & 0xff] ^
1415 	            Te2[(s0 >>  8) & 0xff] ^
1416 	            Te3[(s1      ) & 0xff] ^
1417 	            rk[6];
1418 	        t3 =
1419 	            Te0[(s3 >> 24)       ] ^
1420 	            Te1[(s0 >> 16) & 0xff] ^
1421 	            Te2[(s1 >>  8) & 0xff] ^
1422 	            Te3[(s2      ) & 0xff] ^
1423 	            rk[7];
1424 
1425 	        rk += 8;
1426 	        if (--r == 0)
1427 	            break;
1428 
1429 	        s0 =
1430 	            Te0[(t0 >> 24)       ] ^
1431 	            Te1[(t1 >> 16) & 0xff] ^
1432 	            Te2[(t2 >>  8) & 0xff] ^
1433 	            Te3[(t3      ) & 0xff] ^
1434 	            rk[0];
1435 	        s1 =
1436 	            Te0[(t1 >> 24)       ] ^
1437 	            Te1[(t2 >> 16) & 0xff] ^
1438 	            Te2[(t3 >>  8) & 0xff] ^
1439 	            Te3[(t0      ) & 0xff] ^
1440 	            rk[1];
1441 	        s2 =
1442 	            Te0[(t2 >> 24)       ] ^
1443 	            Te1[(t3 >> 16) & 0xff] ^
1444 	            Te2[(t0 >>  8) & 0xff] ^
1445 	            Te3[(t1      ) & 0xff] ^
1446 	            rk[2];
1447 	        s3 =
1448 	            Te0[(t3 >> 24)       ] ^
1449 	            Te1[(t0 >> 16) & 0xff] ^
1450 	            Te2[(t1 >>  8) & 0xff] ^
1451 	            Te3[(t2      ) & 0xff] ^
1452 	            rk[3];
1453 	}
1454 #endif					/* ?FULL_UNROLL */
1455 	/*
1456 	 * apply last round and
1457 	 * map cipher state to byte array block:
1458 	 */
1459 	s0 =
1460 		(Te4[(t0 >> 24)       ] << 24) ^
1461 		(Te4[(t1 >> 16) & 0xff] << 16) ^
1462 		(Te4[(t2 >>  8) & 0xff] <<  8) ^
1463 		(Te4[(t3      ) & 0xff]      ) ^
1464 		rk[0];
1465 	PUTU32(ct     , s0);
1466 	s1 =
1467 		(Te4[(t1 >> 24)       ] << 24) ^
1468 		(Te4[(t2 >> 16) & 0xff] << 16) ^
1469 		(Te4[(t3 >>  8) & 0xff] <<  8) ^
1470 		(Te4[(t0      ) & 0xff]      ) ^
1471 		rk[1];
1472 	PUTU32(ct +  4, s1);
1473 	s2 =
1474 		(Te4[(t2 >> 24)       ] << 24) ^
1475 		(Te4[(t3 >> 16) & 0xff] << 16) ^
1476 		(Te4[(t0 >>  8) & 0xff] <<  8) ^
1477 		(Te4[(t1      ) & 0xff]      ) ^
1478 		rk[2];
1479 	PUTU32(ct +  8, s2);
1480 	s3 =
1481 		(Te4[(t3 >> 24)       ] << 24) ^
1482 		(Te4[(t0 >> 16) & 0xff] << 16) ^
1483 		(Te4[(t1 >>  8) & 0xff] <<  8) ^
1484 		(Te4[(t2      ) & 0xff]      ) ^
1485 		rk[3];
1486 	PUTU32(ct + 12, s3);
1487 }
1488 
1489 void
aes_decrypt(const ulong rk[],int Nr,const uchar ct[16],uchar pt[16])1490 aes_decrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar ct[16],
1491 	uchar pt[16])
1492 {
1493 	ulong s0, s1, s2, s3, t0, t1, t2, t3;
1494 #ifndef FULL_UNROLL
1495 	int r;
1496 #endif		/* ?FULL_UNROLL */
1497 
1498 	/*
1499 	 * map byte array block to cipher state
1500 	 * and add initial round key:
1501 	 */
1502     s0 = GETU32(ct     ) ^ rk[0];
1503     s1 = GETU32(ct +  4) ^ rk[1];
1504     s2 = GETU32(ct +  8) ^ rk[2];
1505     s3 = GETU32(ct + 12) ^ rk[3];
1506 #ifdef FULL_UNROLL
1507     /* round 1: */
1508     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1509     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1510     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1511     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1512     /* round 2: */
1513     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1514     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1515     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1516     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1517     /* round 3: */
1518     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1519     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1520     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1521     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1522     /* round 4: */
1523     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1524     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1525     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1526     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1527     /* round 5: */
1528     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1529     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1530     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1531     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1532     /* round 6: */
1533     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1534     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1535     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1536     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1537     /* round 7: */
1538     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1539     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1540     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1541     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1542     /* round 8: */
1543     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1544     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1545     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1546     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1547     /* round 9: */
1548     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1549     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1550     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1551     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1552     if (Nr > 10) {
1553         /* round 10: */
1554         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1555         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1556         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1557         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1558         /* round 11: */
1559         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1560         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1561         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1562         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1563         if (Nr > 12) {
1564             /* round 12: */
1565             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1566             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1567             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1568             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1569             /* round 13: */
1570             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1571             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1572             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1573             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1574         }
1575     }
1576     rk += Nr << 2;
1577 #else					/* !FULL_UNROLL */
1578     /*
1579      * Nr - 1 full rounds:
1580      */
1581     r = Nr >> 1;
1582     for (;;) {
1583         t0 =
1584             Td0[(s0 >> 24)       ] ^
1585             Td1[(s3 >> 16) & 0xff] ^
1586             Td2[(s2 >>  8) & 0xff] ^
1587             Td3[(s1      ) & 0xff] ^
1588             rk[4];
1589         t1 =
1590             Td0[(s1 >> 24)       ] ^
1591             Td1[(s0 >> 16) & 0xff] ^
1592             Td2[(s3 >>  8) & 0xff] ^
1593             Td3[(s2      ) & 0xff] ^
1594             rk[5];
1595         t2 =
1596             Td0[(s2 >> 24)       ] ^
1597             Td1[(s1 >> 16) & 0xff] ^
1598             Td2[(s0 >>  8) & 0xff] ^
1599             Td3[(s3      ) & 0xff] ^
1600             rk[6];
1601         t3 =
1602             Td0[(s3 >> 24)       ] ^
1603             Td1[(s2 >> 16) & 0xff] ^
1604             Td2[(s1 >>  8) & 0xff] ^
1605             Td3[(s0      ) & 0xff] ^
1606             rk[7];
1607 
1608         rk += 8;
1609         if (--r == 0)
1610             break;
1611 
1612         s0 =
1613             Td0[(t0 >> 24)       ] ^
1614             Td1[(t3 >> 16) & 0xff] ^
1615             Td2[(t2 >>  8) & 0xff] ^
1616             Td3[(t1      ) & 0xff] ^
1617             rk[0];
1618         s1 =
1619             Td0[(t1 >> 24)       ] ^
1620             Td1[(t0 >> 16) & 0xff] ^
1621             Td2[(t3 >>  8) & 0xff] ^
1622             Td3[(t2      ) & 0xff] ^
1623             rk[1];
1624         s2 =
1625             Td0[(t2 >> 24)       ] ^
1626             Td1[(t1 >> 16) & 0xff] ^
1627             Td2[(t0 >>  8) & 0xff] ^
1628             Td3[(t3      ) & 0xff] ^
1629             rk[2];
1630         s3 =
1631             Td0[(t3 >> 24)       ] ^
1632             Td1[(t2 >> 16) & 0xff] ^
1633             Td2[(t1 >>  8) & 0xff] ^
1634             Td3[(t0      ) & 0xff] ^
1635             rk[3];
1636     }
1637 #endif					/* ?FULL_UNROLL */
1638 	/*
1639 	 * apply last round and
1640 	 * map cipher state to byte array block:
1641 	 */
1642    	s0 =
1643    		(Td4[(t0 >> 24)       ] << 24) ^
1644    		(Td4[(t3 >> 16) & 0xff] << 16) ^
1645    		(Td4[(t2 >>  8) & 0xff] <<  8) ^
1646    		(Td4[(t1      ) & 0xff]      ) ^
1647    		rk[0];
1648 	PUTU32(pt     , s0);
1649    	s1 =
1650    		(Td4[(t1 >> 24)       ] << 24) ^
1651    		(Td4[(t0 >> 16) & 0xff] << 16) ^
1652    		(Td4[(t3 >>  8) & 0xff] <<  8) ^
1653    		(Td4[(t2      ) & 0xff]      ) ^
1654    		rk[1];
1655 	PUTU32(pt +  4, s1);
1656    	s2 =
1657    		(Td4[(t2 >> 24)       ] << 24) ^
1658    		(Td4[(t1 >> 16) & 0xff] << 16) ^
1659    		(Td4[(t0 >>  8) & 0xff] <<  8) ^
1660    		(Td4[(t3      ) & 0xff]      ) ^
1661    		rk[2];
1662 	PUTU32(pt +  8, s2);
1663    	s3 =
1664    		(Td4[(t3 >> 24)       ] << 24) ^
1665    		(Td4[(t2 >> 16) & 0xff] << 16) ^
1666    		(Td4[(t1 >>  8) & 0xff] <<  8) ^
1667    		(Td4[(t0      ) & 0xff]      ) ^
1668    		rk[3];
1669 	PUTU32(pt + 12, s3);
1670 }
1671 
1672 #ifdef INTERMEDIATE_VALUE_KAT
1673 
1674 static void
aes_encryptRound(const u32 rk[],int Nr,u8 block[16],int rounds)1675 aes_encryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1676 	int rounds)
1677 {
1678 	int r;
1679 	u32 s0, s1, s2, s3, t0, t1, t2, t3;
1680 
1681 	/*
1682 	 * map byte array block to cipher state
1683 	 * and add initial round key:
1684 	 */
1685 	s0 = GETU32(block     ) ^ rk[0];
1686 	s1 = GETU32(block +  4) ^ rk[1];
1687 	s2 = GETU32(block +  8) ^ rk[2];
1688 	s3 = GETU32(block + 12) ^ rk[3];
1689 	rk += 4;
1690 
1691 	/*
1692 	 * Nr - 1 full rounds:
1693 	 */
1694 	for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
1695 		t0 =
1696 			Te0[(s0 >> 24)       ] ^
1697 			Te1[(s1 >> 16) & 0xff] ^
1698 			Te2[(s2 >>  8) & 0xff] ^
1699 			Te3[(s3      ) & 0xff] ^
1700 			rk[0];
1701 		t1 =
1702 			Te0[(s1 >> 24)       ] ^
1703 			Te1[(s2 >> 16) & 0xff] ^
1704 			Te2[(s3 >>  8) & 0xff] ^
1705 			Te3[(s0      ) & 0xff] ^
1706 			rk[1];
1707 		t2 =
1708 			Te0[(s2 >> 24)       ] ^
1709 			Te1[(s3 >> 16) & 0xff] ^
1710 			Te2[(s0 >>  8) & 0xff] ^
1711 			Te3[(s1      ) & 0xff] ^
1712 			rk[2];
1713 		t3 =
1714 			Te0[(s3 >> 24)       ] ^
1715 			Te1[(s0 >> 16) & 0xff] ^
1716 			Te2[(s1 >>  8) & 0xff] ^
1717 			Te3[(s2      ) & 0xff] ^
1718 			rk[3];
1719 		s0 = t0;
1720 		s1 = t1;
1721 		s2 = t2;
1722 		s3 = t3;
1723 		rk += 4;
1724 	}
1725 
1726 	/*
1727 	 * apply last round and
1728 	 * map cipher state to byte array block:
1729 	 */
1730 	if (rounds == Nr) {
1731 	    	t0 =
1732 	    		(Te4[(s0 >> 24)       ] << 24) ^
1733 	    		(Te4[(s1 >> 16) & 0xff] << 16) ^
1734 	    		(Te4[(s2 >>  8) & 0xff] <<  8) ^
1735 	    		(Te4[(s3      ) & 0xff]      ) ^
1736 	    		rk[0];
1737 	    	t1 =
1738 	    		(Te4[(s1 >> 24)       ] << 24) ^
1739 	    		(Te4[(s2 >> 16) & 0xff] << 16) ^
1740 	    		(Te4[(s3 >>  8) & 0xff] <<  8) ^
1741 	    		(Te4[(s0      ) & 0xff]      ) ^
1742 	    		rk[1];
1743 	    	t2 =
1744 	    		(Te4[(s2 >> 24)       ] << 24) ^
1745 	    		(Te4[(s3 >> 16) & 0xff] << 16) ^
1746 	    		(Te4[(s0 >>  8) & 0xff] <<  8) ^
1747 	    		(Te4[(s1      ) & 0xff]      ) ^
1748 	    		rk[2];
1749 	    	t3 =
1750 	    		(Te4[(s3 >> 24)       ] << 24) ^
1751 	    		(Te4[(s0 >> 16) & 0xff] << 16) ^
1752 	    		(Te4[(s1 >>  8) & 0xff] <<  8) ^
1753 	    		(Te4[(s2      ) & 0xff]      ) ^
1754 	    		rk[3];
1755 		s0 = t0;
1756 		s1 = t1;
1757 		s2 = t2;
1758 		s3 = t3;
1759 	}
1760 
1761 	PUTU32(block     , s0);
1762 	PUTU32(block +  4, s1);
1763 	PUTU32(block +  8, s2);
1764 	PUTU32(block + 12, s3);
1765 }
1766 
1767 static void
aes_decryptRound(const u32 rk[],int Nr,u8 block[16],int rounds)1768 aes_decryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1769 	int rounds)
1770 {
1771 	int r;
1772 	u32 s0, s1, s2, s3, t0, t1, t2, t3;
1773 
1774 	/*
1775 	 * map byte array block to cipher state
1776 	 * and add initial round key:
1777 	 */
1778 	s0 = GETU32(block     ) ^ rk[0];
1779 	s1 = GETU32(block +  4) ^ rk[1];
1780 	s2 = GETU32(block +  8) ^ rk[2];
1781 	s3 = GETU32(block + 12) ^ rk[3];
1782 	rk += 4;
1783 
1784 	/*
1785 	 * Nr - 1 full rounds:
1786 	 */
1787 	for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
1788 		t0 =
1789 			Td0[(s0 >> 24)       ] ^
1790 			Td1[(s3 >> 16) & 0xff] ^
1791 			Td2[(s2 >>  8) & 0xff] ^
1792 			Td3[(s1      ) & 0xff] ^
1793 			rk[0];
1794 		t1 =
1795 			Td0[(s1 >> 24)       ] ^
1796 			Td1[(s0 >> 16) & 0xff] ^
1797 			Td2[(s3 >>  8) & 0xff] ^
1798 			Td3[(s2      ) & 0xff] ^
1799 			rk[1];
1800 		t2 =
1801 			Td0[(s2 >> 24)       ] ^
1802 			Td1[(s1 >> 16) & 0xff] ^
1803 			Td2[(s0 >>  8) & 0xff] ^
1804 			Td3[(s3      ) & 0xff] ^
1805 			rk[2];
1806 		t3 =
1807 			Td0[(s3 >> 24)       ] ^
1808 			Td1[(s2 >> 16) & 0xff] ^
1809 			Td2[(s1 >>  8) & 0xff] ^
1810 			Td3[(s0      ) & 0xff] ^
1811 			rk[3];
1812 
1813 		s0 = t0;
1814 		s1 = t1;
1815 		s2 = t2;
1816 		s3 = t3;
1817 		rk += 4;
1818 	}
1819 
1820 	/*
1821 	 * complete the last round and
1822 	 * map cipher state to byte array block:
1823 	 */
1824 	t0 =
1825 		(Td4[(s0 >> 24)       ] << 24) ^
1826 		(Td4[(s3 >> 16) & 0xff] << 16) ^
1827 		(Td4[(s2 >>  8) & 0xff] <<  8) ^
1828 		(Td4[(s1      ) & 0xff]      );
1829 	t1 =
1830 		(Td4[(s1 >> 24)       ] << 24) ^
1831 		(Td4[(s0 >> 16) & 0xff] << 16) ^
1832 		(Td4[(s3 >>  8) & 0xff] <<  8) ^
1833 		(Td4[(s2      ) & 0xff]      );
1834 	t2 =
1835 		(Td4[(s2 >> 24)       ] << 24) ^
1836 		(Td4[(s1 >> 16) & 0xff] << 16) ^
1837 		(Td4[(s0 >>  8) & 0xff] <<  8) ^
1838 		(Td4[(s3      ) & 0xff]      );
1839 	t3 =
1840 		(Td4[(s3 >> 24)       ] << 24) ^
1841 		(Td4[(s2 >> 16) & 0xff] << 16) ^
1842 		(Td4[(s1 >>  8) & 0xff] <<  8) ^
1843 		(Td4[(s0      ) & 0xff]      );
1844 
1845 	if (rounds == Nr) {
1846 		t0 ^= rk[0];
1847 		t1 ^= rk[1];
1848 		t2 ^= rk[2];
1849 		t3 ^= rk[3];
1850 	}
1851 
1852 	PUTU32(block     , t0);
1853 	PUTU32(block +  4, t1);
1854 	PUTU32(block +  8, t2);
1855 	PUTU32(block + 12, t3);
1856 }
1857 
1858 #endif			/* INTERMEDIATE_VALUE_KAT */
1859