xref: /plan9/sys/src/libsec/port/aes.c (revision e5442215b220c3108cd1c5cb2bf5923eb080fc50)
1 /*
2  * this code is derived from the following source,
3  * and modified to fit into the plan 9 libsec interface.
4  * most of the changes are confined to the top section,
5  * with the exception of converting Te4 and Td4 into u8 rather than u32 arrays.
6  *
7  * rijndael-alg-fst.c
8  *
9  * @version 3.0 (December 2000)
10  *
11  * Optimised ANSI C code for the Rijndael cipher (now AES)
12  *
13  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
14  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
15  * @author Paulo Barreto <paulo.barreto@terra.com.br>
16  *
17  * This code is hereby placed in the public domain.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
20  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 #include <u.h>
32 #include <libc.h>
33 #include <mp.h>
34 #include <libsec.h>
35 
/* short type names used by the table and cipher code derived from the reference source */
36 typedef uchar	u8;
37 typedef ulong	u32;
38 
/*
 * FULL_UNROLL is only defined here; its use is not visible in this part
 * of the file (presumably it selects unrolled rounds in the cipher
 * routines -- TODO confirm).
 * const is defined to nothing, so every later `const' in this file
 * expands to nothing.
 */
39 #define FULL_UNROLL
40 #define const
41 
/*
 * forward declarations: aes_setup uses these tables before their
 * initialised definitions appear further down.
 */
42 static const u32 Td0[256];
43 static const u32 Td1[256];
44 static const u32 Td2[256];
45 static const u32 Td3[256];
46 static const u8  Te4[256];
/*
 * three constant 16-byte blocks (all 0x01, all 0x02, all 0x03);
 * setupAESXCBCstate encrypts each of them to fill s->mackey.
 */
47 static uchar basekey[3][16] = {
48 	{
49 	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
50 	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
51 	},
52 	{
53 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
54 	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
55 	},
56 	{
57 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
58 	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
59 	},
60 };
61 
/*
 * key-schedule helpers, local to this file.  rk/erk/drk are round-key
 * arrays of 4*(Nr + 1) words; keyBits is the cipher key length in bits.
 */
62 static int aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
63 		int keyBits);
64 static int aes_setupDec(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[],
65 		int keyBits);
66 static int aes_setup(ulong erk[/*4*(Nr + 1)*/], ulong drk[/*4*(Nr + 1)*/],
67 		const uchar cipherKey[], int keyBits);
68 
/*
 * single-block primitives: rk is the round-key array, Nr the number of
 * rounds, pt/ct the 16-byte plaintext and ciphertext blocks.
 */
69 void	aes_encrypt(const ulong rk[], int Nr, const uchar pt[16], uchar ct[16]);
70 void	aes_decrypt(const ulong rk[], int Nr, const uchar ct[16], uchar pt[16]);
71 
72 void
setupAESstate(AESstate * s,uchar key[],int keybytes,uchar * ivec)73 setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
74 {
75 	memset(s, 0, sizeof(*s));
76 	if(keybytes > AESmaxkey)
77 		keybytes = AESmaxkey;
78 	memmove(s->key, key, keybytes);
79 	s->keybytes = keybytes;
80 	s->rounds = aes_setup(s->ekey, s->dkey, s->key, keybytes * 8);
81 	if(ivec != nil)
82 		memmove(s->ivec, ivec, AESbsize);
83 	if(keybytes==16 || keybytes==24 || keybytes==32)
84 		s->setup = 0xcafebabe;
85 	/* else aes_setup was invalid */
86 }
87 
88 /*
89  * AES-XCBC-MAC-96 message authentication, per rfc3566.
90  */
91 
92 void
setupAESXCBCstate(AESstate * s)93 setupAESXCBCstate(AESstate *s)		/* was setupmac96 */
94 {
95 	int i, j;
96 	uint q[16 / sizeof(uint)];
97 	uchar *p;
98 
99 	assert(s->keybytes == 16);
100 	for(i = 0; i < 3; i++)
101 		aes_encrypt(s->ekey, s->rounds, basekey[i],
102 			s->mackey + AESbsize*i);
103 
104 	p = s->mackey;
105 	memset(q, 0, AESbsize);
106 
107 	/*
108 	 * put the in the right endian.  once figured, probably better
109 	 * to use some fcall macros.
110 	 * keys for encryption in local endianness for the algorithm...
111 	 * only key1 is used for encryption;
112 	 * BUG!!: I think this is what I got wrong.
113 	 */
114 	for(i = 0; i < 16 / sizeof(uint); i ++){
115 		for(j = 0; j < sizeof(uint); j++)
116 			q[i] |= p[sizeof(uint)-j-1] << 8*j;
117 		p += sizeof(uint);
118 	}
119 	memmove(s->mackey, q, 16);
120 }
121 
122 /*
123  * Not dealing with > 128-bit keys, not dealing with strange corner cases like
124  * empty message.  Should be fine for AES-XCBC-MAC-96.
125  */
126 uchar*
aesXCBCmac(uchar * p,int len,AESstate * s)127 aesXCBCmac(uchar *p, int len, AESstate *s)
128 {
129 	uchar *p2, *ip, *eip, *mackey;
130 	uchar q[AESbsize];
131 
132 	assert(s->keybytes == 16);	/* more complicated for bigger */
133 	memset(s->ivec, 0, AESbsize);	/* E[0] is 0+ */
134 
135 	for(; len > AESbsize; len -= AESbsize){
136 		memmove(q, p, AESbsize);
137 		p2 = q;
138 		ip = s->ivec;
139 		for(eip = ip + AESbsize; ip < eip; )
140 			*p2++ ^= *ip++;
141 		aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
142 		p += AESbsize;
143 	}
144 	/* the last one */
145 
146 	memmove(q, p, len);
147 	p2 = q+len;
148 	if(len == AESbsize)
149 		mackey = s->mackey + AESbsize;	/* k2 */
150 	else{
151 		mackey = s->mackey+2*AESbsize;	/* k3 */
152 		*p2++ = 1 << 7;			/* padding */
153 		len = AESbsize - len - 1;
154 		memset(p2, 0, len);
155 	}
156 
157 	ip = s->ivec;
158 	p2 = q;
159 	for(eip = ip + AESbsize; ip < eip; )
160 		*p2++ ^= *ip++ ^ *mackey++;
161 	aes_encrypt((ulong *)s->mackey, s->rounds, q, s->ivec);
162 	return s->ivec;			/* only the 12 bytes leftmost */
163 }
164 
165 /*
166  * Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
167  * Because of the way that non-multiple-of-16 buffers are handled,
168  * the decryptor must be fed buffers of the same size as the encryptor.
169  */
170 void
aesCBCencrypt(uchar * p,int len,AESstate * s)171 aesCBCencrypt(uchar *p, int len, AESstate *s)
172 {
173 	uchar *p2, *ip, *eip;
174 	uchar q[AESbsize];
175 
176 	for(; len >= AESbsize; len -= AESbsize){
177 		p2 = p;
178 		ip = s->ivec;
179 		for(eip = ip+AESbsize; ip < eip; )
180 			*p2++ ^= *ip++;
181 		aes_encrypt(s->ekey, s->rounds, p, q);
182 		memmove(s->ivec, q, AESbsize);
183 		memmove(p, q, AESbsize);
184 		p += AESbsize;
185 	}
186 
187 	if(len > 0){
188 		ip = s->ivec;
189 		aes_encrypt(s->ekey, s->rounds, ip, q);
190 		memmove(s->ivec, q, AESbsize);
191 		for(eip = ip+len; ip < eip; )
192 			*p++ ^= *ip++;
193 	}
194 }
195 
196 void
aesCBCdecrypt(uchar * p,int len,AESstate * s)197 aesCBCdecrypt(uchar *p, int len, AESstate *s)
198 {
199 	uchar *ip, *eip, *tp;
200 	uchar tmp[AESbsize], q[AESbsize];
201 
202 	for(; len >= AESbsize; len -= AESbsize){
203 		memmove(tmp, p, AESbsize);
204 		aes_decrypt(s->dkey, s->rounds, p, q);
205 		memmove(p, q, AESbsize);
206 		tp = tmp;
207 		ip = s->ivec;
208 		for(eip = ip+AESbsize; ip < eip; ){
209 			*p++ ^= *ip;
210 			*ip++ = *tp++;
211 		}
212 	}
213 
214 	if(len > 0){
215 		ip = s->ivec;
216 		aes_encrypt(s->ekey, s->rounds, ip, q);
217 		memmove(s->ivec, q, AESbsize);
218 		for(eip = ip+len; ip < eip; )
219 			*p++ ^= *ip++;
220 	}
221 }
222 
223 /*
224  * AES-CTR mode, per rfc3686.
225  * CTRs could be precalculated for efficiency
226  * and there would also be less back and forth mp
227  */
228 
229 static void
incrementCTR(uchar * p,uint ctrsz)230 incrementCTR(uchar *p, uint ctrsz)
231 {
232 	int len;
233 	uchar *ctr;
234 	mpint *mpctr, *mpctrsz;
235 
236 	ctr = p + AESbsize - ctrsz;
237 	mpctr = betomp(ctr, ctrsz, nil);
238 	mpctrsz = itomp(1 << (ctrsz*8), nil);
239 	mpadd(mpctr, mpone, mpctr);
240 	mpmod(mpctr, mpctrsz, mpctr);
241 	len = mptobe(mpctr, ctr, ctrsz, nil);
242 	assert(len == ctrsz);
243 	mpfree(mpctrsz);
244 	mpfree(mpctr);
245 }
246 
247 void
aesCTRencrypt(uchar * p,int len,AESstate * s)248 aesCTRencrypt(uchar *p, int len, AESstate *s)
249 {
250 	uchar q[AESbsize];
251 	uchar *ip, *eip, *ctr;
252 
253 	ctr = s->ivec;
254 	for(; len >= AESbsize; len -= AESbsize){
255 		ip = q;
256 		aes_encrypt(s->ekey, s->rounds, ctr, q);
257 		for(eip = p + AESbsize; p < eip; )
258 			*p++ ^= *ip++;
259 		incrementCTR(ctr, s->ctrsz);
260 	}
261 
262 	if(len > 0){
263 		ip = q;
264 		aes_encrypt(s->ekey, s->rounds, ctr, q);
265 		for(eip = p + len; p < eip; )
266 			*p++ ^= *ip++;
267 		incrementCTR(ctr, s->ctrsz);
268 	}
269 }
270 
271 void
aesCTRdecrypt(uchar * p,int len,AESstate * s)272 aesCTRdecrypt(uchar *p, int len, AESstate *s)
273 {
274 	aesCTRencrypt(p, len, s);
275 }
276 
277 
278 /* taken from sha1; TODO: verify suitability (esp. byte order) for aes */
279 /*
280  *	encodes input (ulong) into output (uchar). Assumes len is
281  *	a multiple of 4.
282  */
283 static void
encode(uchar * output,ulong * input,ulong len)284 encode(uchar *output, ulong *input, ulong len)
285 {
286 	ulong x;
287 	uchar *e;
288 
289 	for(e = output + len; output < e;) {
290 		x = *input++;
291 		*output++ = x >> 24;
292 		*output++ = x >> 16;
293 		*output++ = x >> 8;
294 		*output++ = x;
295 	}
296 }
297 
298 /* TODO: verify use of aes_encrypt here */
/*
 * Incremental digest-style routine.  The buffering and padding
 * skeleton is the sha1 one (see the commented-out _sha1block calls);
 * aes_encrypt calls stand in for the block function.
 *
 *	p, len	next chunk of input.
 *	digest	nil while more input will follow; non-nil on the final call.
 *	s	running state, or nil to have one allocated here.
 *
 * With digest == nil the leftover bytes are saved in s->buf and s is
 * returned (nil if the allocation failed).  With digest != nil the
 * input is padded with 0x80, zeros and the bit count, AESdlen bytes of
 * s->state are written to digest through encode(), s is freed if it
 * was allocated here, and nil is returned.
 *
 * NOTE(review): every aes_encrypt call passes s->buf as the round-key
 * array with Nr = 1 and, going by its prototype, handles a single
 * 16-byte block, while the surrounding code reasons in 64-byte blocks
 * and byte counts.  Confirm what is intended before relying on the
 * output.
 */
299 AEShstate*
aes(uchar * p,ulong len,uchar * digest,AEShstate * s)300 aes(uchar *p, ulong len, uchar *digest, AEShstate *s)
301 {
302 	uchar buf[128];
303 	ulong x[16];	/* only x[0] and x[1] are used below */
304 	int i;
305 	uchar *e;
306 
307 	if(s == nil){
308 		s = malloc(sizeof(*s));
309 		if(s == nil)
310 			return nil;
311 		memset(s, 0, sizeof(*s));
312 		s->malloced = 1;
313 	}
314 
315 	if(s->seeded == 0){
316 		/* seed the state, these constants would look nicer big-endian */
317 		s->state[0] = 0x67452301;
318 		s->state[1] = 0xefcdab89;
319 		s->state[2] = 0x98badcfe;
320 		s->state[3] = 0x10325476;
321 		/* in sha1 (20-byte digest), but not md5 (16 bytes)*/
322 		s->state[4] = 0xc3d2e1f0;
323 		s->seeded = 1;
324 	}
325 
326 	/* fill out the partial 64 byte block from previous calls */
327 	if(s->blen){
328 		i = 64 - s->blen;
329 		if(len < i)
330 			i = len;
331 		memmove(s->buf + s->blen, p, i);
332 		len -= i;
333 		s->blen += i;
334 		p += i;
335 		if(s->blen == 64){
336 			/* encrypt s->buf into s->state */
337 			// _sha1block(s->buf, s->blen, s->state);
338 			aes_encrypt((ulong *)s->buf, 1, s->buf, (uchar *)s->state);
339 			s->len += s->blen;
340 			s->blen = 0;
341 		}
342 	}
343 
344 	/* do 64 byte blocks */
	/* i is the largest multiple of 64 not exceeding len */
345 	i = len & ~0x3f;
346 	if(i){
347 		/* encrypt p into s->state */
348 		// _sha1block(p, i, s->state);
		/* NOTE(review): one aes_encrypt call here, however many bytes i covers */
349 		aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
350 		s->len += i;
351 		len -= i;
352 		p += i;
353 	}
354 
355 	/* save the left overs if not last call */
356 	if(digest == 0){
357 		if(len){
358 			memmove(s->buf, p, len);
359 			s->blen += len;
360 		}
361 		return s;
362 	}
363 
364 	/*
365 	 *  this is the last time through, pad what's left with 0x80,
366 	 *  0's, and the input count to create a multiple of 64 bytes
367 	 */
368 	if(s->blen){
369 		p = s->buf;
370 		len = s->blen;
371 	} else {
372 		memmove(buf, p, len);
373 		p = buf;
374 	}
375 	s->len += len;
376 	e = p + len;
	/* pad to 56 or 120 bytes so the 8-byte count ends on a 64-byte boundary */
377 	if(len < 56)
378 		i = 56 - len;
379 	else
380 		i = 120 - len;
381 	memset(e, 0, i);
382 	*e = 0x80;
383 	len += i;
384 
385 	/* append the count */
386 	x[0] = s->len>>29;		/* byte-order dependent */
387 	x[1] = s->len<<3;
388 	encode(p+len, x, 8);
389 
390 	/* digest the last part */
391 	/* encrypt p into s->state */
392 	// _sha1block(p, len+8, s->state);
393 	aes_encrypt((ulong *)s->buf, 1, p, (uchar *)s->state);
394 	s->len += len+8;		/* sha1: +8 */
395 
396 	/* return result and free state */
397 	encode((uchar *)digest, (ulong *)s->state, AESdlen);
398 	if(s->malloced == 1)
399 		free(s);
400 	return nil;
401 }
402 
403 DigestState*
hmac_aes(uchar * p,ulong len,uchar * key,ulong klen,uchar * digest,DigestState * s)404 hmac_aes(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest,
405 	DigestState *s)
406 {
407 	return hmac_x(p, len, key, klen, digest, s, aes, AESdlen);
408 }
409 
410 
411 
412 /*
413  * this function has been changed for plan 9.
414  * Expand the cipher key into the encryption and decryption key schedules.
415  *
416  * @return	the number of rounds for the given cipher key size.
417  */
418 static int
aes_setup(ulong erk[],ulong drk[],const uchar cipherKey[],int keyBits)419 aes_setup(ulong erk[/* 4*(Nr + 1) */], ulong drk[/* 4*(Nr + 1) */],
420 	const uchar cipherKey[], int keyBits)
421 {
422 	int Nr, i;
423 
424 	/* expand the cipher key: */
425 	Nr = aes_setupEnc(erk, cipherKey, keyBits);
426 
427 	/*
428 	 * invert the order of the round keys and apply the inverse MixColumn
429 	 * transform to all round keys but the first and the last
430 	 */
431 	drk[0       ] = erk[4*Nr    ];
432 	drk[1       ] = erk[4*Nr + 1];
433 	drk[2       ] = erk[4*Nr + 2];
434 	drk[3       ] = erk[4*Nr + 3];
435 	drk[4*Nr    ] = erk[0       ];
436 	drk[4*Nr + 1] = erk[1       ];
437 	drk[4*Nr + 2] = erk[2       ];
438 	drk[4*Nr + 3] = erk[3       ];
439 	erk += 4 * Nr;
440 	for (i = 1; i < Nr; i++) {
441 		drk += 4;
442 		erk -= 4;
443 		drk[0] =
444 		    Td0[Te4[(erk[0] >> 24)       ]] ^
445 		    Td1[Te4[(erk[0] >> 16) & 0xff]] ^
446 		    Td2[Te4[(erk[0] >>  8) & 0xff]] ^
447 		    Td3[Te4[(erk[0]      ) & 0xff]];
448 		drk[1] =
449 		    Td0[Te4[(erk[1] >> 24)       ]] ^
450 		    Td1[Te4[(erk[1] >> 16) & 0xff]] ^
451 		    Td2[Te4[(erk[1] >>  8) & 0xff]] ^
452 		    Td3[Te4[(erk[1]      ) & 0xff]];
453 		drk[2] =
454 		    Td0[Te4[(erk[2] >> 24)       ]] ^
455 		    Td1[Te4[(erk[2] >> 16) & 0xff]] ^
456 		    Td2[Te4[(erk[2] >>  8) & 0xff]] ^
457 		    Td3[Te4[(erk[2]      ) & 0xff]];
458 		drk[3] =
459 		    Td0[Te4[(erk[3] >> 24)       ]] ^
460 		    Td1[Te4[(erk[3] >> 16) & 0xff]] ^
461 		    Td2[Te4[(erk[3] >>  8) & 0xff]] ^
462 		    Td3[Te4[(erk[3]      ) & 0xff]];
463 	}
464 	return Nr;
465 }
466 
467 
468 /*
469 Te0[x] = S [x].[02, 01, 01, 03];
470 Te1[x] = S [x].[03, 02, 01, 01];
471 Te2[x] = S [x].[01, 03, 02, 01];
472 Te3[x] = S [x].[01, 01, 03, 02];
473 Te4[x] = S [x]
474 
475 Td0[x] = Si[x].[0e, 09, 0d, 0b];
476 Td1[x] = Si[x].[0b, 0e, 09, 0d];
477 Td2[x] = Si[x].[0d, 0b, 0e, 09];
478 Td3[x] = Si[x].[09, 0d, 0b, 0e];
479 Td4[x] = Si[x]
480 */
481 
/*
 * Te0[x] = S[x].[02, 01, 01, 03]: each word packs the S-box value of x
 * multiplied by 02, 01, 01 and 03, most significant byte first
 * (e.g. Te0[0] = 0xc66363a5 for S[0] = 0x63).
 */
482 static const u32 Te0[256] = {
483     0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
484     0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
485     0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
486     0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
487     0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
488     0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
489     0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
490     0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
491     0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
492     0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
493     0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
494     0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
495     0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
496     0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
497     0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
498     0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
499     0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
500     0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
501     0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
502     0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
503     0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
504     0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
505     0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
506     0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
507     0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
508     0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
509     0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
510     0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
511     0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
512     0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
513     0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
514     0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
515     0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
516     0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
517     0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
518     0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
519     0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
520     0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
521     0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
522     0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
523     0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
524     0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
525     0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
526     0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
527     0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
528     0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
529     0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
530     0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
531     0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
532     0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
533     0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
534     0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
535     0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
536     0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
537     0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
538     0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
539     0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
540     0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
541     0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
542     0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
543     0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
544     0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
545     0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
546     0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
547 };
/*
 * Te1[x] = S[x].[03, 02, 01, 01]: the same products as Te0 with the
 * bytes rotated one place (e.g. Te1[0] = 0xa5c66363 for S[0] = 0x63).
 */
548 static const u32 Te1[256] = {
549     0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
550     0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
551     0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
552     0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
553     0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
554     0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
555     0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
556     0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
557     0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
558     0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
559     0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
560     0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
561     0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
562     0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
563     0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
564     0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
565     0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
566     0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
567     0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
568     0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
569     0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
570     0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
571     0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
572     0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
573     0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
574     0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
575     0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
576     0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
577     0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
578     0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
579     0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
580     0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
581     0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
582     0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
583     0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
584     0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
585     0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
586     0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
587     0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
588     0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
589     0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
590     0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
591     0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
592     0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
593     0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
594     0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
595     0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
596     0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
597     0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
598     0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
599     0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
600     0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
601     0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
602     0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
603     0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
604     0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
605     0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
606     0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
607     0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
608     0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
609     0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
610     0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
611     0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
612     0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
613 };
/*
 * Te2[x] = S[x].[01, 03, 02, 01]: the same products as Te0 with the
 * bytes rotated two places (e.g. Te2[0] = 0x63a5c663 for S[0] = 0x63).
 */
614 static const u32 Te2[256] = {
615     0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
616     0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
617     0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
618     0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
619     0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
620     0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
621     0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
622     0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
623     0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
624     0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
625     0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
626     0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
627     0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
628     0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
629     0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
630     0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
631     0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
632     0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
633     0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
634     0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
635     0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
636     0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
637     0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
638     0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
639     0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
640     0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
641     0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
642     0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
643     0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
644     0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
645     0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
646     0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
647     0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
648     0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
649     0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
650     0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
651     0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
652     0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
653     0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
654     0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
655     0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
656     0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
657     0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
658     0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
659     0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
660     0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
661     0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
662     0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
663     0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
664     0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
665     0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
666     0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
667     0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
668     0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
669     0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
670     0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
671     0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
672     0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
673     0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
674     0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
675     0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
676     0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
677     0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
678     0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
679 };
/*
 * Te3[x] = S[x].[01, 01, 03, 02]: the same products as Te0 with the
 * bytes rotated three places (e.g. Te3[0] = 0x6363a5c6 for S[0] = 0x63).
 */
680 static const u32 Te3[256] = {
681 
682     0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
683     0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
684     0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
685     0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
686     0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
687     0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
688     0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
689     0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
690     0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
691     0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
692     0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
693     0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
694     0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
695     0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
696     0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
697     0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
698     0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
699     0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
700     0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
701     0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
702     0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
703     0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
704     0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
705     0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
706     0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
707     0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
708     0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
709     0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
710     0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
711     0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
712     0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
713     0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
714     0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
715     0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
716     0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
717     0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
718     0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
719     0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
720     0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
721     0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
722     0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
723     0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
724     0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
725     0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
726     0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
727     0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
728     0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
729     0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
730     0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
731     0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
732     0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
733     0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
734     0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
735     0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
736     0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
737     0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
738     0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
739     0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
740     0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
741     0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
742     0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
743     0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
744     0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
745     0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
746 };
/*
 * Te4[x] = S[x]: the plain S-box, one byte per entry (kept as u8
 * rather than u32, as noted in the header comment of this file).
 * aes_setup uses it to index the Td tables.
 */
747 static const u8 Te4[256] = {
748     0x63U, 0x7cU, 0x77U, 0x7bU,
749     0xf2U, 0x6bU, 0x6fU, 0xc5U,
750     0x30U, 0x01U, 0x67U, 0x2bU,
751     0xfeU, 0xd7U, 0xabU, 0x76U,
752     0xcaU, 0x82U, 0xc9U, 0x7dU,
753     0xfaU, 0x59U, 0x47U, 0xf0U,
754     0xadU, 0xd4U, 0xa2U, 0xafU,
755     0x9cU, 0xa4U, 0x72U, 0xc0U,
756     0xb7U, 0xfdU, 0x93U, 0x26U,
757     0x36U, 0x3fU, 0xf7U, 0xccU,
758     0x34U, 0xa5U, 0xe5U, 0xf1U,
759     0x71U, 0xd8U, 0x31U, 0x15U,
760     0x04U, 0xc7U, 0x23U, 0xc3U,
761     0x18U, 0x96U, 0x05U, 0x9aU,
762     0x07U, 0x12U, 0x80U, 0xe2U,
763     0xebU, 0x27U, 0xb2U, 0x75U,
764     0x09U, 0x83U, 0x2cU, 0x1aU,
765     0x1bU, 0x6eU, 0x5aU, 0xa0U,
766     0x52U, 0x3bU, 0xd6U, 0xb3U,
767     0x29U, 0xe3U, 0x2fU, 0x84U,
768     0x53U, 0xd1U, 0x00U, 0xedU,
769     0x20U, 0xfcU, 0xb1U, 0x5bU,
770     0x6aU, 0xcbU, 0xbeU, 0x39U,
771     0x4aU, 0x4cU, 0x58U, 0xcfU,
772     0xd0U, 0xefU, 0xaaU, 0xfbU,
773     0x43U, 0x4dU, 0x33U, 0x85U,
774     0x45U, 0xf9U, 0x02U, 0x7fU,
775     0x50U, 0x3cU, 0x9fU, 0xa8U,
776     0x51U, 0xa3U, 0x40U, 0x8fU,
777     0x92U, 0x9dU, 0x38U, 0xf5U,
778     0xbcU, 0xb6U, 0xdaU, 0x21U,
779     0x10U, 0xffU, 0xf3U, 0xd2U,
780     0xcdU, 0x0cU, 0x13U, 0xecU,
781     0x5fU, 0x97U, 0x44U, 0x17U,
782     0xc4U, 0xa7U, 0x7eU, 0x3dU,
783     0x64U, 0x5dU, 0x19U, 0x73U,
784     0x60U, 0x81U, 0x4fU, 0xdcU,
785     0x22U, 0x2aU, 0x90U, 0x88U,
786     0x46U, 0xeeU, 0xb8U, 0x14U,
787     0xdeU, 0x5eU, 0x0bU, 0xdbU,
788     0xe0U, 0x32U, 0x3aU, 0x0aU,
789     0x49U, 0x06U, 0x24U, 0x5cU,
790     0xc2U, 0xd3U, 0xacU, 0x62U,
791     0x91U, 0x95U, 0xe4U, 0x79U,
792     0xe7U, 0xc8U, 0x37U, 0x6dU,
793     0x8dU, 0xd5U, 0x4eU, 0xa9U,
794     0x6cU, 0x56U, 0xf4U, 0xeaU,
795     0x65U, 0x7aU, 0xaeU, 0x08U,
796     0xbaU, 0x78U, 0x25U, 0x2eU,
797     0x1cU, 0xa6U, 0xb4U, 0xc6U,
798     0xe8U, 0xddU, 0x74U, 0x1fU,
799     0x4bU, 0xbdU, 0x8bU, 0x8aU,
800     0x70U, 0x3eU, 0xb5U, 0x66U,
801     0x48U, 0x03U, 0xf6U, 0x0eU,
802     0x61U, 0x35U, 0x57U, 0xb9U,
803     0x86U, 0xc1U, 0x1dU, 0x9eU,
804     0xe1U, 0xf8U, 0x98U, 0x11U,
805     0x69U, 0xd9U, 0x8eU, 0x94U,
806     0x9bU, 0x1eU, 0x87U, 0xe9U,
807     0xceU, 0x55U, 0x28U, 0xdfU,
808     0x8cU, 0xa1U, 0x89U, 0x0dU,
809     0xbfU, 0xe6U, 0x42U, 0x68U,
810     0x41U, 0x99U, 0x2dU, 0x0fU,
811     0xb0U, 0x54U, 0xbbU, 0x16U,
812 };
/*
 * Td0: 256-entry decryption lookup table, indexed by a single byte.
 * In the notation of the rijndael-alg-fst.c reference code,
 * Td0[x] = Si[x].[0e, 09, 0d, 0b], with Si the AES inverse S-box
 * (e.g. Td0[0x00] = 0x51f4a750 with Si[0x00] = 0x52, and
 * Td0[0x63] = 0 because Si[0x63] = 0).
 * aes_setupDec and aes_decrypt index it with the most significant
 * byte (>> 24) of a word.  Td1, Td2 and Td3 contain these same words
 * rotated right by 8, 16 and 24 bits respectively.
 */
813 static const u32 Td0[256] = {
814     0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
815     0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
816     0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
817     0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
818     0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
819     0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
820     0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
821     0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
822     0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
823     0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
824     0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
825     0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
826     0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
827     0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
828     0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
829     0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
830     0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
831     0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
832     0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
833     0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
834     0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
835     0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
836     0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
837     0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
838     0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
839     0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
840     0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
841     0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
842     0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
843     0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
844     0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
845     0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
846     0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
847     0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
848     0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
849     0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
850     0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
851     0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
852     0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
853     0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
854     0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
855     0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
856     0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
857     0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
858     0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
859     0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
860     0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
861     0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
862     0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
863     0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
864     0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
865     0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
866     0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
867     0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
868     0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
869     0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
870     0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
871     0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
872     0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
873     0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
874     0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
875     0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
876     0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
877     0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
878 };
/*
 * Td1: 256-entry decryption lookup table, indexed by a single byte.
 * Each entry is the matching Td0 entry rotated right by 8 bits
 * (Td0[0] = 0x51f4a750, Td1[0] = 0x5051f4a7); in the notation of the
 * rijndael-alg-fst.c reference code, Td1[x] = Si[x].[0b, 0e, 09, 0d].
 * aes_setupDec and aes_decrypt index it with bits 23..16 of a word
 * ((w >> 16) & 0xff).
 */
879 static const u32 Td1[256] = {
880     0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
881     0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
882     0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
883     0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
884     0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
885     0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
886     0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
887     0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
888     0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
889     0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
890     0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
891     0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
892     0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
893     0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
894     0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
895     0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
896     0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
897     0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
898     0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
899     0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
900     0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
901     0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
902     0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
903     0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
904     0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
905     0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
906     0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
907     0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
908     0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
909     0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
910     0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
911     0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
912     0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
913     0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
914     0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
915     0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
916     0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
917     0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
918     0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
919     0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
920     0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
921     0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
922     0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
923     0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
924     0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
925     0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
926     0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
927     0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
928     0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
929     0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
930     0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
931     0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
932     0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
933     0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
934     0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
935     0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
936     0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
937     0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
938     0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
939     0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
940     0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
941     0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
942     0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
943     0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
944 };
/*
 * Td2: 256-entry decryption lookup table, indexed by a single byte.
 * Each entry is the matching Td0 entry rotated right by 16 bits
 * (Td0[0] = 0x51f4a750, Td2[0] = 0xa75051f4); in the notation of the
 * rijndael-alg-fst.c reference code, Td2[x] = Si[x].[0d, 0b, 0e, 09].
 * aes_setupDec and aes_decrypt index it with bits 15..8 of a word
 * ((w >> 8) & 0xff).
 */
945 static const u32 Td2[256] = {
946     0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
947     0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
948     0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
949     0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
950     0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
951     0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
952     0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
953     0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
954     0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
955     0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
956     0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
957     0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
958     0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
959     0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
960     0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
961     0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
962     0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
963     0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
964     0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
965     0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
966
967     0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
968     0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
969     0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
970     0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
971     0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
972     0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
973     0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
974     0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
975     0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
976     0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
977     0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
978     0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
979     0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
980     0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
981     0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
982     0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
983     0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
984     0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
985     0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
986     0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
987     0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
988     0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
989     0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
990     0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
991     0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
992     0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
993     0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
994     0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
995     0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
996     0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
997     0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
998     0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
999     0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
1000     0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
1001     0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
1002     0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
1003     0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
1004     0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
1005     0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
1006     0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
1007     0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
1008     0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
1009     0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
1010     0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
1011 };
/*
 * Td3: 256-entry decryption lookup table, indexed by a single byte.
 * Each entry is the matching Td0 entry rotated right by 24 bits
 * (Td0[0] = 0x51f4a750, Td3[0] = 0xf4a75051); in the notation of the
 * rijndael-alg-fst.c reference code, Td3[x] = Si[x].[09, 0d, 0b, 0e].
 * aes_setupDec and aes_decrypt index it with the least significant
 * byte of a word (w & 0xff).
 */
1012 static const u32 Td3[256] = {
1013     0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
1014     0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
1015     0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
1016     0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
1017     0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
1018     0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
1019     0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
1020     0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
1021     0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
1022     0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
1023     0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
1024     0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
1025     0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
1026     0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
1027     0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
1028     0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
1029     0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
1030     0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
1031     0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
1032     0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
1033     0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
1034     0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
1035     0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
1036     0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
1037     0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
1038     0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
1039     0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
1040     0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
1041     0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
1042     0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
1043     0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
1044     0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
1045     0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
1046     0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
1047     0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
1048     0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
1049     0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
1050     0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
1051     0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
1052     0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
1053     0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
1054     0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
1055     0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
1056     0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
1057     0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
1058     0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
1059     0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
1060     0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
1061     0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
1062     0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
1063     0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
1064     0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
1065     0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
1066     0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
1067     0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
1068     0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
1069     0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
1070     0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
1071     0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
1072     0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
1073     0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
1074     0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
1075     0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
1076     0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
1077 };
/*
 * Td4: 256-entry byte table.  The values are those of the AES inverse
 * S-box of FIPS-197 (e.g. Td4[0x00] = 0x52 and Td4[0x63] = 0x00).
 * Stored as u8 here instead of the u32 words of the upstream code
 * (see the note at the top of the file), so a user has to shift each
 * looked-up byte into position itself.
 * NOTE(review): nothing in the code up to and including aes_encrypt
 * references Td4; presumably it serves the final round of
 * aes_decrypt -- confirm there.
 */
1078 static const u8 Td4[256] = {
1079     0x52U, 0x09U, 0x6aU, 0xd5U,
1080     0x30U, 0x36U, 0xa5U, 0x38U,
1081     0xbfU, 0x40U, 0xa3U, 0x9eU,
1082     0x81U, 0xf3U, 0xd7U, 0xfbU,
1083     0x7cU, 0xe3U, 0x39U, 0x82U,
1084     0x9bU, 0x2fU, 0xffU, 0x87U,
1085     0x34U, 0x8eU, 0x43U, 0x44U,
1086     0xc4U, 0xdeU, 0xe9U, 0xcbU,
1087     0x54U, 0x7bU, 0x94U, 0x32U,
1088     0xa6U, 0xc2U, 0x23U, 0x3dU,
1089     0xeeU, 0x4cU, 0x95U, 0x0bU,
1090     0x42U, 0xfaU, 0xc3U, 0x4eU,
1091     0x08U, 0x2eU, 0xa1U, 0x66U,
1092     0x28U, 0xd9U, 0x24U, 0xb2U,
1093     0x76U, 0x5bU, 0xa2U, 0x49U,
1094     0x6dU, 0x8bU, 0xd1U, 0x25U,
1095     0x72U, 0xf8U, 0xf6U, 0x64U,
1096     0x86U, 0x68U, 0x98U, 0x16U,
1097     0xd4U, 0xa4U, 0x5cU, 0xccU,
1098     0x5dU, 0x65U, 0xb6U, 0x92U,
1099     0x6cU, 0x70U, 0x48U, 0x50U,
1100     0xfdU, 0xedU, 0xb9U, 0xdaU,
1101     0x5eU, 0x15U, 0x46U, 0x57U,
1102     0xa7U, 0x8dU, 0x9dU, 0x84U,
1103     0x90U, 0xd8U, 0xabU, 0x00U,
1104     0x8cU, 0xbcU, 0xd3U, 0x0aU,
1105     0xf7U, 0xe4U, 0x58U, 0x05U,
1106     0xb8U, 0xb3U, 0x45U, 0x06U,
1107     0xd0U, 0x2cU, 0x1eU, 0x8fU,
1108     0xcaU, 0x3fU, 0x0fU, 0x02U,
1109     0xc1U, 0xafU, 0xbdU, 0x03U,
1110     0x01U, 0x13U, 0x8aU, 0x6bU,
1111     0x3aU, 0x91U, 0x11U, 0x41U,
1112     0x4fU, 0x67U, 0xdcU, 0xeaU,
1113     0x97U, 0xf2U, 0xcfU, 0xceU,
1114     0xf0U, 0xb4U, 0xe6U, 0x73U,
1115     0x96U, 0xacU, 0x74U, 0x22U,
1116     0xe7U, 0xadU, 0x35U, 0x85U,
1117     0xe2U, 0xf9U, 0x37U, 0xe8U,
1118     0x1cU, 0x75U, 0xdfU, 0x6eU,
1119     0x47U, 0xf1U, 0x1aU, 0x71U,
1120     0x1dU, 0x29U, 0xc5U, 0x89U,
1121     0x6fU, 0xb7U, 0x62U, 0x0eU,
1122     0xaaU, 0x18U, 0xbeU, 0x1bU,
1123     0xfcU, 0x56U, 0x3eU, 0x4bU,
1124     0xc6U, 0xd2U, 0x79U, 0x20U,
1125     0x9aU, 0xdbU, 0xc0U, 0xfeU,
1126     0x78U, 0xcdU, 0x5aU, 0xf4U,
1127     0x1fU, 0xddU, 0xa8U, 0x33U,
1128     0x88U, 0x07U, 0xc7U, 0x31U,
1129     0xb1U, 0x12U, 0x10U, 0x59U,
1130     0x27U, 0x80U, 0xecU, 0x5fU,
1131     0x60U, 0x51U, 0x7fU, 0xa9U,
1132     0x19U, 0xb5U, 0x4aU, 0x0dU,
1133     0x2dU, 0xe5U, 0x7aU, 0x9fU,
1134     0x93U, 0xc9U, 0x9cU, 0xefU,
1135     0xa0U, 0xe0U, 0x3bU, 0x4dU,
1136     0xaeU, 0x2aU, 0xf5U, 0xb0U,
1137     0xc8U, 0xebU, 0xbbU, 0x3cU,
1138     0x83U, 0x53U, 0x99U, 0x61U,
1139     0x17U, 0x2bU, 0x04U, 0x7eU,
1140     0xbaU, 0x77U, 0xd6U, 0x26U,
1141     0xe1U, 0x69U, 0x14U, 0x63U,
1142     0x55U, 0x21U, 0x0cU, 0x7dU,
1143 };
1144 static const u32 rcon[] = {
1145 	0x01000000, 0x02000000, 0x04000000, 0x08000000,
1146 	0x10000000, 0x20000000, 0x40000000, 0x80000000,
1147 	0x1B000000, 0x36000000,
1148 	/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
1149 };
1150 
/*
 * Big-endian (most significant byte first) conversion between four
 * consecutive bytes and one 32-bit word.
 *
 * GETU32(pt)     yields the word stored at pt[0..3].
 * PUTU32(ct, st) stores the low 32 bits of st into ct[0..3].
 *
 * Both macros evaluate their arguments four times, so the arguments
 * must be free of side effects.  PUTU32 is wrapped in do/while(0) so
 * that it acts as a single statement and stays correct as the body of
 * an unbraced if/else; it must be followed by a semicolon.
 */
#define GETU32(pt) (((u32)(pt)[0]<<24) ^ ((u32)(pt)[1]<<16) ^ \
		    ((u32)(pt)[2]<< 8) ^ ((u32)(pt)[3]))
#define PUTU32(ct, st) do { \
		(ct)[0] = (u8)((st)>>24); (ct)[1] = (u8)((st)>>16); \
		(ct)[2] = (u8)((st)>> 8); (ct)[3] = (u8)(st); \
	} while(0)
1155 
1156 /*
1157  * Expand the cipher key into the encryption key schedule.
1158  *
1159  * @return	the number of rounds for the given cipher key size.
1160  */
1161 static int
aes_setupEnc(ulong rk[],const uchar cipherKey[],int keyBits)1162 aes_setupEnc(ulong rk[/*4*(Nr + 1)*/], const uchar cipherKey[], int keyBits)
1163 {
1164 	int i = 0;
1165 	u32 temp;
1166 
1167 	rk[0] = GETU32(cipherKey     );
1168 	rk[1] = GETU32(cipherKey +  4);
1169 	rk[2] = GETU32(cipherKey +  8);
1170 	rk[3] = GETU32(cipherKey + 12);
1171 	if (keyBits == 128) {
1172 		for (;;) {
1173 			temp  = rk[3];
1174 			rk[4] = rk[0] ^
1175 				(Te4[(temp >> 16) & 0xff] << 24) ^
1176 				(Te4[(temp >>  8) & 0xff] << 16) ^
1177 				(Te4[(temp      ) & 0xff] <<  8) ^
1178 				(Te4[(temp >> 24)       ]      ) ^
1179 				rcon[i];
1180 			rk[5] = rk[1] ^ rk[4];
1181 			rk[6] = rk[2] ^ rk[5];
1182 			rk[7] = rk[3] ^ rk[6];
1183 			if (++i == 10) {
1184 				return 10;
1185 			}
1186 			rk += 4;
1187 		}
1188 	}
1189 	rk[4] = GETU32(cipherKey + 16);
1190 	rk[5] = GETU32(cipherKey + 20);
1191 	if (keyBits == 192) {
1192 		for (;;) {
1193 			temp = rk[ 5];
1194 			rk[ 6] = rk[ 0] ^
1195 				(Te4[(temp >> 16) & 0xff] << 24) ^
1196 				(Te4[(temp >>  8) & 0xff] << 16) ^
1197 				(Te4[(temp      ) & 0xff] <<  8) ^
1198 				(Te4[(temp >> 24)       ]      ) ^
1199 				rcon[i];
1200 			rk[ 7] = rk[ 1] ^ rk[ 6];
1201 			rk[ 8] = rk[ 2] ^ rk[ 7];
1202 			rk[ 9] = rk[ 3] ^ rk[ 8];
1203 			if (++i == 8) {
1204 				return 12;
1205 			}
1206 			rk[10] = rk[ 4] ^ rk[ 9];
1207 			rk[11] = rk[ 5] ^ rk[10];
1208 			rk += 6;
1209 		}
1210 	}
1211 	rk[6] = GETU32(cipherKey + 24);
1212 	rk[7] = GETU32(cipherKey + 28);
1213 	if (keyBits == 256) {
1214 	        for (;;) {
1215 	        	temp = rk[ 7];
1216 	        	rk[ 8] = rk[ 0] ^
1217 	        		(Te4[(temp >> 16) & 0xff] << 24) ^
1218 	        		(Te4[(temp >>  8) & 0xff] << 16) ^
1219 	        		(Te4[(temp      ) & 0xff] <<  8) ^
1220 	        		(Te4[(temp >> 24)       ]      ) ^
1221 	        		rcon[i];
1222 	        	rk[ 9] = rk[ 1] ^ rk[ 8];
1223 	        	rk[10] = rk[ 2] ^ rk[ 9];
1224 	        	rk[11] = rk[ 3] ^ rk[10];
1225 			if (++i == 7) {
1226 				return 14;
1227 			}
1228 	        	temp = rk[11];
1229 	        	rk[12] = rk[ 4] ^
1230 	        		(Te4[(temp >> 24)       ] << 24) ^
1231 	        		(Te4[(temp >> 16) & 0xff] << 16) ^
1232 	        		(Te4[(temp >>  8) & 0xff] <<  8) ^
1233 	        		(Te4[(temp      ) & 0xff]      );
1234 	        	rk[13] = rk[ 5] ^ rk[12];
1235 	        	rk[14] = rk[ 6] ^ rk[13];
1236 	        	rk[15] = rk[ 7] ^ rk[14];
1237 			rk += 8;
1238 	        }
1239 	}
1240 	return 0;
1241 }
1242 
1243 /**
1244  * Expand the cipher key into the decryption key schedule.
1245  *
1246  * @return	the number of rounds for the given cipher key size.
1247  */
1248 static int
aes_setupDec(ulong rk[],const uchar cipherKey[],int keyBits)1249 aes_setupDec(ulong rk[/* 4*(Nr + 1) */], const uchar cipherKey[], int keyBits)
1250 {
1251 	int Nr, i, j;
1252 	ulong temp;
1253 
1254 	/* expand the cipher key: */
1255 	Nr = aes_setupEnc(rk, cipherKey, keyBits);
1256 	/* invert the order of the round keys: */
1257 	for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
1258 		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
1259 		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
1260 		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
1261 		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
1262 	}
1263 	/*
1264 	 * apply the inverse MixColumn transform to all round keys
1265 	 * but the first and the last:
1266 	 */
1267 	for (i = 1; i < Nr; i++) {
1268 		rk += 4;
1269 		rk[0] =
1270 			Td0[Te4[(rk[0] >> 24)       ]] ^
1271 			Td1[Te4[(rk[0] >> 16) & 0xff]] ^
1272 			Td2[Te4[(rk[0] >>  8) & 0xff]] ^
1273 			Td3[Te4[(rk[0]      ) & 0xff]];
1274 		rk[1] =
1275 			Td0[Te4[(rk[1] >> 24)       ]] ^
1276 			Td1[Te4[(rk[1] >> 16) & 0xff]] ^
1277 			Td2[Te4[(rk[1] >>  8) & 0xff]] ^
1278 			Td3[Te4[(rk[1]      ) & 0xff]];
1279 		rk[2] =
1280 			Td0[Te4[(rk[2] >> 24)       ]] ^
1281 			Td1[Te4[(rk[2] >> 16) & 0xff]] ^
1282 			Td2[Te4[(rk[2] >>  8) & 0xff]] ^
1283 			Td3[Te4[(rk[2]      ) & 0xff]];
1284 		rk[3] =
1285 			Td0[Te4[(rk[3] >> 24)       ]] ^
1286 			Td1[Te4[(rk[3] >> 16) & 0xff]] ^
1287 			Td2[Te4[(rk[3] >>  8) & 0xff]] ^
1288 			Td3[Te4[(rk[3]      ) & 0xff]];
1289 	}
1290 	return Nr;
1291 }
1292 
1293 /* using round keys in rk, perform Nr rounds of encrypting pt into ct */
1294 void
aes_encrypt(const ulong rk[],int Nr,const uchar pt[16],uchar ct[16])1295 aes_encrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar pt[16],
1296 	uchar ct[16])
1297 {
1298 	ulong s0, s1, s2, s3, t0, t1, t2, t3;
1299 #ifndef FULL_UNROLL
1300 	int r;
1301 #endif /* ?FULL_UNROLL */
1302 
1303 	/*
1304 	 * map byte array block to cipher state
1305 	 * and add initial round key:
1306 	 */
1307 	s0 = GETU32(pt     ) ^ rk[0];
1308 	s1 = GETU32(pt +  4) ^ rk[1];
1309 	s2 = GETU32(pt +  8) ^ rk[2];
1310 	s3 = GETU32(pt + 12) ^ rk[3];
1311 #ifdef FULL_UNROLL
1312 	/* round 1: */
1313    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
1314    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
1315    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
1316    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
1317    	/* round 2: */
1318    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
1319    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
1320    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
1321    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
1322 	/* round 3: */
1323    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
1324    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
1325    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
1326    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
1327    	/* round 4: */
1328    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
1329    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
1330    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
1331    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
1332 	/* round 5: */
1333    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
1334    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
1335    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
1336    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
1337    	/* round 6: */
1338    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
1339    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
1340    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
1341    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
1342 	/* round 7: */
1343    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
1344    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
1345    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
1346    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
1347    	/* round 8: */
1348    	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
1349    	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
1350    	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
1351    	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
1352 	/* round 9: */
1353    	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
1354    	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
1355    	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
1356    	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
1357 	if (Nr > 10) {
1358 		/* round 10: */
1359 		s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
1360 		s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
1361 		s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
1362 		s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
1363 		/* round 11: */
1364 		t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
1365 		t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
1366 		t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
1367 		t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
1368 		if (Nr > 12) {
1369 			/* round 12: */
1370 			s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
1371 			s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
1372 			s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
1373 			s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
1374 			/* round 13: */
1375 			t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
1376 			t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
1377 			t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
1378 			t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
1379 		}
1380 	}
1381 	rk += Nr << 2;
1382 #else					/* !FULL_UNROLL */
1383 	/*
1384 	 * Nr - 1 full rounds:
1385 	 */
1386 	r = Nr >> 1;
1387 	for (;;) {
1388 	        t0 =
1389 	            Te0[(s0 >> 24)       ] ^
1390 	            Te1[(s1 >> 16) & 0xff] ^
1391 	            Te2[(s2 >>  8) & 0xff] ^
1392 	            Te3[(s3      ) & 0xff] ^
1393 	            rk[4];
1394 	        t1 =
1395 	            Te0[(s1 >> 24)       ] ^
1396 	            Te1[(s2 >> 16) & 0xff] ^
1397 	            Te2[(s3 >>  8) & 0xff] ^
1398 	            Te3[(s0      ) & 0xff] ^
1399 	            rk[5];
1400 	        t2 =
1401 	            Te0[(s2 >> 24)       ] ^
1402 	            Te1[(s3 >> 16) & 0xff] ^
1403 	            Te2[(s0 >>  8) & 0xff] ^
1404 	            Te3[(s1      ) & 0xff] ^
1405 	            rk[6];
1406 	        t3 =
1407 	            Te0[(s3 >> 24)       ] ^
1408 	            Te1[(s0 >> 16) & 0xff] ^
1409 	            Te2[(s1 >>  8) & 0xff] ^
1410 	            Te3[(s2      ) & 0xff] ^
1411 	            rk[7];
1412 
1413 	        rk += 8;
1414 	        if (--r == 0)
1415 	            break;
1416 
1417 	        s0 =
1418 	            Te0[(t0 >> 24)       ] ^
1419 	            Te1[(t1 >> 16) & 0xff] ^
1420 	            Te2[(t2 >>  8) & 0xff] ^
1421 	            Te3[(t3      ) & 0xff] ^
1422 	            rk[0];
1423 	        s1 =
1424 	            Te0[(t1 >> 24)       ] ^
1425 	            Te1[(t2 >> 16) & 0xff] ^
1426 	            Te2[(t3 >>  8) & 0xff] ^
1427 	            Te3[(t0      ) & 0xff] ^
1428 	            rk[1];
1429 	        s2 =
1430 	            Te0[(t2 >> 24)       ] ^
1431 	            Te1[(t3 >> 16) & 0xff] ^
1432 	            Te2[(t0 >>  8) & 0xff] ^
1433 	            Te3[(t1      ) & 0xff] ^
1434 	            rk[2];
1435 	        s3 =
1436 	            Te0[(t3 >> 24)       ] ^
1437 	            Te1[(t0 >> 16) & 0xff] ^
1438 	            Te2[(t1 >>  8) & 0xff] ^
1439 	            Te3[(t2      ) & 0xff] ^
1440 	            rk[3];
1441 	}
1442 #endif					/* ?FULL_UNROLL */
1443 	/*
1444 	 * apply last round and
1445 	 * map cipher state to byte array block:
1446 	 */
1447 	s0 =
1448 		(Te4[(t0 >> 24)       ] << 24) ^
1449 		(Te4[(t1 >> 16) & 0xff] << 16) ^
1450 		(Te4[(t2 >>  8) & 0xff] <<  8) ^
1451 		(Te4[(t3      ) & 0xff]      ) ^
1452 		rk[0];
1453 	PUTU32(ct     , s0);
1454 	s1 =
1455 		(Te4[(t1 >> 24)       ] << 24) ^
1456 		(Te4[(t2 >> 16) & 0xff] << 16) ^
1457 		(Te4[(t3 >>  8) & 0xff] <<  8) ^
1458 		(Te4[(t0      ) & 0xff]      ) ^
1459 		rk[1];
1460 	PUTU32(ct +  4, s1);
1461 	s2 =
1462 		(Te4[(t2 >> 24)       ] << 24) ^
1463 		(Te4[(t3 >> 16) & 0xff] << 16) ^
1464 		(Te4[(t0 >>  8) & 0xff] <<  8) ^
1465 		(Te4[(t1      ) & 0xff]      ) ^
1466 		rk[2];
1467 	PUTU32(ct +  8, s2);
1468 	s3 =
1469 		(Te4[(t3 >> 24)       ] << 24) ^
1470 		(Te4[(t0 >> 16) & 0xff] << 16) ^
1471 		(Te4[(t1 >>  8) & 0xff] <<  8) ^
1472 		(Te4[(t2      ) & 0xff]      ) ^
1473 		rk[3];
1474 	PUTU32(ct + 12, s3);
1475 }
1476 
1477 void
aes_decrypt(const ulong rk[],int Nr,const uchar ct[16],uchar pt[16])1478 aes_decrypt(const ulong rk[/* 4*(Nr + 1) */], int Nr, const uchar ct[16],
1479 	uchar pt[16])
1480 {
1481 	ulong s0, s1, s2, s3, t0, t1, t2, t3;
1482 #ifndef FULL_UNROLL
1483 	int r;
1484 #endif		/* ?FULL_UNROLL */
1485 
1486 	/*
1487 	 * map byte array block to cipher state
1488 	 * and add initial round key:
1489 	 */
1490     s0 = GETU32(ct     ) ^ rk[0];
1491     s1 = GETU32(ct +  4) ^ rk[1];
1492     s2 = GETU32(ct +  8) ^ rk[2];
1493     s3 = GETU32(ct + 12) ^ rk[3];
1494 #ifdef FULL_UNROLL
1495     /* round 1: */
1496     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
1497     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
1498     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
1499     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
1500     /* round 2: */
1501     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
1502     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
1503     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
1504     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
1505     /* round 3: */
1506     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
1507     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
1508     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
1509     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
1510     /* round 4: */
1511     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
1512     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
1513     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
1514     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
1515     /* round 5: */
1516     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
1517     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
1518     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
1519     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
1520     /* round 6: */
1521     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
1522     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
1523     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
1524     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
1525     /* round 7: */
1526     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
1527     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
1528     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
1529     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
1530     /* round 8: */
1531     s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
1532     s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
1533     s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
1534     s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
1535     /* round 9: */
1536     t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
1537     t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
1538     t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
1539     t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
1540     if (Nr > 10) {
1541         /* round 10: */
1542         s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
1543         s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
1544         s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
1545         s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
1546         /* round 11: */
1547         t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
1548         t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
1549         t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
1550         t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
1551         if (Nr > 12) {
1552             /* round 12: */
1553             s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
1554             s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
1555             s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
1556             s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
1557             /* round 13: */
1558             t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
1559             t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
1560             t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
1561             t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
1562         }
1563     }
1564     rk += Nr << 2;
1565 #else					/* !FULL_UNROLL */
1566     /*
1567      * Nr - 1 full rounds:
1568      */
1569     r = Nr >> 1;
1570     for (;;) {
1571         t0 =
1572             Td0[(s0 >> 24)       ] ^
1573             Td1[(s3 >> 16) & 0xff] ^
1574             Td2[(s2 >>  8) & 0xff] ^
1575             Td3[(s1      ) & 0xff] ^
1576             rk[4];
1577         t1 =
1578             Td0[(s1 >> 24)       ] ^
1579             Td1[(s0 >> 16) & 0xff] ^
1580             Td2[(s3 >>  8) & 0xff] ^
1581             Td3[(s2      ) & 0xff] ^
1582             rk[5];
1583         t2 =
1584             Td0[(s2 >> 24)       ] ^
1585             Td1[(s1 >> 16) & 0xff] ^
1586             Td2[(s0 >>  8) & 0xff] ^
1587             Td3[(s3      ) & 0xff] ^
1588             rk[6];
1589         t3 =
1590             Td0[(s3 >> 24)       ] ^
1591             Td1[(s2 >> 16) & 0xff] ^
1592             Td2[(s1 >>  8) & 0xff] ^
1593             Td3[(s0      ) & 0xff] ^
1594             rk[7];
1595 
1596         rk += 8;
1597         if (--r == 0)
1598             break;
1599 
1600         s0 =
1601             Td0[(t0 >> 24)       ] ^
1602             Td1[(t3 >> 16) & 0xff] ^
1603             Td2[(t2 >>  8) & 0xff] ^
1604             Td3[(t1      ) & 0xff] ^
1605             rk[0];
1606         s1 =
1607             Td0[(t1 >> 24)       ] ^
1608             Td1[(t0 >> 16) & 0xff] ^
1609             Td2[(t3 >>  8) & 0xff] ^
1610             Td3[(t2      ) & 0xff] ^
1611             rk[1];
1612         s2 =
1613             Td0[(t2 >> 24)       ] ^
1614             Td1[(t1 >> 16) & 0xff] ^
1615             Td2[(t0 >>  8) & 0xff] ^
1616             Td3[(t3      ) & 0xff] ^
1617             rk[2];
1618         s3 =
1619             Td0[(t3 >> 24)       ] ^
1620             Td1[(t2 >> 16) & 0xff] ^
1621             Td2[(t1 >>  8) & 0xff] ^
1622             Td3[(t0      ) & 0xff] ^
1623             rk[3];
1624     }
1625 #endif					/* ?FULL_UNROLL */
1626 	/*
1627 	 * apply last round and
1628 	 * map cipher state to byte array block:
1629 	 */
1630    	s0 =
1631    		(Td4[(t0 >> 24)       ] << 24) ^
1632    		(Td4[(t3 >> 16) & 0xff] << 16) ^
1633    		(Td4[(t2 >>  8) & 0xff] <<  8) ^
1634    		(Td4[(t1      ) & 0xff]      ) ^
1635    		rk[0];
1636 	PUTU32(pt     , s0);
1637    	s1 =
1638    		(Td4[(t1 >> 24)       ] << 24) ^
1639    		(Td4[(t0 >> 16) & 0xff] << 16) ^
1640    		(Td4[(t3 >>  8) & 0xff] <<  8) ^
1641    		(Td4[(t2      ) & 0xff]      ) ^
1642    		rk[1];
1643 	PUTU32(pt +  4, s1);
1644    	s2 =
1645    		(Td4[(t2 >> 24)       ] << 24) ^
1646    		(Td4[(t1 >> 16) & 0xff] << 16) ^
1647    		(Td4[(t0 >>  8) & 0xff] <<  8) ^
1648    		(Td4[(t3      ) & 0xff]      ) ^
1649    		rk[2];
1650 	PUTU32(pt +  8, s2);
1651    	s3 =
1652    		(Td4[(t3 >> 24)       ] << 24) ^
1653    		(Td4[(t2 >> 16) & 0xff] << 16) ^
1654    		(Td4[(t1 >>  8) & 0xff] <<  8) ^
1655    		(Td4[(t0      ) & 0xff]      ) ^
1656    		rk[3];
1657 	PUTU32(pt + 12, s3);
1658 }
1659 
1660 #ifdef INTERMEDIATE_VALUE_KAT
1661 
1662 static void
aes_encryptRound(const u32 rk[],int Nr,u8 block[16],int rounds)1663 aes_encryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1664 	int rounds)
1665 {
1666 	int r;
1667 	u32 s0, s1, s2, s3, t0, t1, t2, t3;
1668 
1669 	/*
1670 	 * map byte array block to cipher state
1671 	 * and add initial round key:
1672 	 */
1673 	s0 = GETU32(block     ) ^ rk[0];
1674 	s1 = GETU32(block +  4) ^ rk[1];
1675 	s2 = GETU32(block +  8) ^ rk[2];
1676 	s3 = GETU32(block + 12) ^ rk[3];
1677 	rk += 4;
1678 
1679 	/*
1680 	 * Nr - 1 full rounds:
1681 	 */
1682 	for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
1683 		t0 =
1684 			Te0[(s0 >> 24)       ] ^
1685 			Te1[(s1 >> 16) & 0xff] ^
1686 			Te2[(s2 >>  8) & 0xff] ^
1687 			Te3[(s3      ) & 0xff] ^
1688 			rk[0];
1689 		t1 =
1690 			Te0[(s1 >> 24)       ] ^
1691 			Te1[(s2 >> 16) & 0xff] ^
1692 			Te2[(s3 >>  8) & 0xff] ^
1693 			Te3[(s0      ) & 0xff] ^
1694 			rk[1];
1695 		t2 =
1696 			Te0[(s2 >> 24)       ] ^
1697 			Te1[(s3 >> 16) & 0xff] ^
1698 			Te2[(s0 >>  8) & 0xff] ^
1699 			Te3[(s1      ) & 0xff] ^
1700 			rk[2];
1701 		t3 =
1702 			Te0[(s3 >> 24)       ] ^
1703 			Te1[(s0 >> 16) & 0xff] ^
1704 			Te2[(s1 >>  8) & 0xff] ^
1705 			Te3[(s2      ) & 0xff] ^
1706 			rk[3];
1707 		s0 = t0;
1708 		s1 = t1;
1709 		s2 = t2;
1710 		s3 = t3;
1711 		rk += 4;
1712 	}
1713 
1714 	/*
1715 	 * apply last round and
1716 	 * map cipher state to byte array block:
1717 	 */
1718 	if (rounds == Nr) {
1719 	    	t0 =
1720 	    		(Te4[(s0 >> 24)       ] << 24) ^
1721 	    		(Te4[(s1 >> 16) & 0xff] << 16) ^
1722 	    		(Te4[(s2 >>  8) & 0xff] <<  8) ^
1723 	    		(Te4[(s3      ) & 0xff]      ) ^
1724 	    		rk[0];
1725 	    	t1 =
1726 	    		(Te4[(s1 >> 24)       ] << 24) ^
1727 	    		(Te4[(s2 >> 16) & 0xff] << 16) ^
1728 	    		(Te4[(s3 >>  8) & 0xff] <<  8) ^
1729 	    		(Te4[(s0      ) & 0xff]      ) ^
1730 	    		rk[1];
1731 	    	t2 =
1732 	    		(Te4[(s2 >> 24)       ] << 24) ^
1733 	    		(Te4[(s3 >> 16) & 0xff] << 16) ^
1734 	    		(Te4[(s0 >>  8) & 0xff] <<  8) ^
1735 	    		(Te4[(s1      ) & 0xff]      ) ^
1736 	    		rk[2];
1737 	    	t3 =
1738 	    		(Te4[(s3 >> 24)       ] << 24) ^
1739 	    		(Te4[(s0 >> 16) & 0xff] << 16) ^
1740 	    		(Te4[(s1 >>  8) & 0xff] <<  8) ^
1741 	    		(Te4[(s2      ) & 0xff]      ) ^
1742 	    		rk[3];
1743 		s0 = t0;
1744 		s1 = t1;
1745 		s2 = t2;
1746 		s3 = t3;
1747 	}
1748 
1749 	PUTU32(block     , s0);
1750 	PUTU32(block +  4, s1);
1751 	PUTU32(block +  8, s2);
1752 	PUTU32(block + 12, s3);
1753 }
1754 
1755 static void
aes_decryptRound(const u32 rk[],int Nr,u8 block[16],int rounds)1756 aes_decryptRound(const u32 rk[/* 4*(Nr + 1) */], int Nr, u8 block[16],
1757 	int rounds)
1758 {
1759 	int r;
1760 	u32 s0, s1, s2, s3, t0, t1, t2, t3;
1761 
1762 	/*
1763 	 * map byte array block to cipher state
1764 	 * and add initial round key:
1765 	 */
1766 	s0 = GETU32(block     ) ^ rk[0];
1767 	s1 = GETU32(block +  4) ^ rk[1];
1768 	s2 = GETU32(block +  8) ^ rk[2];
1769 	s3 = GETU32(block + 12) ^ rk[3];
1770 	rk += 4;
1771 
1772 	/*
1773 	 * Nr - 1 full rounds:
1774 	 */
1775 	for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
1776 		t0 =
1777 			Td0[(s0 >> 24)       ] ^
1778 			Td1[(s3 >> 16) & 0xff] ^
1779 			Td2[(s2 >>  8) & 0xff] ^
1780 			Td3[(s1      ) & 0xff] ^
1781 			rk[0];
1782 		t1 =
1783 			Td0[(s1 >> 24)       ] ^
1784 			Td1[(s0 >> 16) & 0xff] ^
1785 			Td2[(s3 >>  8) & 0xff] ^
1786 			Td3[(s2      ) & 0xff] ^
1787 			rk[1];
1788 		t2 =
1789 			Td0[(s2 >> 24)       ] ^
1790 			Td1[(s1 >> 16) & 0xff] ^
1791 			Td2[(s0 >>  8) & 0xff] ^
1792 			Td3[(s3      ) & 0xff] ^
1793 			rk[2];
1794 		t3 =
1795 			Td0[(s3 >> 24)       ] ^
1796 			Td1[(s2 >> 16) & 0xff] ^
1797 			Td2[(s1 >>  8) & 0xff] ^
1798 			Td3[(s0      ) & 0xff] ^
1799 			rk[3];
1800 
1801 		s0 = t0;
1802 		s1 = t1;
1803 		s2 = t2;
1804 		s3 = t3;
1805 		rk += 4;
1806 	}
1807 
1808 	/*
1809 	 * complete the last round and
1810 	 * map cipher state to byte array block:
1811 	 */
1812 	t0 =
1813 		(Td4[(s0 >> 24)       ] << 24) ^
1814 		(Td4[(s3 >> 16) & 0xff] << 16) ^
1815 		(Td4[(s2 >>  8) & 0xff] <<  8) ^
1816 		(Td4[(s1      ) & 0xff]      );
1817 	t1 =
1818 		(Td4[(s1 >> 24)       ] << 24) ^
1819 		(Td4[(s0 >> 16) & 0xff] << 16) ^
1820 		(Td4[(s3 >>  8) & 0xff] <<  8) ^
1821 		(Td4[(s2      ) & 0xff]      );
1822 	t2 =
1823 		(Td4[(s2 >> 24)       ] << 24) ^
1824 		(Td4[(s1 >> 16) & 0xff] << 16) ^
1825 		(Td4[(s0 >>  8) & 0xff] <<  8) ^
1826 		(Td4[(s3      ) & 0xff]      );
1827 	t3 =
1828 		(Td4[(s3 >> 24)       ] << 24) ^
1829 		(Td4[(s2 >> 16) & 0xff] << 16) ^
1830 		(Td4[(s1 >>  8) & 0xff] <<  8) ^
1831 		(Td4[(s0      ) & 0xff]      );
1832 
1833 	if (rounds == Nr) {
1834 		t0 ^= rk[0];
1835 		t1 ^= rk[1];
1836 		t2 ^= rk[2];
1837 		t3 ^= rk[3];
1838 	}
1839 
1840 	PUTU32(block     , t0);
1841 	PUTU32(block +  4, t1);
1842 	PUTU32(block +  8, t2);
1843 	PUTU32(block + 12, t3);
1844 }
1845 
1846 #endif			/* INTERMEDIATE_VALUE_KAT */
1847