xref: /dflybsd-src/sys/opencrypto/skipjack.c (revision 86d7f5d305c6adaa56ff4582ece9859d73106103)
/*	$FreeBSD: src/sys/opencrypto/skipjack.c,v 1.3 2005/01/07 02:29:16 imp Exp $	*/
/*	$OpenBSD: skipjack.c,v 1.3 2001/05/05 00:31:34 angelos Exp $	*/
/*-
 * Further optimized test implementation of SKIPJACK algorithm
 * Mark Tillotson <markt@chaos.org.uk>, 25 June 98
 * Optimizations suit RISC (lots of registers) machine best.
 *
 * based on unoptimized implementation of
 * Panu Rissanen <bande@lut.fi> 960624
 *
 * SKIPJACK and KEA Algorithm Specifications
 * Version 2.0
 * 29 May 1998
 */
1586d7f5d3SJohn Marino 
1686d7f5d3SJohn Marino #include <sys/param.h>
1786d7f5d3SJohn Marino 
1886d7f5d3SJohn Marino #include <opencrypto/skipjack.h>
1986d7f5d3SJohn Marino 
/*
 * F-table used by the SKIPJACK G-function: 256 fixed byte substitutions,
 * indexed by an input byte.
 */
static const u_int8_t ftable[0x100] =
{
	0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
	0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,
	0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e,
	0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28,
	0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68,
	0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53,
	0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19,
	0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2,
	0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b,
	0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8,
	0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0,
	0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90,
	0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69,
	0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76,
	0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20,
	0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d,
	0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43,
	0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18,
	0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa,
	0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4,
	0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87,
	0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40,
	0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b,
	0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5,
	0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0,
	0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2,
	0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1,
	0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8,
	0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5,
	0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac,
	0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3,
	0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46
};

/*
 * For each key byte generate a table to represent the function
 *    ftable [in ^ keybyte]
 *
 * These tables are used to save an XOR in each stage of the G-function;
 * the tables are hopefully pointed to by register allocated variables
 * k0, k1..k9
 *
 * key:        the 10 key bytes key[0]..key[9] (only read).
 * key_tables: 10 pointers, each to a caller-provided buffer of at least
 *             0x100 bytes; key_tables[k][i] is set to ftable[i ^ key[k]].
 */
void
subkey_table_gen(u_int8_t *key, u_int8_t **key_tables)
{
	int i, k;

	for (k = 0; k < 10; k++) {
		u_int8_t key_byte = key[k];
		u_int8_t *table = key_tables[k];

		/* i and key_byte are both < 0x100, so the index stays in range */
		for (i = 0; i < 0x100; i++)
			table[i] = ftable[i ^ key_byte];
	}
}
7786d7f5d3SJohn Marino 
7886d7f5d3SJohn Marino 
/*
 * Keyed G step on the byte pair (ih, il), result in (oh, ol).
 *
 * The first four arguments are digits 0..9 that are token-pasted onto `k`,
 * so the expansion reads the tables k0..k9 that must be in scope at the
 * point of use.  (oh, ol) may be the same variables as (ih, il): each input
 * is consumed before the output that aliases it is overwritten.
 *
 * Wrapped in do { } while (0) so that an invocation followed by `;` is
 * exactly one statement.
 */
#define g(k0, k1, k2, k3, ih, il, oh, ol) \
do { \
	(oh) = k##k0[(il)] ^ (ih); \
	(ol) = k##k1[(oh)] ^ (il); \
	(oh) = k##k2[(ol)] ^ (oh); \
	(ol) = k##k3[(oh)] ^ (ol); \
} while (0)

/* The five table selections used by the rounds; the digit is the first table. */
#define g0(ih, il, oh, ol) g(0, 1, 2, 3, ih, il, oh, ol)
#define g4(ih, il, oh, ol) g(4, 5, 6, 7, ih, il, oh, ol)
#define g8(ih, il, oh, ol) g(8, 9, 0, 1, ih, il, oh, ol)
#define g2(ih, il, oh, ol) g(2, 3, 4, 5, ih, il, oh, ol)
#define g6(ih, il, oh, ol) g(6, 7, 8, 9, ih, il, oh, ol)
9286d7f5d3SJohn Marino 
9386d7f5d3SJohn Marino 
/*
 * Inverse of the keyed G step: applies the same four table lookups as g()
 * in the opposite order, starting with the low byte.
 *
 * The first four arguments are digits 0..9 that are token-pasted onto `k`,
 * so the expansion reads the tables k0..k9 that must be in scope at the
 * point of use.  (oh, ol) may be the same variables as (ih, il).
 *
 * Wrapped in do { } while (0) so that an invocation followed by `;` is
 * exactly one statement.
 */
#define g_inv(k0, k1, k2, k3, ih, il, oh, ol) \
do { \
	(ol) = k##k3[(ih)] ^ (il); \
	(oh) = k##k2[(ol)] ^ (ih); \
	(ol) = k##k1[(oh)] ^ (ol); \
	(oh) = k##k0[(ol)] ^ (oh); \
} while (0)


/* The five table selections used by the rounds; the digit is the first table. */
#define g0_inv(ih, il, oh, ol) g_inv(0, 1, 2, 3, ih, il, oh, ol)
#define g4_inv(ih, il, oh, ol) g_inv(4, 5, 6, 7, ih, il, oh, ol)
#define g8_inv(ih, il, oh, ol) g_inv(8, 9, 0, 1, ih, il, oh, ol)
#define g2_inv(ih, il, oh, ol) g_inv(2, 3, 4, 5, ih, il, oh, ol)
#define g6_inv(ih, il, oh, ol) g_inv(6, 7, 8, 9, ih, il, oh, ol)
10886d7f5d3SJohn Marino 
/*
 * Optimized version of the Skipjack algorithm.
 *
 * The appropriate g-function is inlined for each round.
 *
 * Data movement is minimized by rotating the names of the
 * variables w1..w4, not their contents (saves 3 moves per round).
 *
 * The loops are completely unrolled (needed to make the choice of g static).
 *
 * Compiles to about 470 instructions on a Sparc (gcc -O),
 * which is about 58 instructions per byte, 14 per round.
 * gcc seems to leave in some unnecessary "and with 0xFF" operations,
 * but only in the latter part of the functions.  Perhaps it
 * runs out of resources to properly optimize a long inlined function?
 * In theory it should get about 11 instructions per round, not 14.
 */
12586d7f5d3SJohn Marino 
/*
 * Keyed G step, applied in place to the byte pair (*wh, *wl) using the four
 * lookup tables t0..t3 in that order.  Each table is indexed by a byte.
 */
static inline void
sj_g(const u_int8_t *t0, const u_int8_t *t1, const u_int8_t *t2,
    const u_int8_t *t3, u_int8_t *wh, u_int8_t *wl)
{
	u_int8_t hi = *wh;
	u_int8_t lo = *wl;

	hi ^= t0[lo];
	lo ^= t1[hi];
	hi ^= t2[lo];
	lo ^= t3[hi];

	*wh = hi;
	*wl = lo;
}

/*
 * Encrypt one 8-byte block.
 *
 * plain:      8 input bytes (only read).
 * cipher:     8 output bytes.  All of plain[] is read before anything is
 *             stored, so cipher may be the same buffer as plain.
 * key_tables: ten 256-byte lookup tables, key_tables[0]..key_tables[9],
 *             e.g. as filled in by subkey_table_gen().
 *
 * The block is handled as four 16-bit words w1..w4, each kept as a high
 * byte (whN) and a low byte (wlN).  The 32 rounds are fully unrolled; the
 * round number 1..32 is XORed into a low byte in every round, and the
 * table quadruple advances by four tables (mod 10) from round to round.
 */
void
skipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key_tables)
{
	/* setup 4 16-bit portions */
	u_int8_t wh1 = plain[0], wl1 = plain[1];
	u_int8_t wh2 = plain[2], wl2 = plain[3];
	u_int8_t wh3 = plain[4], wl3 = plain[5];
	u_int8_t wh4 = plain[6], wl4 = plain[7];

	const u_int8_t *k0 = key_tables[0];
	const u_int8_t *k1 = key_tables[1];
	const u_int8_t *k2 = key_tables[2];
	const u_int8_t *k3 = key_tables[3];
	const u_int8_t *k4 = key_tables[4];
	const u_int8_t *k5 = key_tables[5];
	const u_int8_t *k6 = key_tables[6];
	const u_int8_t *k7 = key_tables[7];
	const u_int8_t *k8 = key_tables[8];
	const u_int8_t *k9 = key_tables[9];

	/* first 8 rounds: G first, then mix the result into the next word */
	sj_g(k0, k1, k2, k3, &wh1, &wl1); wl4 ^= wl1 ^ 1; wh4 ^= wh1;
	sj_g(k4, k5, k6, k7, &wh4, &wl4); wl3 ^= wl4 ^ 2; wh3 ^= wh4;
	sj_g(k8, k9, k0, k1, &wh3, &wl3); wl2 ^= wl3 ^ 3; wh2 ^= wh3;
	sj_g(k2, k3, k4, k5, &wh2, &wl2); wl1 ^= wl2 ^ 4; wh1 ^= wh2;
	sj_g(k6, k7, k8, k9, &wh1, &wl1); wl4 ^= wl1 ^ 5; wh4 ^= wh1;
	sj_g(k0, k1, k2, k3, &wh4, &wl4); wl3 ^= wl4 ^ 6; wh3 ^= wh4;
	sj_g(k4, k5, k6, k7, &wh3, &wl3); wl2 ^= wl3 ^ 7; wh2 ^= wh3;
	sj_g(k8, k9, k0, k1, &wh2, &wl2); wl1 ^= wl2 ^ 8; wh1 ^= wh2;

	/* second 8 rounds: mix the pre-G word into the next word, then G */
	wh2 ^= wh1; wl2 ^= wl1 ^ 9;  sj_g(k2, k3, k4, k5, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 10; sj_g(k6, k7, k8, k9, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 11; sj_g(k0, k1, k2, k3, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 12; sj_g(k4, k5, k6, k7, &wh2, &wl2);
	wh2 ^= wh1; wl2 ^= wl1 ^ 13; sj_g(k8, k9, k0, k1, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 14; sj_g(k2, k3, k4, k5, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 15; sj_g(k6, k7, k8, k9, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 16; sj_g(k0, k1, k2, k3, &wh2, &wl2);

	/* third 8 rounds: same shape as the first 8 */
	sj_g(k4, k5, k6, k7, &wh1, &wl1); wl4 ^= wl1 ^ 17; wh4 ^= wh1;
	sj_g(k8, k9, k0, k1, &wh4, &wl4); wl3 ^= wl4 ^ 18; wh3 ^= wh4;
	sj_g(k2, k3, k4, k5, &wh3, &wl3); wl2 ^= wl3 ^ 19; wh2 ^= wh3;
	sj_g(k6, k7, k8, k9, &wh2, &wl2); wl1 ^= wl2 ^ 20; wh1 ^= wh2;
	sj_g(k0, k1, k2, k3, &wh1, &wl1); wl4 ^= wl1 ^ 21; wh4 ^= wh1;
	sj_g(k4, k5, k6, k7, &wh4, &wl4); wl3 ^= wl4 ^ 22; wh3 ^= wh4;
	sj_g(k8, k9, k0, k1, &wh3, &wl3); wl2 ^= wl3 ^ 23; wh2 ^= wh3;
	sj_g(k2, k3, k4, k5, &wh2, &wl2); wl1 ^= wl2 ^ 24; wh1 ^= wh2;

	/* last 8 rounds: same shape as the second 8 */
	wh2 ^= wh1; wl2 ^= wl1 ^ 25; sj_g(k6, k7, k8, k9, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 26; sj_g(k0, k1, k2, k3, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 27; sj_g(k4, k5, k6, k7, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 28; sj_g(k8, k9, k0, k1, &wh2, &wl2);
	wh2 ^= wh1; wl2 ^= wl1 ^ 29; sj_g(k2, k3, k4, k5, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 30; sj_g(k6, k7, k8, k9, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 31; sj_g(k0, k1, k2, k3, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 32; sj_g(k4, k5, k6, k7, &wh2, &wl2);

	/* pack into byte vector */
	cipher[0] = wh1;  cipher[1] = wl1;
	cipher[2] = wh2;  cipher[3] = wl2;
	cipher[4] = wh3;  cipher[5] = wl3;
	cipher[6] = wh4;  cipher[7] = wl4;
}
19186d7f5d3SJohn Marino 
19286d7f5d3SJohn Marino 
/*
 * Inverse keyed G step, applied in place to the byte pair (*wh, *wl).
 * The tables are passed in the same order as for the forward step and are
 * applied last-to-first (t3, t2, t1, t0), starting with the low byte.
 */
static inline void
sj_g_inv(const u_int8_t *t0, const u_int8_t *t1, const u_int8_t *t2,
    const u_int8_t *t3, u_int8_t *wh, u_int8_t *wl)
{
	u_int8_t hi = *wh;
	u_int8_t lo = *wl;

	lo ^= t3[hi];
	hi ^= t2[lo];
	lo ^= t1[hi];
	hi ^= t0[lo];

	*wh = hi;
	*wl = lo;
}

/*
 * Decrypt one 8-byte block.
 *
 * cipher:     8 input bytes (only read).
 * plain:      8 output bytes.  All of cipher[] is read before anything is
 *             stored, so plain may be the same buffer as cipher.
 * key_tables: ten 256-byte lookup tables, key_tables[0]..key_tables[9],
 *             e.g. as filled in by subkey_table_gen().
 *
 * The block is handled as four 16-bit words w1..w4, each kept as a high
 * byte (whN) and a low byte (wlN).  The 32 rounds are fully unrolled and
 * run with the round counter going from 32 down to 1.
 */
void
skipjack_backwards(u_int8_t *cipher, u_int8_t *plain, u_int8_t **key_tables)
{
	/* setup 4 16-bit portions */
	u_int8_t wh1 = cipher[0], wl1 = cipher[1];
	u_int8_t wh2 = cipher[2], wl2 = cipher[3];
	u_int8_t wh3 = cipher[4], wl3 = cipher[5];
	u_int8_t wh4 = cipher[6], wl4 = cipher[7];

	const u_int8_t *k0 = key_tables[0];
	const u_int8_t *k1 = key_tables[1];
	const u_int8_t *k2 = key_tables[2];
	const u_int8_t *k3 = key_tables[3];
	const u_int8_t *k4 = key_tables[4];
	const u_int8_t *k5 = key_tables[5];
	const u_int8_t *k6 = key_tables[6];
	const u_int8_t *k7 = key_tables[7];
	const u_int8_t *k8 = key_tables[8];
	const u_int8_t *k9 = key_tables[9];

	/* first 8 rounds (counters 32..25): inverse G, then unmix */
	sj_g_inv(k4, k5, k6, k7, &wh2, &wl2); wl3 ^= wl2 ^ 32; wh3 ^= wh2;
	sj_g_inv(k0, k1, k2, k3, &wh3, &wl3); wl4 ^= wl3 ^ 31; wh4 ^= wh3;
	sj_g_inv(k6, k7, k8, k9, &wh4, &wl4); wl1 ^= wl4 ^ 30; wh1 ^= wh4;
	sj_g_inv(k2, k3, k4, k5, &wh1, &wl1); wl2 ^= wl1 ^ 29; wh2 ^= wh1;
	sj_g_inv(k8, k9, k0, k1, &wh2, &wl2); wl3 ^= wl2 ^ 28; wh3 ^= wh2;
	sj_g_inv(k4, k5, k6, k7, &wh3, &wl3); wl4 ^= wl3 ^ 27; wh4 ^= wh3;
	sj_g_inv(k0, k1, k2, k3, &wh4, &wl4); wl1 ^= wl4 ^ 26; wh1 ^= wh4;
	sj_g_inv(k6, k7, k8, k9, &wh1, &wl1); wl2 ^= wl1 ^ 25; wh2 ^= wh1;

	/* second 8 rounds (counters 24..17): unmix, then inverse G */
	wh1 ^= wh2; wl1 ^= wl2 ^ 24; sj_g_inv(k2, k3, k4, k5, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 23; sj_g_inv(k8, k9, k0, k1, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 22; sj_g_inv(k4, k5, k6, k7, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 21; sj_g_inv(k0, k1, k2, k3, &wh1, &wl1);
	wh1 ^= wh2; wl1 ^= wl2 ^ 20; sj_g_inv(k6, k7, k8, k9, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 19; sj_g_inv(k2, k3, k4, k5, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 18; sj_g_inv(k8, k9, k0, k1, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 17; sj_g_inv(k4, k5, k6, k7, &wh1, &wl1);

	/* third 8 rounds (counters 16..9): same shape as the first 8 */
	sj_g_inv(k0, k1, k2, k3, &wh2, &wl2); wl3 ^= wl2 ^ 16; wh3 ^= wh2;
	sj_g_inv(k6, k7, k8, k9, &wh3, &wl3); wl4 ^= wl3 ^ 15; wh4 ^= wh3;
	sj_g_inv(k2, k3, k4, k5, &wh4, &wl4); wl1 ^= wl4 ^ 14; wh1 ^= wh4;
	sj_g_inv(k8, k9, k0, k1, &wh1, &wl1); wl2 ^= wl1 ^ 13; wh2 ^= wh1;
	sj_g_inv(k4, k5, k6, k7, &wh2, &wl2); wl3 ^= wl2 ^ 12; wh3 ^= wh2;
	sj_g_inv(k0, k1, k2, k3, &wh3, &wl3); wl4 ^= wl3 ^ 11; wh4 ^= wh3;
	sj_g_inv(k6, k7, k8, k9, &wh4, &wl4); wl1 ^= wl4 ^ 10; wh1 ^= wh4;
	sj_g_inv(k2, k3, k4, k5, &wh1, &wl1); wl2 ^= wl1 ^ 9;  wh2 ^= wh1;

	/* last 8 rounds (counters 8..1): same shape as the second 8 */
	wh1 ^= wh2; wl1 ^= wl2 ^ 8; sj_g_inv(k8, k9, k0, k1, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 7; sj_g_inv(k4, k5, k6, k7, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 6; sj_g_inv(k0, k1, k2, k3, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 5; sj_g_inv(k6, k7, k8, k9, &wh1, &wl1);
	wh1 ^= wh2; wl1 ^= wl2 ^ 4; sj_g_inv(k2, k3, k4, k5, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 3; sj_g_inv(k8, k9, k0, k1, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 2; sj_g_inv(k4, k5, k6, k7, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 1; sj_g_inv(k0, k1, k2, k3, &wh1, &wl1);

	/* pack into byte vector */
	plain[0] = wh1;  plain[1] = wl1;
	plain[2] = wh2;  plain[3] = wl2;
	plain[4] = wh3;  plain[5] = wl3;
	plain[6] = wh4;  plain[7] = wl4;
}
259