/* $NetBSD: skipjack.c,v 1.4 2014/01/01 15:18:57 pgoyette Exp $ */
/* $OpenBSD: skipjack.c,v 1.3 2001/05/05 00:31:34 angelos Exp $ */

/*
 * Further optimized test implementation of SKIPJACK algorithm
 * Mark Tillotson <markt@chaos.org.uk>, 25 June 98
 * Optimizations suit RISC (lots of registers) machine best.
 *
 * based on unoptimized implementation of
 * Panu Rissanen <bande@lut.fi> 960624
 *
 * SKIPJACK and KEA Algorithm Specifications
 * Version 2.0
 * 29 May 1998
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: skipjack.c,v 1.4 2014/01/01 15:18:57 pgoyette Exp $");

#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/param.h>

#include <crypto/skipjack/skipjack.h>
#include <opencrypto/cryptodev.h>

/*
 * Fixed 8-bit substitution table used by the G-function.
 * It is only read through subkey_table_gen() below.
 */
static const u_int8_t ftable[0x100] =
{
	0xa3, 0xd7, 0x09, 0x83, 0xf8, 0x48, 0xf6, 0xf4,
	0xb3, 0x21, 0x15, 0x78, 0x99, 0xb1, 0xaf, 0xf9,
	0xe7, 0x2d, 0x4d, 0x8a, 0xce, 0x4c, 0xca, 0x2e,
	0x52, 0x95, 0xd9, 0x1e, 0x4e, 0x38, 0x44, 0x28,
	0x0a, 0xdf, 0x02, 0xa0, 0x17, 0xf1, 0x60, 0x68,
	0x12, 0xb7, 0x7a, 0xc3, 0xe9, 0xfa, 0x3d, 0x53,
	0x96, 0x84, 0x6b, 0xba, 0xf2, 0x63, 0x9a, 0x19,
	0x7c, 0xae, 0xe5, 0xf5, 0xf7, 0x16, 0x6a, 0xa2,
	0x39, 0xb6, 0x7b, 0x0f, 0xc1, 0x93, 0x81, 0x1b,
	0xee, 0xb4, 0x1a, 0xea, 0xd0, 0x91, 0x2f, 0xb8,
	0x55, 0xb9, 0xda, 0x85, 0x3f, 0x41, 0xbf, 0xe0,
	0x5a, 0x58, 0x80, 0x5f, 0x66, 0x0b, 0xd8, 0x90,
	0x35, 0xd5, 0xc0, 0xa7, 0x33, 0x06, 0x65, 0x69,
	0x45, 0x00, 0x94, 0x56, 0x6d, 0x98, 0x9b, 0x76,
	0x97, 0xfc, 0xb2, 0xc2, 0xb0, 0xfe, 0xdb, 0x20,
	0xe1, 0xeb, 0xd6, 0xe4, 0xdd, 0x47, 0x4a, 0x1d,
	0x42, 0xed, 0x9e, 0x6e, 0x49, 0x3c, 0xcd, 0x43,
	0x27, 0xd2, 0x07, 0xd4, 0xde, 0xc7, 0x67, 0x18,
	0x89, 0xcb, 0x30, 0x1f, 0x8d, 0xc6, 0x8f, 0xaa,
	0xc8, 0x74, 0xdc, 0xc9, 0x5d, 0x5c, 0x31, 0xa4,
	0x70, 0x88, 0x61, 0x2c, 0x9f, 0x0d, 0x2b, 0x87,
	0x50, 0x82, 0x54, 0x64, 0x26, 0x7d, 0x03, 0x40,
	0x34, 0x4b, 0x1c, 0x73, 0xd1, 0xc4, 0xfd, 0x3b,
	0xcc, 0xfb, 0x7f, 0xab, 0xe6, 0x3e, 0x5b, 0xa5,
	0xad, 0x04, 0x23, 0x9c, 0x14, 0x51, 0x22, 0xf0,
	0x29, 0x79, 0x71, 0x7e, 0xff, 0x8c, 0x0e, 0xe2,
	0x0c, 0xef, 0xbc, 0x72, 0x75, 0x6f, 0x37, 0xa1,
	0xec, 0xd3, 0x8e, 0x62, 0x8b, 0x86, 0x10, 0xe8,
	0x08, 0x77, 0x11, 0xbe, 0x92, 0x4f, 0x24, 0xc5,
	0x32, 0x36, 0x9d, 0xcf, 0xf3, 0xa6, 0xbb, 0xac,
	0x5e, 0x6c, 0xa9, 0x13, 0x57, 0x25, 0xb5, 0xe3,
	0xbd, 0xa8, 0x3a, 0x01, 0x05, 0x59, 0x2a, 0x46
};

/*
 * For each key byte generate a table to represent the function
 *    ftable [in ^ keybyte]
 *
 * These tables are used to save an XOR in each stage of the G-function;
 * the tables are hopefully pointed to by register allocated variables
 * k0, k1..k9.
 *
 * key		the first 10 bytes are read, one per table
 * key_tables	10 pointers, each to a caller-provided buffer of at
 *		least 0x100 bytes; every one of those bytes is overwritten
 *
 * No allocation is done here and nothing is returned.
 */
void
subkey_table_gen(const u_int8_t *key, u_int8_t **key_tables)
{
	unsigned int i, k;

	for (k = 0; k < 10; k++) {
		const u_int8_t key_byte = key[k];
		u_int8_t *table = key_tables[k];

		/* i < 0x100 and key_byte < 0x100, so the index stays in range */
		for (i = 0; i < sizeof(ftable); i++)
			table[i] = ftable[i ^ key_byte];
	}
}


/*
 * G-function and its inverse, written as macros so that the table used
 * in every step is fixed at compile time.
 *
 * The first four arguments are single digits; they are pasted onto the
 * letter "k" to name the tables, so pointers called k0..k9 must be in
 * scope wherever these macros are expanded.  (ih, il) is the input byte
 * pair and (oh, ol) receives the result; both pairs may name the same
 * variables.  The byte arguments are evaluated more than once, so pass
 * plain variables only.
 *
 * Each macro expands to a single statement (do/while(0)), so it is safe
 * to use as the body of an unbraced if/else.
 */
#define g(a, b, c, d, ih, il, oh, ol)				\
do {								\
	(oh) = k##a[(il)] ^ (ih);				\
	(ol) = k##b[(oh)] ^ (il);				\
	(oh) = k##c[(ol)] ^ (oh);				\
	(ol) = k##d[(oh)] ^ (ol);				\
} while (/*CONSTCOND*/ 0)

#define g0(ih, il, oh, ol) g(0, 1, 2, 3, ih, il, oh, ol)
#define g4(ih, il, oh, ol) g(4, 5, 6, 7, ih, il, oh, ol)
#define g8(ih, il, oh, ol) g(8, 9, 0, 1, ih, il, oh, ol)
#define g2(ih, il, oh, ol) g(2, 3, 4, 5, ih, il, oh, ol)
#define g6(ih, il, oh, ol) g(6, 7, 8, 9, ih, il, oh, ol)


/* Same four table steps as g(), applied in the opposite order. */
#define g_inv(a, b, c, d, ih, il, oh, ol)			\
do {								\
	(ol) = k##d[(ih)] ^ (il);				\
	(oh) = k##c[(ol)] ^ (ih);				\
	(ol) = k##b[(oh)] ^ (ol);				\
	(oh) = k##a[(ol)] ^ (oh);				\
} while (/*CONSTCOND*/ 0)


#define g0_inv(ih, il, oh, ol) g_inv(0, 1, 2, 3, ih, il, oh, ol)
#define g4_inv(ih, il, oh, ol) g_inv(4, 5, 6, 7, ih, il, oh, ol)
#define g8_inv(ih, il, oh, ol) g_inv(8, 9, 0, 1, ih, il, oh, ol)
#define g2_inv(ih, il, oh, ol) g_inv(2, 3, 4, 5, ih, il, oh, ol)
#define g6_inv(ih, il, oh, ol) g_inv(6, 7, 8, 9, ih, il, oh, ol)

/*
 * Optimized version of the Skipjack algorithm
 *
 * the appropriate g-function is inlined for each round
 *
 * the data movement is minimized by rotating the names of the
 * variables w1..w4, not their contents (saves 3 moves per round)
 *
 * the loops are completely unrolled (needed to staticize choice of g)
 *
 * compiles to about 470 instructions on a Sparc (gcc -O)
 * which is about 58 instructions per byte, 14 per round.
 * gcc seems to leave in some unnecessary "and with 0xFF" operations
 * but only in the latter part of the functions.  Perhaps it
 * runs out of resources to properly optimize a long inlined function?
 * In theory it should get about 11 instructions per round, not 14.
 */

/*
 * Forward G-function on one 16-bit word held as the byte pair (*hi, *lo).
 * Four steps, each XORing one half with a table lookup indexed by the
 * other half.  t0..t3 are the key-dependent tables for this round, as
 * filled in by subkey_table_gen().
 */
static inline void
skipjack_g(const u_int8_t *t0, const u_int8_t *t1, const u_int8_t *t2,
    const u_int8_t *t3, u_int8_t *hi, u_int8_t *lo)
{
	u_int8_t h = *hi, l = *lo;

	h ^= t0[l];
	l ^= t1[h];
	h ^= t2[l];
	l ^= t3[h];

	*hi = h;
	*lo = l;
}

/*
 * Encrypt one 8-byte block.
 *
 * plain	8 input bytes
 * cipher	8 output bytes; may be the same buffer as plain, because
 *		all input is read before any output is written
 * key_tables	10 pointers to 0x100-byte tables, indexed 0..9
 *
 * The block is handled as four 16-bit words (whN = high byte, wlN = low
 * byte).  Rounds are fully unrolled: rather than moving words around,
 * each round simply operates on the variable that currently plays the
 * required role.  The constant XORed into the low byte is the round
 * number (1..32); round r starts at table 4 * (r - 1) mod 10.
 */
void
skipjack_forwards(u_int8_t *plain, u_int8_t *cipher, u_int8_t **key_tables)
{
	u_int8_t wh1 = plain[0], wl1 = plain[1];
	u_int8_t wh2 = plain[2], wl2 = plain[3];
	u_int8_t wh3 = plain[4], wl3 = plain[5];
	u_int8_t wh4 = plain[6], wl4 = plain[7];

	const u_int8_t *k0 = key_tables[0];
	const u_int8_t *k1 = key_tables[1];
	const u_int8_t *k2 = key_tables[2];
	const u_int8_t *k3 = key_tables[3];
	const u_int8_t *k4 = key_tables[4];
	const u_int8_t *k5 = key_tables[5];
	const u_int8_t *k6 = key_tables[6];
	const u_int8_t *k7 = key_tables[7];
	const u_int8_t *k8 = key_tables[8];
	const u_int8_t *k9 = key_tables[9];

	/* first 8 rounds: G first, then mix the result into the next word */
	skipjack_g(k0, k1, k2, k3, &wh1, &wl1); wl4 ^= wl1 ^ 1; wh4 ^= wh1;
	skipjack_g(k4, k5, k6, k7, &wh4, &wl4); wl3 ^= wl4 ^ 2; wh3 ^= wh4;
	skipjack_g(k8, k9, k0, k1, &wh3, &wl3); wl2 ^= wl3 ^ 3; wh2 ^= wh3;
	skipjack_g(k2, k3, k4, k5, &wh2, &wl2); wl1 ^= wl2 ^ 4; wh1 ^= wh2;
	skipjack_g(k6, k7, k8, k9, &wh1, &wl1); wl4 ^= wl1 ^ 5; wh4 ^= wh1;
	skipjack_g(k0, k1, k2, k3, &wh4, &wl4); wl3 ^= wl4 ^ 6; wh3 ^= wh4;
	skipjack_g(k4, k5, k6, k7, &wh3, &wl3); wl2 ^= wl3 ^ 7; wh2 ^= wh3;
	skipjack_g(k8, k9, k0, k1, &wh2, &wl2); wl1 ^= wl2 ^ 8; wh1 ^= wh2;

	/* second 8 rounds: mix the pre-G word into its neighbour, then G */
	wh2 ^= wh1; wl2 ^= wl1 ^ 9;  skipjack_g(k2, k3, k4, k5, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 10; skipjack_g(k6, k7, k8, k9, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 11; skipjack_g(k0, k1, k2, k3, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 12; skipjack_g(k4, k5, k6, k7, &wh2, &wl2);
	wh2 ^= wh1; wl2 ^= wl1 ^ 13; skipjack_g(k8, k9, k0, k1, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 14; skipjack_g(k2, k3, k4, k5, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 15; skipjack_g(k6, k7, k8, k9, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 16; skipjack_g(k0, k1, k2, k3, &wh2, &wl2);

	/* third 8 rounds */
	skipjack_g(k4, k5, k6, k7, &wh1, &wl1); wl4 ^= wl1 ^ 17; wh4 ^= wh1;
	skipjack_g(k8, k9, k0, k1, &wh4, &wl4); wl3 ^= wl4 ^ 18; wh3 ^= wh4;
	skipjack_g(k2, k3, k4, k5, &wh3, &wl3); wl2 ^= wl3 ^ 19; wh2 ^= wh3;
	skipjack_g(k6, k7, k8, k9, &wh2, &wl2); wl1 ^= wl2 ^ 20; wh1 ^= wh2;
	skipjack_g(k0, k1, k2, k3, &wh1, &wl1); wl4 ^= wl1 ^ 21; wh4 ^= wh1;
	skipjack_g(k4, k5, k6, k7, &wh4, &wl4); wl3 ^= wl4 ^ 22; wh3 ^= wh4;
	skipjack_g(k8, k9, k0, k1, &wh3, &wl3); wl2 ^= wl3 ^ 23; wh2 ^= wh3;
	skipjack_g(k2, k3, k4, k5, &wh2, &wl2); wl1 ^= wl2 ^ 24; wh1 ^= wh2;

	/* last 8 rounds */
	wh2 ^= wh1; wl2 ^= wl1 ^ 25; skipjack_g(k6, k7, k8, k9, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 26; skipjack_g(k0, k1, k2, k3, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 27; skipjack_g(k4, k5, k6, k7, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 28; skipjack_g(k8, k9, k0, k1, &wh2, &wl2);
	wh2 ^= wh1; wl2 ^= wl1 ^ 29; skipjack_g(k2, k3, k4, k5, &wh1, &wl1);
	wh1 ^= wh4; wl1 ^= wl4 ^ 30; skipjack_g(k6, k7, k8, k9, &wh4, &wl4);
	wh4 ^= wh3; wl4 ^= wl3 ^ 31; skipjack_g(k0, k1, k2, k3, &wh3, &wl3);
	wh3 ^= wh2; wl3 ^= wl2 ^ 32; skipjack_g(k4, k5, k6, k7, &wh2, &wl2);

	/* pack into byte vector */
	cipher[0] = wh1; cipher[1] = wl1;
	cipher[2] = wh2; cipher[3] = wl2;
	cipher[4] = wh3; cipher[5] = wl3;
	cipher[6] = wh4; cipher[7] = wl4;
}


/*
 * Inverse G-function on one 16-bit word held as the byte pair (*hi, *lo).
 * Applies the same four table steps as the forward G-function, last
 * table first.  t0..t3 are passed in forward order.
 */
static inline void
skipjack_g_inv(const u_int8_t *t0, const u_int8_t *t1, const u_int8_t *t2,
    const u_int8_t *t3, u_int8_t *hi, u_int8_t *lo)
{
	u_int8_t h = *hi, l = *lo;

	l ^= t3[h];
	h ^= t2[l];
	l ^= t1[h];
	h ^= t0[l];

	*hi = h;
	*lo = l;
}

/*
 * Decrypt one 8-byte block.
 *
 * cipher	8 input bytes
 * plain	8 output bytes; may be the same buffer as cipher, because
 *		all input is read before any output is written
 * key_tables	10 pointers to 0x100-byte tables, indexed 0..9
 *
 * The rounds are the encryption rounds undone in reverse order, so the
 * round constant counts down from 32 to 1.
 */
void
skipjack_backwards(u_int8_t *cipher, u_int8_t *plain, u_int8_t **key_tables)
{
	/* setup 4 16-bit portions */
	u_int8_t wh1 = cipher[0], wl1 = cipher[1];
	u_int8_t wh2 = cipher[2], wl2 = cipher[3];
	u_int8_t wh3 = cipher[4], wl3 = cipher[5];
	u_int8_t wh4 = cipher[6], wl4 = cipher[7];

	const u_int8_t *k0 = key_tables[0];
	const u_int8_t *k1 = key_tables[1];
	const u_int8_t *k2 = key_tables[2];
	const u_int8_t *k3 = key_tables[3];
	const u_int8_t *k4 = key_tables[4];
	const u_int8_t *k5 = key_tables[5];
	const u_int8_t *k6 = key_tables[6];
	const u_int8_t *k7 = key_tables[7];
	const u_int8_t *k8 = key_tables[8];
	const u_int8_t *k9 = key_tables[9];

	/* first 8 rounds */
	skipjack_g_inv(k4, k5, k6, k7, &wh2, &wl2); wl3 ^= wl2 ^ 32; wh3 ^= wh2;
	skipjack_g_inv(k0, k1, k2, k3, &wh3, &wl3); wl4 ^= wl3 ^ 31; wh4 ^= wh3;
	skipjack_g_inv(k6, k7, k8, k9, &wh4, &wl4); wl1 ^= wl4 ^ 30; wh1 ^= wh4;
	skipjack_g_inv(k2, k3, k4, k5, &wh1, &wl1); wl2 ^= wl1 ^ 29; wh2 ^= wh1;
	skipjack_g_inv(k8, k9, k0, k1, &wh2, &wl2); wl3 ^= wl2 ^ 28; wh3 ^= wh2;
	skipjack_g_inv(k4, k5, k6, k7, &wh3, &wl3); wl4 ^= wl3 ^ 27; wh4 ^= wh3;
	skipjack_g_inv(k0, k1, k2, k3, &wh4, &wl4); wl1 ^= wl4 ^ 26; wh1 ^= wh4;
	skipjack_g_inv(k6, k7, k8, k9, &wh1, &wl1); wl2 ^= wl1 ^ 25; wh2 ^= wh1;

	/* second 8 rounds */
	wh1 ^= wh2; wl1 ^= wl2 ^ 24; skipjack_g_inv(k2, k3, k4, k5, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 23; skipjack_g_inv(k8, k9, k0, k1, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 22; skipjack_g_inv(k4, k5, k6, k7, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 21; skipjack_g_inv(k0, k1, k2, k3, &wh1, &wl1);
	wh1 ^= wh2; wl1 ^= wl2 ^ 20; skipjack_g_inv(k6, k7, k8, k9, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 19; skipjack_g_inv(k2, k3, k4, k5, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 18; skipjack_g_inv(k8, k9, k0, k1, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 17; skipjack_g_inv(k4, k5, k6, k7, &wh1, &wl1);

	/* third 8 rounds */
	skipjack_g_inv(k0, k1, k2, k3, &wh2, &wl2); wl3 ^= wl2 ^ 16; wh3 ^= wh2;
	skipjack_g_inv(k6, k7, k8, k9, &wh3, &wl3); wl4 ^= wl3 ^ 15; wh4 ^= wh3;
	skipjack_g_inv(k2, k3, k4, k5, &wh4, &wl4); wl1 ^= wl4 ^ 14; wh1 ^= wh4;
	skipjack_g_inv(k8, k9, k0, k1, &wh1, &wl1); wl2 ^= wl1 ^ 13; wh2 ^= wh1;
	skipjack_g_inv(k4, k5, k6, k7, &wh2, &wl2); wl3 ^= wl2 ^ 12; wh3 ^= wh2;
	skipjack_g_inv(k0, k1, k2, k3, &wh3, &wl3); wl4 ^= wl3 ^ 11; wh4 ^= wh3;
	skipjack_g_inv(k6, k7, k8, k9, &wh4, &wl4); wl1 ^= wl4 ^ 10; wh1 ^= wh4;
	skipjack_g_inv(k2, k3, k4, k5, &wh1, &wl1); wl2 ^= wl1 ^ 9;  wh2 ^= wh1;

	/* last 8 rounds */
	wh1 ^= wh2; wl1 ^= wl2 ^ 8; skipjack_g_inv(k8, k9, k0, k1, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 7; skipjack_g_inv(k4, k5, k6, k7, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 6; skipjack_g_inv(k0, k1, k2, k3, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 5; skipjack_g_inv(k6, k7, k8, k9, &wh1, &wl1);
	wh1 ^= wh2; wl1 ^= wl2 ^ 4; skipjack_g_inv(k2, k3, k4, k5, &wh2, &wl2);
	wh2 ^= wh3; wl2 ^= wl3 ^ 3; skipjack_g_inv(k8, k9, k0, k1, &wh3, &wl3);
	wh3 ^= wh4; wl3 ^= wl4 ^ 2; skipjack_g_inv(k4, k5, k6, k7, &wh4, &wl4);
	wh4 ^= wh1; wl4 ^= wl1 ^ 1; skipjack_g_inv(k0, k1, k2, k3, &wh1, &wl1);

	/* pack into byte vector */
	plain[0] = wh1; plain[1] = wl1;
	plain[2] = wh2; plain[3] = wl2;
	plain[4] = wh3; plain[5] = wl3;
	plain[6] = wh4; plain[7] = wl4;
}

267*f45c6e8aSpgoyette MODULE(MODULE_CLASS_MISC, skipjack, NULL);
268*f45c6e8aSpgoyette
269*f45c6e8aSpgoyette static int
skipjack_modcmd(modcmd_t cmd,void * opaque)270*f45c6e8aSpgoyette skipjack_modcmd(modcmd_t cmd, void *opaque)
271*f45c6e8aSpgoyette {
272*f45c6e8aSpgoyette
273*f45c6e8aSpgoyette switch (cmd) {
274*f45c6e8aSpgoyette case MODULE_CMD_INIT:
275*f45c6e8aSpgoyette return 0;
276*f45c6e8aSpgoyette case MODULE_CMD_FINI:
277*f45c6e8aSpgoyette return 0;
278*f45c6e8aSpgoyette default:
279*f45c6e8aSpgoyette return ENOTTY;
280*f45c6e8aSpgoyette }
281*f45c6e8aSpgoyette }
282