xref: /openbsd-src/sys/crypto/cast.c (revision a28daedfc357b214be5c701aa8ba8adb29a7f1c2)
1 /*      $OpenBSD: cast.c,v 1.3 2005/03/24 11:45:28 hshoexer Exp $       */
2 
3 /*
4  *	CAST-128 in C
5  *	Written by Steve Reid <sreid@sea-to-sky.net>
6  *	100% Public Domain - no warranty
7  *	Released 1997.10.11
8  */
9 
#include <sys/types.h>
#include <sys/systm.h>
#include <crypto/cast.h>
#include <crypto/castsb.h>
13 
/*
 * Macros to access 8-bit bytes out of a 32-bit word, most significant
 * byte first: a = bits 31..24, b = bits 23..16, c = bits 15..8,
 * d = bits 7..0.
 *
 * The argument is parenthesized so that compound expressions such as
 * U_INT8_Tb(p | q) extract from the whole value rather than having the
 * shift bind to the last operand only.
 */
#define U_INT8_Ta(x) ( (u_int8_t) ((x)>>24) )
#define U_INT8_Tb(x) ( (u_int8_t) (((x)>>16)&255) )
#define U_INT8_Tc(x) ( (u_int8_t) (((x)>>8)&255) )
#define U_INT8_Td(x) ( (u_int8_t) ((x)&255) )
19 
/*
 * Circular left shift of a 32-bit unsigned value x by n bits.
 *
 * Both shift counts are reduced to the range 0..31.  Without the mask a
 * rotate count of 0 would evaluate (x) >> 32, and shifting a 32-bit
 * operand by its full width is undefined behaviour in C.  With the mask,
 * n == 0 yields x unchanged.  Note that x is evaluated twice.
 */
#define ROL(x, n) ( ((x)<<((n)&31)) | ((x)>>((32-(n))&31)) )
22 
/*
 * CAST-128 uses three different round functions.
 *
 * Each F<n>(l, r, i) combines r with subkey key->xkey[i] (add, xor or
 * subtract respectively), rotates the result left by key->xkey[i+16],
 * feeds the four bytes of that value through cast_sbox1..cast_sbox4 and
 * XORs the combined S-box output into l.  The three variants differ only
 * in which of +, ^ and - is used at each position.
 *
 * The macros rely on two names being in scope at the point of use:
 * `key` (pointer to the key schedule) and `t` (a 32-bit scratch word,
 * overwritten on every use).
 *
 * Each body is wrapped in do { } while (0) so that a use followed by a
 * semicolon is a single statement, and the arguments are parenthesized
 * so that expressions can be passed safely.
 */
#define F1(l, r, i) do { \
	t = ROL(key->xkey[(i)] + (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] ^ cast_sbox2[U_INT8_Tb(t)]) - \
	 cast_sbox3[U_INT8_Tc(t)]) + cast_sbox4[U_INT8_Td(t)]; \
} while (0)
#define F2(l, r, i) do { \
	t = ROL(key->xkey[(i)] ^ (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] - cast_sbox2[U_INT8_Tb(t)]) + \
	 cast_sbox3[U_INT8_Tc(t)]) ^ cast_sbox4[U_INT8_Td(t)]; \
} while (0)
#define F3(l, r, i) do { \
	t = ROL(key->xkey[(i)] - (r), key->xkey[(i)+16]); \
	(l) ^= ((cast_sbox1[U_INT8_Ta(t)] + cast_sbox2[U_INT8_Tb(t)]) ^ \
	 cast_sbox3[U_INT8_Tc(t)]) - cast_sbox4[U_INT8_Td(t)]; \
} while (0)
36 
37 
38 /***** Encryption Function *****/
39 
40 void
41 cast_encrypt(cast_key *key, u_int8_t *inblock, u_int8_t *outblock)
42 {
43 	u_int32_t t, l, r;
44 
45 	/* Get inblock into l,r */
46 	l = ((u_int32_t)inblock[0] << 24) | ((u_int32_t)inblock[1] << 16) |
47 	    ((u_int32_t)inblock[2] << 8) | (u_int32_t)inblock[3];
48 	r = ((u_int32_t)inblock[4] << 24) | ((u_int32_t)inblock[5] << 16) |
49 	    ((u_int32_t)inblock[6] << 8) | (u_int32_t)inblock[7];
50 	/* Do the work */
51 	F1(l, r,  0);
52 	F2(r, l,  1);
53 	F3(l, r,  2);
54 	F1(r, l,  3);
55 	F2(l, r,  4);
56 	F3(r, l,  5);
57 	F1(l, r,  6);
58 	F2(r, l,  7);
59 	F3(l, r,  8);
60 	F1(r, l,  9);
61 	F2(l, r, 10);
62 	F3(r, l, 11);
63 	/* Only do full 16 rounds if key length > 80 bits */
64 	if (key->rounds > 12) {
65 		F1(l, r, 12);
66 		F2(r, l, 13);
67 		F3(l, r, 14);
68 		F1(r, l, 15);
69 	}
70 	/* Put l,r into outblock */
71 	outblock[0] = U_INT8_Ta(r);
72 	outblock[1] = U_INT8_Tb(r);
73 	outblock[2] = U_INT8_Tc(r);
74 	outblock[3] = U_INT8_Td(r);
75 	outblock[4] = U_INT8_Ta(l);
76 	outblock[5] = U_INT8_Tb(l);
77 	outblock[6] = U_INT8_Tc(l);
78 	outblock[7] = U_INT8_Td(l);
79 	/* Wipe clean */
80 	t = l = r = 0;
81 }
82 
83 
84 /***** Decryption Function *****/
85 
86 void
87 cast_decrypt(cast_key *key, u_int8_t *inblock, u_int8_t *outblock)
88 {
89 	u_int32_t t, l, r;
90 
91 	/* Get inblock into l,r */
92 	r = ((u_int32_t)inblock[0] << 24) | ((u_int32_t)inblock[1] << 16) |
93 	    ((u_int32_t)inblock[2] << 8) | (u_int32_t)inblock[3];
94 	l = ((u_int32_t)inblock[4] << 24) | ((u_int32_t)inblock[5] << 16) |
95 	    ((u_int32_t)inblock[6] << 8) | (u_int32_t)inblock[7];
96 	/* Do the work */
97 	/* Only do full 16 rounds if key length > 80 bits */
98 	if (key->rounds > 12) {
99 		F1(r, l, 15);
100 		F3(l, r, 14);
101 		F2(r, l, 13);
102 		F1(l, r, 12);
103 	}
104 	F3(r, l, 11);
105 	F2(l, r, 10);
106 	F1(r, l,  9);
107 	F3(l, r,  8);
108 	F2(r, l,  7);
109 	F1(l, r,  6);
110 	F3(r, l,  5);
111 	F2(l, r,  4);
112 	F1(r, l,  3);
113 	F3(l, r,  2);
114 	F2(r, l,  1);
115 	F1(l, r,  0);
116 	/* Put l,r into outblock */
117 	outblock[0] = U_INT8_Ta(l);
118 	outblock[1] = U_INT8_Tb(l);
119 	outblock[2] = U_INT8_Tc(l);
120 	outblock[3] = U_INT8_Td(l);
121 	outblock[4] = U_INT8_Ta(r);
122 	outblock[5] = U_INT8_Tb(r);
123 	outblock[6] = U_INT8_Tc(r);
124 	outblock[7] = U_INT8_Td(r);
125 	/* Wipe clean */
126 	t = l = r = 0;
127 }
128 
129 
130 /***** Key Schedule *****/
131 
132 void
133 cast_setkey(cast_key *key, u_int8_t *rawkey, int keybytes)
134 {
135 	u_int32_t t[4], z[4], x[4];
136 	int i;
137 
138 	/* Set number of rounds to 12 or 16, depending on key length */
139 	key->rounds = (keybytes <= 10 ? 12 : 16);
140 
141 	/* Copy key to workspace x */
142 	for (i = 0; i < 4; i++) {
143 		x[i] = 0;
144 		if ((i*4+0) < keybytes) x[i] = (u_int32_t)rawkey[i*4+0] << 24;
145 		if ((i*4+1) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+1] << 16;
146 		if ((i*4+2) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+2] << 8;
147 		if ((i*4+3) < keybytes) x[i] |= (u_int32_t)rawkey[i*4+3];
148 	}
149 	/* Generate 32 subkeys, four at a time */
150 	for (i = 0; i < 32; i+=4) {
151 		switch (i & 4) {
152 		case 0:
153 			t[0] = z[0] = x[0] ^ cast_sbox5[U_INT8_Tb(x[3])] ^
154 			    cast_sbox6[U_INT8_Td(x[3])] ^
155 			    cast_sbox7[U_INT8_Ta(x[3])] ^
156 			    cast_sbox8[U_INT8_Tc(x[3])] ^
157 			    cast_sbox7[U_INT8_Ta(x[2])];
158 			t[1] = z[1] = x[2] ^ cast_sbox5[U_INT8_Ta(z[0])] ^
159 			    cast_sbox6[U_INT8_Tc(z[0])] ^
160 			    cast_sbox7[U_INT8_Tb(z[0])] ^
161 			    cast_sbox8[U_INT8_Td(z[0])] ^
162 			    cast_sbox8[U_INT8_Tc(x[2])];
163 			t[2] = z[2] = x[3] ^ cast_sbox5[U_INT8_Td(z[1])] ^
164 			    cast_sbox6[U_INT8_Tc(z[1])] ^
165 			    cast_sbox7[U_INT8_Tb(z[1])] ^
166 			    cast_sbox8[U_INT8_Ta(z[1])] ^
167 			    cast_sbox5[U_INT8_Tb(x[2])];
168 			t[3] = z[3] = x[1] ^ cast_sbox5[U_INT8_Tc(z[2])] ^
169 			    cast_sbox6[U_INT8_Tb(z[2])] ^
170 			    cast_sbox7[U_INT8_Td(z[2])] ^
171 			    cast_sbox8[U_INT8_Ta(z[2])] ^
172 			    cast_sbox6[U_INT8_Td(x[2])];
173 			break;
174 		 case 4:
175 			t[0] = x[0] = z[2] ^ cast_sbox5[U_INT8_Tb(z[1])] ^
176 			    cast_sbox6[U_INT8_Td(z[1])] ^
177 			    cast_sbox7[U_INT8_Ta(z[1])] ^
178 			    cast_sbox8[U_INT8_Tc(z[1])] ^
179 			    cast_sbox7[U_INT8_Ta(z[0])];
180 			t[1] = x[1] = z[0] ^ cast_sbox5[U_INT8_Ta(x[0])] ^
181 			    cast_sbox6[U_INT8_Tc(x[0])] ^
182 			    cast_sbox7[U_INT8_Tb(x[0])] ^
183 			    cast_sbox8[U_INT8_Td(x[0])] ^
184 			    cast_sbox8[U_INT8_Tc(z[0])];
185 			t[2] = x[2] = z[1] ^ cast_sbox5[U_INT8_Td(x[1])] ^
186 			    cast_sbox6[U_INT8_Tc(x[1])] ^
187 			    cast_sbox7[U_INT8_Tb(x[1])] ^
188 			    cast_sbox8[U_INT8_Ta(x[1])] ^
189 			    cast_sbox5[U_INT8_Tb(z[0])];
190 			t[3] = x[3] = z[3] ^ cast_sbox5[U_INT8_Tc(x[2])] ^
191 			    cast_sbox6[U_INT8_Tb(x[2])] ^
192 			    cast_sbox7[U_INT8_Td(x[2])] ^
193 			    cast_sbox8[U_INT8_Ta(x[2])] ^
194 			    cast_sbox6[U_INT8_Td(z[0])];
195 			break;
196 		}
197 		switch (i & 12) {
198 		case 0:
199 		case 12:
200 			key->xkey[i+0] = cast_sbox5[U_INT8_Ta(t[2])] ^
201 			    cast_sbox6[U_INT8_Tb(t[2])] ^
202 			    cast_sbox7[U_INT8_Td(t[1])] ^
203 			    cast_sbox8[U_INT8_Tc(t[1])];
204 			key->xkey[i+1] = cast_sbox5[U_INT8_Tc(t[2])] ^
205 			    cast_sbox6[U_INT8_Td(t[2])] ^
206 			    cast_sbox7[U_INT8_Tb(t[1])] ^
207 			    cast_sbox8[U_INT8_Ta(t[1])];
208 			key->xkey[i+2] = cast_sbox5[U_INT8_Ta(t[3])] ^
209 			    cast_sbox6[U_INT8_Tb(t[3])] ^
210 			    cast_sbox7[U_INT8_Td(t[0])] ^
211 			    cast_sbox8[U_INT8_Tc(t[0])];
212 			key->xkey[i+3] = cast_sbox5[U_INT8_Tc(t[3])] ^
213 			    cast_sbox6[U_INT8_Td(t[3])] ^
214 			    cast_sbox7[U_INT8_Tb(t[0])] ^
215 			    cast_sbox8[U_INT8_Ta(t[0])];
216 			break;
217 		case 4:
218 		case 8:
219 			key->xkey[i+0] = cast_sbox5[U_INT8_Td(t[0])] ^
220 			    cast_sbox6[U_INT8_Tc(t[0])] ^
221 			    cast_sbox7[U_INT8_Ta(t[3])] ^
222 			    cast_sbox8[U_INT8_Tb(t[3])];
223 			key->xkey[i+1] = cast_sbox5[U_INT8_Tb(t[0])] ^
224 			    cast_sbox6[U_INT8_Ta(t[0])] ^
225 			    cast_sbox7[U_INT8_Tc(t[3])] ^
226 			    cast_sbox8[U_INT8_Td(t[3])];
227 			key->xkey[i+2] = cast_sbox5[U_INT8_Td(t[1])] ^
228 			    cast_sbox6[U_INT8_Tc(t[1])] ^
229 			    cast_sbox7[U_INT8_Ta(t[2])] ^
230 			    cast_sbox8[U_INT8_Tb(t[2])];
231 			key->xkey[i+3] = cast_sbox5[U_INT8_Tb(t[1])] ^
232 			    cast_sbox6[U_INT8_Ta(t[1])] ^
233 			    cast_sbox7[U_INT8_Tc(t[2])] ^
234 			    cast_sbox8[U_INT8_Td(t[2])];
235 			break;
236 		}
237 		switch (i & 12) {
238 		case 0:
239 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Tc(z[0])];
240 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Tc(z[1])];
241 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Tb(z[2])];
242 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Ta(z[3])];
243 			break;
244 		case 4:
245 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Ta(x[2])];
246 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Tb(x[3])];
247 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Td(x[0])];
248 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Td(x[1])];
249 			break;
250 		case 8:
251 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Tb(z[2])];
252 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Ta(z[3])];
253 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Tc(z[0])];
254 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Tc(z[1])];
255 			break;
256 		case 12:
257 			key->xkey[i+0] ^= cast_sbox5[U_INT8_Td(x[0])];
258 			key->xkey[i+1] ^= cast_sbox6[U_INT8_Td(x[1])];
259 			key->xkey[i+2] ^= cast_sbox7[U_INT8_Ta(x[2])];
260 			key->xkey[i+3] ^= cast_sbox8[U_INT8_Tb(x[3])];
261 			break;
262 		}
263 		if (i >= 16) {
264 			key->xkey[i+0] &= 31;
265 			key->xkey[i+1] &= 31;
266 			key->xkey[i+2] &= 31;
267 			key->xkey[i+3] &= 31;
268 		}
269 	}
270 	/* Wipe clean */
271 	for (i = 0; i < 4; i++) {
272 		t[i] = x[i] = z[i] = 0;
273 	}
274 }
275 
276 /* Made in Canada */
277