/*	$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $");

#include <sys/types.h>
#include <sys/endian.h>

#ifdef _KERNEL
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

static void
aesbear_setkey(uint32_t rk[static 60], const void *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case 10:
		key_len = 16;
		break;
	case 12:
		key_len = 24;
		break;
	case 14:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}

	br_aes_ct_keysched(rk, key, key_len);
}

static void
aesbear_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{

	aesbear_setkey(enc->aese_aes.aes_rk, key, nrounds);
}

static void
aesbear_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{

	/*
	 * BearSSL computes InvMixColumns on the fly -- no need for
	 * distinct decryption round keys.
	 */
	aesbear_setkey(dec->aesd_aes.aes_rk, key, nrounds);
}

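/*
 * Exposition note: BearSSL's aes_ct processes two 128-bit blocks per
 * bitsliced call.  The q[8] arrays below interleave them: q[2*i]
 * holds the ith 32-bit word of the first block and q[2*i + 1] the ith
 * word of the second, and br_aes_ct_ortho() converts the pair to and
 * from the bitsliced representation in place.  Single-block
 * operations feed a zero `garbage' block through the second slot.
 */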
static void
aesbear_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load input block interleaved with garbage block.  */
	q[2*0] = le32dec(in + 4*0);
	q[2*1] = le32dec(in + 4*1);
	q[2*2] = le32dec(in + 4*2);
	q[2*3] = le32dec(in + 4*3);
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Transform to bitslice, encrypt, transform from bitslice.  */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Store output block.  */
	le32enc(out + 4*0, q[2*0]);
	le32enc(out + 4*1, q[2*1]);
	le32enc(out + 4*2, q[2*2]);
	le32enc(out + 4*3, q[2*3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load input block interleaved with garbage.  */
	q[2*0] = le32dec(in + 4*0);
	q[2*1] = le32dec(in + 4*1);
	q[2*2] = le32dec(in + 4*2);
	q[2*3] = le32dec(in + 4*3);
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Transform to bitslice, decrypt, transform from bitslice.  */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Store output block.  */
	le32enc(out + 4*0, q[2*0]);
	le32enc(out + 4*1, q[2*1]);
	le32enc(out + 4*2, q[2*2]);
	le32enc(out + 4*3, q[2*3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t cv0, cv1, cv2, cv3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Initialize garbage block.  */
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Load IV.  */
	cv0 = le32dec(iv + 4*0);
	cv1 = le32dec(iv + 4*1);
	cv2 = le32dec(iv + 4*2);
	cv3 = le32dec(iv + 4*3);

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Load input block and apply CV.  */
		q[2*0] = cv0 ^ le32dec(in + 4*0);
		q[2*1] = cv1 ^ le32dec(in + 4*1);
		q[2*2] = cv2 ^ le32dec(in + 4*2);
		q[2*3] = cv3 ^ le32dec(in + 4*3);

		/* Transform to bitslice, encrypt, transform from bitslice.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Remember ciphertext as CV and store output block.  */
		cv0 = q[2*0];
		cv1 = q[2*1];
		cv2 = q[2*2];
		cv3 = q[2*3];
		le32enc(out + 4*0, cv0);
		le32enc(out + 4*1, cv1);
		le32enc(out + 4*2, cv2);
		le32enc(out + 4*3, cv3);
	}

	/* Store updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t cv0, cv1, cv2, cv3, iv0, iv1, iv2, iv3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load the IV.  */
	iv0 = le32dec(iv + 4*0);
	iv1 = le32dec(iv + 4*1);
	iv2 = le32dec(iv + 4*2);
	iv3 = le32dec(iv + 4*3);

	/* Load the last cipher block.  */
	cv0 = le32dec(in + nbytes - 16 + 4*0);
	cv1 = le32dec(in + nbytes - 16 + 4*1);
	cv2 = le32dec(in + nbytes - 16 + 4*2);
	cv3 = le32dec(in + nbytes - 16 + 4*3);

	/* Store the updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Handle the last cipher block separately if odd number.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Set up the last cipher block and a garbage block.  */
		q[2*0] = cv0;
		q[2*1] = cv1;
		q[2*2] = cv2;
		q[2*3] = cv3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* If this was the only cipher block, we're done.  */
		nbytes -= 16;
		if (nbytes == 0)
			goto out;

		/*
		 * Otherwise, load up the penultimate cipher block, and
		 * store the output block.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, cv0 ^ q[2*0]);
		le32enc(out + nbytes + 4*1, cv1 ^ q[2*1]);
		le32enc(out + nbytes + 4*2, cv2 ^ q[2*2]);
		le32enc(out + nbytes + 4*3, cv3 ^ q[2*3]);
	}

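	/*
	 * Exposition note: the loop below walks backwards from the end
	 * of the buffer, decrypting two blocks per bitsliced call.
	 * Each chaining value is the preceding ciphertext block, read
	 * from the input before the corresponding plaintext is stored,
	 * so the routine remains correct when out == in (in-place
	 * decryption).
	 */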
	for (;;) {
		KASSERT(nbytes >= 32);

		/*
		 * 1. Set up upper cipher block from cvN.
		 * 2. Load lower cipher block into cvN and set it up.
		 * 3. Decrypt.
		 */
		q[2*0 + 1] = cv0;
		q[2*1 + 1] = cv1;
		q[2*2 + 1] = cv2;
		q[2*3 + 1] = cv3;
		cv0 = q[2*0] = le32dec(in + nbytes - 32 + 4*0);
		cv1 = q[2*1] = le32dec(in + nbytes - 32 + 4*1);
		cv2 = q[2*2] = le32dec(in + nbytes - 32 + 4*2);
		cv3 = q[2*3] = le32dec(in + nbytes - 32 + 4*3);

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the upper output block.  */
		le32enc(out + nbytes - 16 + 4*0, q[2*0 + 1] ^ cv0);
		le32enc(out + nbytes - 16 + 4*1, q[2*1 + 1] ^ cv1);
		le32enc(out + nbytes - 16 + 4*2, q[2*2 + 1] ^ cv2);
		le32enc(out + nbytes - 16 + 4*3, q[2*3 + 1] ^ cv3);

		/* Stop if we've reached the first output block.  */
		nbytes -= 32;
		if (nbytes == 0)
			goto out;

		/*
		 * Load the preceding cipher block, and apply it as the
		 * chaining value to this one.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, q[2*0] ^ cv0);
		le32enc(out + nbytes + 4*1, q[2*1] ^ cv1);
		le32enc(out + nbytes + 4*2, q[2*2] ^ cv2);
		le32enc(out + nbytes + 4*3, q[2*3] ^ cv3);
	}

out:	/* Store the first output block.  */
	le32enc(out + 4*0, q[2*0] ^ iv0);
	le32enc(out + 4*1, q[2*1] ^ iv1);
	le32enc(out + 4*2, q[2*2] ^ iv2);
	le32enc(out + 4*3, q[2*3] ^ iv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

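/*
 * Exposition note: aesbear_xts_update multiplies the tweak by x in
 * GF(2^128) modulo the XTS polynomial x^128 + x^7 + x^2 + x + 1.  The
 * tweak is kept as four little-endian 32-bit words, so each
 * shifted-out top bit s0..s2 carries into the next word, and the
 * carry s3 out of the top word folds back into the low word as the
 * reduction constant 0x87.  E.g., {0,0,0,0x80000000} maps to
 * {0x87,0,0,0}, matching the selftest vectors below.
 */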
static inline void
aesbear_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesbear_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static void
aesbear_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if odd number.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Encrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first cipher block.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block.  */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak.  */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Encrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks.  */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if odd number.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first plaintext block.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block.  */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak.  */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Decrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks.  */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Initialize garbage block.  */
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Load initial authenticator.  */
	q[2*0] = le32dec(auth + 4*0);
	q[2*1] = le32dec(auth + 4*1);
	q[2*2] = le32dec(auth + 4*2);
	q[2*3] = le32dec(auth + 4*3);

	for (; nbytes; nbytes -= 16, in += 16) {
		/* Combine input block.  */
		q[2*0] ^= le32dec(in + 4*0);
		q[2*1] ^= le32dec(in + 4*1);
		q[2*2] ^= le32dec(in + 4*2);
		q[2*3] ^= le32dec(in + 4*3);

		/* Transform to bitslice, encrypt, transform from bitslice.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);
	}

	/* Store updated authenticator.  */
	le32enc(auth + 4*0, q[2*0]);
	le32enc(auth + 4*1, q[2*1]);
	le32enc(auth + 4*2, q[2*2]);
	le32enc(auth + 4*3, q[2*3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Set first block to authenticator.  */
	q[2*0] = le32dec(authctr + 4*0);
	q[2*1] = le32dec(authctr + 4*1);
	q[2*2] = le32dec(authctr + 4*2);
	q[2*3] = le32dec(authctr + 4*3);

	/* Load initial counter block, big-endian so we can increment it.  */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3 = be32dec(authctr + 16 + 4*3);
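
	/*
	 * Exposition note: CCM's counter block is big-endian on the
	 * wire, but only its last 32-bit word changes here, so that
	 * word alone is kept in arithmetic (big-endian-decoded) form;
	 * bswap32 below converts each incremented value back to the
	 * little-endian word order of the bitsliced lanes.
	 */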

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Update authenticator.  */
		q[2*0] ^= le32dec(in + 4*0);
		q[2*1] ^= le32dec(in + 4*1);
		q[2*2] ^= le32dec(in + 4*2);
		q[2*3] ^= le32dec(in + 4*3);

		/* Increment 32-bit counter.  */
		q[2*0 + 1] = c0;
		q[2*1 + 1] = c1;
		q[2*2 + 1] = c2;
		q[2*3 + 1] = bswap32(++c3);

		/* Encrypt authenticator and counter.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Encrypt with CTR output.  */
		le32enc(out + 4*0, le32dec(in + 4*0) ^ q[2*0 + 1]);
		le32enc(out + 4*1, le32dec(in + 4*1) ^ q[2*1 + 1]);
		le32enc(out + 4*2, le32dec(in + 4*2) ^ q[2*2 + 1]);
		le32enc(out + 4*3, le32dec(in + 4*3) ^ q[2*3 + 1]);
	}

	/* Update authenticator.  */
	le32enc(authctr + 4*0, q[2*0]);
	le32enc(authctr + 4*1, q[2*1]);
	le32enc(authctr + 4*2, q[2*2]);
	le32enc(authctr + 4*3, q[2*3]);

	/* Update counter.  */
	be32enc(authctr + 16 + 4*3, c3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t c0, c1, c2, c3;
	uint32_t b0, b1, b2, b3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load initial counter block, big-endian so we can increment it.  */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3 = be32dec(authctr + 16 + 4*3);

	/* Increment 32-bit counter.  */
	q[2*0] = c0;
	q[2*1] = c1;
	q[2*2] = c2;
	q[2*3] = bswap32(++c3);

	/*
	 * Set the second block to garbage -- we don't have any
	 * plaintext to authenticate yet.
	 */
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Encrypt first CTR.  */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Load the initial authenticator.  */
	q[2*0 + 1] = le32dec(authctr + 4*0);
	q[2*1 + 1] = le32dec(authctr + 4*1);
	q[2*2 + 1] = le32dec(authctr + 4*2);
	q[2*3 + 1] = le32dec(authctr + 4*3);

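	/*
	 * Exposition note: each bitsliced call below does double duty
	 * -- the even lanes encrypt the counter block for the next
	 * keystream, while the odd lanes run the CBC-MAC over the
	 * plaintext, one block behind the keystream.
	 */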
	for (;; in += 16, out += 16) {
		/* Decrypt the block.  */
		b0 = le32dec(in + 4*0) ^ q[2*0];
		b1 = le32dec(in + 4*1) ^ q[2*1];
		b2 = le32dec(in + 4*2) ^ q[2*2];
		b3 = le32dec(in + 4*3) ^ q[2*3];

		/* Update authenticator.  */
		q[2*0 + 1] ^= b0;
		q[2*1 + 1] ^= b1;
		q[2*2 + 1] ^= b2;
		q[2*3 + 1] ^= b3;

		/* Store plaintext.  */
		le32enc(out + 4*0, b0);
		le32enc(out + 4*1, b1);
		le32enc(out + 4*2, b2);
		le32enc(out + 4*3, b3);

		/* If this is the last block, stop.  */
		if ((nbytes -= 16) == 0)
			break;

		/* Increment 32-bit counter.  */
		q[2*0] = c0;
		q[2*1] = c1;
		q[2*2] = c2;
		q[2*3] = bswap32(++c3);

		/* Authenticate previous plaintext, encrypt next CTR.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);
	}

	/*
	 * Authenticate last plaintext.  We're only doing this for the
	 * authenticator, not for the counter, so don't bother to
	 * initialize q[2*i].  (Even for the sake of sanitizers,
	 * they're already initialized to something by now.)
	 */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Update authenticator.  */
	le32enc(authctr + 4*0, q[2*0 + 1]);
	le32enc(authctr + 4*1, q[2*1 + 1]);
	le32enc(authctr + 4*2, q[2*2 + 1]);
	le32enc(authctr + 4*3, q[2*3 + 1]);

	/* Update counter.  */
	be32enc(authctr + 16 + 4*3, c3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static int
aesbear_probe(void)
{

	if (aesbear_xts_update_selftest())
		return -1;

	/* XXX test br_aes_ct_bitslice_decrypt */
	/* XXX test br_aes_ct_bitslice_encrypt */
	/* XXX test br_aes_ct_keysched */
	/* XXX test br_aes_ct_ortho */
	/* XXX test br_aes_ct_skey_expand */

	return 0;
}

struct aes_impl aes_bear_impl = {
	.ai_name = "BearSSL aes_ct",
	.ai_probe = aesbear_probe,
	.ai_setenckey = aesbear_setenckey,
	.ai_setdeckey = aesbear_setdeckey,
	.ai_enc = aesbear_enc,
	.ai_dec = aesbear_dec,
	.ai_cbc_enc = aesbear_cbc_enc,
	.ai_cbc_dec = aesbear_cbc_dec,
	.ai_xts_enc = aesbear_xts_enc,
	.ai_xts_dec = aesbear_xts_dec,
	.ai_cbcmac_update1 = aesbear_cbcmac_update1,
	.ai_ccm_enc1 = aesbear_ccm_enc1,
	.ai_ccm_dec1 = aesbear_ccm_dec1,
};

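/*
 * Illustrative sketch, not part of the original file and not compiled:
 * encrypting a single block through the aes_impl entry points.  It
 * assumes the AES_128_NROUNDS constant from <crypto/aes/aes.h>; the
 * key and plaintext are arbitrary placeholders.
 */
#if 0
static void
aesbear_example(void)
{
	static const uint8_t key[16] = { 0 };	/* placeholder key */
	static const uint8_t pt[16] = { 0 };	/* placeholder plaintext */
	uint8_t ct[16];
	struct aesenc enc;

	if (aes_bear_impl.ai_probe() == 0) {
		aes_bear_impl.ai_setenckey(&enc, key, AES_128_NROUNDS);
		aes_bear_impl.ai_enc(&enc, pt, ct, AES_128_NROUNDS);
		/* ct now holds the AES-128 encryption of pt.  */
	}
	explicit_memset(&enc, 0, sizeof enc);
}
#endif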