1 /* $NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $");
31
32 #include <sys/types.h>
33 #include <sys/endian.h>
34
35 #ifdef _KERNEL
36 #include <sys/systm.h>
37 #else
38 #include <assert.h>
39 #include <err.h>
40 #include <string.h>
41 #define KASSERT assert
42 #define panic(fmt, args...) err(1, fmt, args)
43 #endif
44
45 #include <crypto/aes/aes.h>
46 #include <crypto/aes/aes_bear.h>
47 #include <crypto/aes/aes_impl.h>
48
/*
 * aesbear_setkey(rk, key, nrounds)
 *
 *	Expand an AES key into the round-key words rk via BearSSL's
 *	constant-time key schedule.  nrounds must be 10, 12, or 14,
 *	selecting a 128-, 192-, or 256-bit key respectively.
 */
static void
aesbear_setkey(uint32_t rk[static 60], const void *key, uint32_t nrounds)
{
	size_t key_len;

	/* Derive the key length in bytes from the round count. */
	if (nrounds == 10)
		key_len = 16;
	else if (nrounds == 12)
		key_len = 24;
	else if (nrounds == 14)
		key_len = 32;
	else
		panic("invalid AES nrounds: %u", nrounds);

	br_aes_ct_keysched(rk, key, key_len);
}
70
/*
 * aesbear_setenckey(enc, key, nrounds)
 *
 *	Set up the AES encryption round keys in enc->aese_aes.aes_rk.
 *	nrounds (10/12/14) selects the key size (16/24/32 bytes).
 */
static void
aesbear_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{

	aesbear_setkey(enc->aese_aes.aes_rk, key, nrounds);
}
77
/*
 * aesbear_setdeckey(dec, key, nrounds)
 *
 *	Set up the AES decryption round keys in dec->aesd_aes.aes_rk.
 *	nrounds (10/12/14) selects the key size (16/24/32 bytes).
 */
static void
aesbear_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{

	/*
	 * BearSSL computes InvMixColumns on the fly -- no need for
	 * distinct decryption round keys.
	 */
	aesbear_setkey(dec->aesd_aes.aes_rk, key, nrounds);
}
88
89 static void
aesbear_enc(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],uint32_t nrounds)90 aesbear_enc(const struct aesenc *enc, const uint8_t in[static 16],
91 uint8_t out[static 16], uint32_t nrounds)
92 {
93 uint32_t sk_exp[120];
94 uint32_t q[8];
95
96 /* Expand round keys for bitslicing. */
97 br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
98
99 /* Load input block interleaved with garbage block. */
100 q[2*0] = le32dec(in + 4*0);
101 q[2*1] = le32dec(in + 4*1);
102 q[2*2] = le32dec(in + 4*2);
103 q[2*3] = le32dec(in + 4*3);
104 q[1] = q[3] = q[5] = q[7] = 0;
105
106 /* Transform to bitslice, decrypt, transform from bitslice. */
107 br_aes_ct_ortho(q);
108 br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
109 br_aes_ct_ortho(q);
110
111 /* Store output block. */
112 le32enc(out + 4*0, q[2*0]);
113 le32enc(out + 4*1, q[2*1]);
114 le32enc(out + 4*2, q[2*2]);
115 le32enc(out + 4*3, q[2*3]);
116
117 /* Paranoia: Zero temporary buffers. */
118 explicit_memset(sk_exp, 0, sizeof sk_exp);
119 explicit_memset(q, 0, sizeof q);
120 }
121
122 static void
aesbear_dec(const struct aesdec * dec,const uint8_t in[static16],uint8_t out[static16],uint32_t nrounds)123 aesbear_dec(const struct aesdec *dec, const uint8_t in[static 16],
124 uint8_t out[static 16], uint32_t nrounds)
125 {
126 uint32_t sk_exp[120];
127 uint32_t q[8];
128
129 /* Expand round keys for bitslicing. */
130 br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);
131
132 /* Load input block interleaved with garbage. */
133 q[2*0] = le32dec(in + 4*0);
134 q[2*1] = le32dec(in + 4*1);
135 q[2*2] = le32dec(in + 4*2);
136 q[2*3] = le32dec(in + 4*3);
137 q[1] = q[3] = q[5] = q[7] = 0;
138
139 /* Transform to bitslice, decrypt, transform from bitslice. */
140 br_aes_ct_ortho(q);
141 br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
142 br_aes_ct_ortho(q);
143
144 /* Store output block. */
145 le32enc(out + 4*0, q[2*0]);
146 le32enc(out + 4*1, q[2*1]);
147 le32enc(out + 4*2, q[2*2]);
148 le32enc(out + 4*3, q[2*3]);
149
150 /* Paranoia: Zero temporary buffers. */
151 explicit_memset(sk_exp, 0, sizeof sk_exp);
152 explicit_memset(q, 0, sizeof q);
153 }
154
155 static void
aesbear_cbc_enc(const struct aesenc * enc,const uint8_t in[static16],uint8_t out[static16],size_t nbytes,uint8_t iv[static16],uint32_t nrounds)156 aesbear_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
157 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
158 uint32_t nrounds)
159 {
160 uint32_t sk_exp[120];
161 uint32_t q[8];
162 uint32_t cv0, cv1, cv2, cv3;
163
164 KASSERT(nbytes % 16 == 0);
165
166 /* Skip if there's nothing to do. */
167 if (nbytes == 0)
168 return;
169
170 /* Expand round keys for bitslicing. */
171 br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
172
173 /* Initialize garbage block. */
174 q[1] = q[3] = q[5] = q[7] = 0;
175
176 /* Load IV. */
177 cv0 = le32dec(iv + 4*0);
178 cv1 = le32dec(iv + 4*1);
179 cv2 = le32dec(iv + 4*2);
180 cv3 = le32dec(iv + 4*3);
181
182 for (; nbytes; nbytes -= 16, in += 16, out += 16) {
183 /* Load input block and apply CV. */
184 q[2*0] = cv0 ^ le32dec(in + 4*0);
185 q[2*1] = cv1 ^ le32dec(in + 4*1);
186 q[2*2] = cv2 ^ le32dec(in + 4*2);
187 q[2*3] = cv3 ^ le32dec(in + 4*3);
188
189 /* Transform to bitslice, encrypt, transform from bitslice. */
190 br_aes_ct_ortho(q);
191 br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
192 br_aes_ct_ortho(q);
193
194 /* Remember ciphertext as CV and store output block. */
195 cv0 = q[2*0];
196 cv1 = q[2*1];
197 cv2 = q[2*2];
198 cv3 = q[2*3];
199 le32enc(out + 4*0, cv0);
200 le32enc(out + 4*1, cv1);
201 le32enc(out + 4*2, cv2);
202 le32enc(out + 4*3, cv3);
203 }
204
205 /* Store updated IV. */
206 le32enc(iv + 4*0, cv0);
207 le32enc(iv + 4*1, cv1);
208 le32enc(iv + 4*2, cv2);
209 le32enc(iv + 4*3, cv3);
210
211 /* Paranoia: Zero temporary buffers. */
212 explicit_memset(sk_exp, 0, sizeof sk_exp);
213 explicit_memset(q, 0, sizeof q);
214 }
215
/*
 * aesbear_cbc_dec(dec, in, out, nbytes, iv, nrounds)
 *
 *	AES-CBC decryption of nbytes (a multiple of 16; zero is a
 *	no-op) from in to out.  Works backwards from the end of the
 *	buffer, two blocks per bitsliced decryption, so the chaining
 *	values can be read straight out of the ciphertext.  On return,
 *	iv holds the last input ciphertext block, ready for a
 *	subsequent call.
 */
static void
aesbear_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t cv0, cv1, cv2, cv3, iv0, iv1, iv2, iv3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do. */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing. */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load the IV. */
	iv0 = le32dec(iv + 4*0);
	iv1 = le32dec(iv + 4*1);
	iv2 = le32dec(iv + 4*2);
	iv3 = le32dec(iv + 4*3);

	/* Load the last cipher block. */
	cv0 = le32dec(in + nbytes - 16 + 4*0);
	cv1 = le32dec(in + nbytes - 16 + 4*1);
	cv2 = le32dec(in + nbytes - 16 + 4*2);
	cv3 = le32dec(in + nbytes - 16 + 4*3);

	/* Store the updated IV. */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Handle the last cipher block separately if odd number. */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Set up the last cipher block and a garbage block. */
		q[2*0] = cv0;
		q[2*1] = cv1;
		q[2*2] = cv2;
		q[2*3] = cv3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt. */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* If this was the only cipher block, we're done. */
		nbytes -= 16;
		if (nbytes == 0)
			goto out;

		/*
		 * Otherwise, load up the penultimate cipher block, and
		 * store the output block.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, cv0 ^ q[2*0]);
		le32enc(out + nbytes + 4*1, cv1 ^ q[2*1]);
		le32enc(out + nbytes + 4*2, cv2 ^ q[2*2]);
		le32enc(out + nbytes + 4*3, cv3 ^ q[2*3]);
	}

	/* Main loop: decrypt two cipher blocks per iteration, back to front. */
	for (;;) {
		KASSERT(nbytes >= 32);

		/*
		 * 1. Set up upper cipher block from cvN.
		 * 2. Load lower cipher block into cvN and set it up.
		 * 3. Decrypt.
		 */
		q[2*0 + 1] = cv0;
		q[2*1 + 1] = cv1;
		q[2*2 + 1] = cv2;
		q[2*3 + 1] = cv3;
		cv0 = q[2*0] = le32dec(in + nbytes - 32 + 4*0);
		cv1 = q[2*1] = le32dec(in + nbytes - 32 + 4*1);
		cv2 = q[2*2] = le32dec(in + nbytes - 32 + 4*2);
		cv3 = q[2*3] = le32dec(in + nbytes - 32 + 4*3);

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the upper output block. */
		le32enc(out + nbytes - 16 + 4*0, q[2*0 + 1] ^ cv0);
		le32enc(out + nbytes - 16 + 4*1, q[2*1 + 1] ^ cv1);
		le32enc(out + nbytes - 16 + 4*2, q[2*2 + 1] ^ cv2);
		le32enc(out + nbytes - 16 + 4*3, q[2*3 + 1] ^ cv3);

		/* Stop if we've reached the first output block. */
		nbytes -= 32;
		if (nbytes == 0)
			goto out;

		/*
		 * Load the preceding cipher block, and apply it as the
		 * chaining value to this one.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, q[2*0] ^ cv0);
		le32enc(out + nbytes + 4*1, q[2*1] ^ cv1);
		le32enc(out + nbytes + 4*2, q[2*2] ^ cv2);
		le32enc(out + nbytes + 4*3, q[2*3] ^ cv3);
	}

out:	/* Store the first output block. */
	le32enc(out + 4*0, q[2*0] ^ iv0);
	le32enc(out + 4*1, q[2*1] ^ iv1);
	le32enc(out + 4*2, q[2*2] ^ iv2);
	le32enc(out + 4*3, q[2*3] ^ iv3);

	/* Paranoia: Zero temporary buffers. */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
343
/*
 * aesbear_xts_update(t0, t1, t2, t3)
 *
 *	Multiply the 128-bit XTS tweak (t0 least significant, t3 most
 *	significant, 32-bit words) by x in GF(2^128): shift left one
 *	bit and, if a bit fell off the top, reduce by the polynomial
 *	x^128 + x^7 + x^2 + x + 1 (0x87).
 */
static inline void
aesbear_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	const uint32_t c0 = *t0 >> 31;	/* carry out of each word */
	const uint32_t c1 = *t1 >> 31;
	const uint32_t c2 = *t2 >> 31;
	const uint32_t c3 = *t3 >> 31;

	*t3 = (*t3 << 1) ^ c2;
	*t2 = (*t2 << 1) ^ c1;
	*t1 = (*t1 << 1) ^ c0;
	*t0 = (*t0 << 1) ^ (0x87 & -c3);
}
358
/*
 * aesbear_xts_update_selftest()
 *
 *	Known-answer test for aesbear_xts_update.  Returns 0 on
 *	success, -1 on any mismatch.
 */
static int
aesbear_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	uint32_t t[4];
	unsigned i, j;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		/* Run one doubling on the input vector. */
		for (j = 0; j < 4; j++)
			t[j] = cases[i].in[j];
		aesbear_xts_update(&t[0], &t[1], &t[2], &t[3]);

		/* Compare against the expected output vector. */
		for (j = 0; j < 4; j++) {
			if (t[j] != cases[i].out[j])
				return -1;
		}
	}

	/* Success! */
	return 0;
}
391
/*
 * aesbear_xts_enc(enc, in, out, nbytes, tweak, nrounds)
 *
 *	AES-XTS encryption of nbytes (a multiple of 16; zero is a
 *	no-op) from in to out.  tweak holds the 128-bit XTS tweak,
 *	updated in place for a subsequent call.  Pairs blocks so each
 *	bitsliced encryption handles two at once; an odd leading block
 *	is processed alongside a garbage block first.
 */
static void
aesbear_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do. */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing. */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load tweak. */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if odd number. */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block. */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Encrypt two blocks. */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first cipher block. */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block. */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	/* Main loop: encrypt two blocks per bitsliced pass. */
	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak. */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks. */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Encrypt two blocks. */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks. */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks. */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak. */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers. */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
495
/*
 * aesbear_xts_dec(dec, in, out, nbytes, tweak, nrounds)
 *
 *	AES-XTS decryption of nbytes (a multiple of 16; zero is a
 *	no-op) from in to out.  tweak holds the 128-bit XTS tweak,
 *	updated in place for a subsequent call.  Mirrors
 *	aesbear_xts_enc, with the bitsliced decryption core.
 */
static void
aesbear_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do. */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing. */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load tweak. */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if odd number. */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block. */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt two blocks. */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first cipher block. */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block. */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	/* Main loop: decrypt two blocks per bitsliced pass. */
	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak. */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks. */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Decrypt two blocks. */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks. */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks. */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak. */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers. */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
599
600 static void
aesbear_cbcmac_update1(const struct aesenc * enc,const uint8_t in[static16],size_t nbytes,uint8_t auth[static16],uint32_t nrounds)601 aesbear_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
602 size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
603 {
604 uint32_t sk_exp[120];
605 uint32_t q[8];
606
607 KASSERT(nbytes);
608 KASSERT(nbytes % 16 == 0);
609
610 /* Expand round keys for bitslicing. */
611 br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
612
613 /* Initialize garbage block. */
614 q[1] = q[3] = q[5] = q[7] = 0;
615
616 /* Load initial authenticator. */
617 q[2*0] = le32dec(auth + 4*0);
618 q[2*1] = le32dec(auth + 4*1);
619 q[2*2] = le32dec(auth + 4*2);
620 q[2*3] = le32dec(auth + 4*3);
621
622 for (; nbytes; nbytes -= 16, in += 16) {
623 /* Combine input block. */
624 q[2*0] ^= le32dec(in + 4*0);
625 q[2*1] ^= le32dec(in + 4*1);
626 q[2*2] ^= le32dec(in + 4*2);
627 q[2*3] ^= le32dec(in + 4*3);
628
629 /* Transform to bitslice, encrypt, transform from bitslice. */
630 br_aes_ct_ortho(q);
631 br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
632 br_aes_ct_ortho(q);
633 }
634
635 /* Store updated authenticator. */
636 le32enc(auth + 4*0, q[2*0]);
637 le32enc(auth + 4*1, q[2*1]);
638 le32enc(auth + 4*2, q[2*2]);
639 le32enc(auth + 4*3, q[2*3]);
640
641 /* Paranoia: Zero temporary buffers. */
642 explicit_memset(sk_exp, 0, sizeof sk_exp);
643 explicit_memset(q, 0, sizeof q);
644 }
645
/*
 * aesbear_ccm_enc1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	One pass of AES-CCM encryption over nbytes (a positive
 *	multiple of 16).  authctr[0..15] is the CBC-MAC authenticator
 *	(little-endian 32-bit words) and authctr[16..31] is the CTR
 *	block, whose last 32-bit word is big-endian and incremented
 *	per block; both are updated in place.  Each bitsliced pass
 *	encrypts the authenticator and the counter block together.
 */
static void
aesbear_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing. */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Set first block to authenticator. */
	q[2*0] = le32dec(authctr + 4*0);
	q[2*1] = le32dec(authctr + 4*1);
	q[2*2] = le32dec(authctr + 4*2);
	q[2*3] = le32dec(authctr + 4*3);

	/* Load initial counter block, big-endian so we can increment it. */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3 = be32dec(authctr + 16 + 4*3);

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Update authenticator. */
		q[2*0] ^= le32dec(in + 4*0);
		q[2*1] ^= le32dec(in + 4*1);
		q[2*2] ^= le32dec(in + 4*2);
		q[2*3] ^= le32dec(in + 4*3);

		/* Increment 32-bit counter. */
		q[2*0 + 1] = c0;
		q[2*1 + 1] = c1;
		q[2*2 + 1] = c2;
		q[2*3 + 1] = bswap32(++c3);

		/* Encrypt authenticator and counter. */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Encrypt with CTR output. */
		le32enc(out + 4*0, le32dec(in + 4*0) ^ q[2*0 + 1]);
		le32enc(out + 4*1, le32dec(in + 4*1) ^ q[2*1 + 1]);
		le32enc(out + 4*2, le32dec(in + 4*2) ^ q[2*2 + 1]);
		le32enc(out + 4*3, le32dec(in + 4*3) ^ q[2*3 + 1]);
	}

	/* Update authenticator. */
	le32enc(authctr + 4*0, q[2*0]);
	le32enc(authctr + 4*1, q[2*1]);
	le32enc(authctr + 4*2, q[2*2]);
	le32enc(authctr + 4*3, q[2*3]);

	/* Update counter. */
	be32enc(authctr + 16 + 4*3, c3);

	/* Paranoia: Zero temporary buffers. */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
710
/*
 * aesbear_ccm_dec1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	One pass of AES-CCM decryption over nbytes (a positive
 *	multiple of 16).  authctr has the same layout as in
 *	aesbear_ccm_enc1.  Software-pipelined: the CBC-MAC over a
 *	plaintext block can only run after that block is decrypted,
 *	so each bitsliced pass authenticates the previous plaintext
 *	block while encrypting the next CTR block.
 */
static void
aesbear_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t c0, c1, c2, c3;
	uint32_t b0, b1, b2, b3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing. */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load initial counter block, big-endian so we can increment it. */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3 = be32dec(authctr + 16 + 4*3);

	/* Increment 32-bit counter. */
	q[2*0] = c0;
	q[2*1] = c1;
	q[2*2] = c2;
	q[2*3] = bswap32(++c3);

	/*
	 * Set the second block to garbage -- we don't have any
	 * plaintext to authenticate yet.
	 */
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Encrypt first CTR. */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Load the initial authenticator. */
	q[2*0 + 1] = le32dec(authctr + 4*0);
	q[2*1 + 1] = le32dec(authctr + 4*1);
	q[2*2 + 1] = le32dec(authctr + 4*2);
	q[2*3 + 1] = le32dec(authctr + 4*3);

	for (;; in += 16, out += 16) {
		/* Decrypt the block. */
		b0 = le32dec(in + 4*0) ^ q[2*0];
		b1 = le32dec(in + 4*1) ^ q[2*1];
		b2 = le32dec(in + 4*2) ^ q[2*2];
		b3 = le32dec(in + 4*3) ^ q[2*3];

		/* Update authenticator. */
		q[2*0 + 1] ^= b0;
		q[2*1 + 1] ^= b1;
		q[2*2 + 1] ^= b2;
		q[2*3 + 1] ^= b3;

		/* Store plaintext. */
		le32enc(out + 4*0, b0);
		le32enc(out + 4*1, b1);
		le32enc(out + 4*2, b2);
		le32enc(out + 4*3, b3);

		/* If this is the last block, stop. */
		if ((nbytes -= 16) == 0)
			break;

		/* Increment 32-bit counter. */
		q[2*0] = c0;
		q[2*1] = c1;
		q[2*2] = c2;
		q[2*3] = bswap32(++c3);

		/* Authenticate previous plaintext, encrypt next CTR. */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);
	}

	/*
	 * Authenticate last plaintext.  We're only doing this for the
	 * authenticator, not for the counter, so don't bother to
	 * initialize q[2*i].  (Even for the sake of sanitizers,
	 * they're already initialized to something by now.)
	 */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Update authenticator. */
	le32enc(authctr + 4*0, q[2*0 + 1]);
	le32enc(authctr + 4*1, q[2*1 + 1]);
	le32enc(authctr + 4*2, q[2*2 + 1]);
	le32enc(authctr + 4*3, q[2*3 + 1]);

	/* Update counter. */
	be32enc(authctr + 16 + 4*3, c3);

	/* Paranoia: Zero temporary buffers. */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
813
/*
 * aesbear_probe()
 *
 *	Return 0 if this implementation is usable, -1 otherwise.
 *	Currently only runs the XTS tweak-update known-answer test.
 */
static int
aesbear_probe(void)
{

	/* Fail the probe if the tweak-update self-test fails. */
	if (aesbear_xts_update_selftest() != 0)
		return -1;

	/* XXX test br_aes_ct_bitslice_decrypt */
	/* XXX test br_aes_ct_bitslice_encrypt */
	/* XXX test br_aes_ct_keysched */
	/* XXX test br_aes_ct_ortho */
	/* XXX test br_aes_ct_skey_expand */

	return 0;
}
829
/*
 * aes_bear_impl
 *
 *	Operations table registering the BearSSL constant-time
 *	bitsliced AES code with the aes_impl dispatch framework.
 */
struct aes_impl aes_bear_impl = {
	.ai_name = "BearSSL aes_ct",
	.ai_probe = aesbear_probe,
	.ai_setenckey = aesbear_setenckey,
	.ai_setdeckey = aesbear_setdeckey,
	.ai_enc = aesbear_enc,
	.ai_dec = aesbear_dec,
	.ai_cbc_enc = aesbear_cbc_enc,
	.ai_cbc_dec = aesbear_cbc_dec,
	.ai_xts_enc = aesbear_xts_enc,
	.ai_xts_dec = aesbear_xts_dec,
	.ai_cbcmac_update1 = aesbear_cbcmac_update1,
	.ai_ccm_enc1 = aesbear_ccm_enc1,
	.ai_ccm_dec1 = aesbear_ccm_dec1,
};
845