/* $NetBSD: aes_via.c,v 1.9 2024/06/16 16:30:52 rillig Exp $ */

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: aes_via.c,v 1.9 2024/06/16 16:30:52 rillig Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d)	{0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define	C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define	C3_CRYPT_CWLO_ALG_M		0x00000070
#define	C3_CRYPT_CWLO_ALG_AES		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define	C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define	C3_CRYPT_CWLO_NORMAL		0x00000000
#define	C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define	C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define	C3_CRYPT_CWLO_DECRYPT		0x00000200
#define	C3_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
#define	C3_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
#define	C3_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
#endif

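/*
 * aesvia_reload_keys()
 *
 *	Force the AES unit to reload the key material on the next
 *	xcrypt instruction.  The hardware caches the loaded key and
 *	uses a bit in EFLAGS as a hint that it is still valid; writing
 *	to EFLAGS with popf clears the hint, so the next xcrypt will
 *	fetch the key from memory again.
 */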
static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

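/*
 * Key setup.  For AES-128 the hardware expands the key itself, so we
 * store just the four raw key words; for AES-192 and AES-256 we expand
 * the key schedule in software with the BearSSL constant-time key
 * schedule, and aesvia_keylen_cw0 sets C3_CRYPT_CWLO_KEYGEN_SW so the
 * hardware uses the precomputed schedule as-is.
 */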
static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

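	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 */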
	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

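	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 */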
	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
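	/*
	 * The fast path requires 16-byte-aligned buffers and an input
	 * block that is not the last 16 bytes of a page, since
	 * xcrypt-ecb reads one block past it.
	 */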
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
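	/*
	 * The fast path requires 16-byte-aligned buffers and an input
	 * block that is not the last 16 bytes of a page, since
	 * xcrypt-ecb reads one block past it.
	 */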
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

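/*
 * xor128(x, a, b)
 *
 *	16-byte XOR: x = a ^ b, computed 32 bits at a time.  All three
 *	pointers must be at least 4-byte aligned.
 */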
static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

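/*
 * aesvia_xts_update(t0, t1, t2, t3)
 *
 *	Update an XTS tweak: multiply the 128-bit tweak, stored as four
 *	32-bit little-endian words from t0 (low) to t3 (high), by x in
 *	GF(2^128) modulo x^128 + x^7 + x^2 + x + 1.  The carry out of
 *	the high bit of t3 is folded back into t0 as 0x87.
 */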
static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
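		/* lastblock is 16 if out + nbytes ends a page, else 0. */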
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
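		/* lastblock is 16 if out + nbytes ends a page, else 0. */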
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac aligned");
EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac unaligned");
EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);

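/*
 * aesvia_cbcmac_update1(enc, in, nbytes, auth0, nrounds)
 *
 *	Update a CBC-MAC: for each 16-byte block, auth ^= block and
 *	then auth = AES(auth).  If auth0 is not 16-byte aligned, work
 *	in an aligned bounce buffer and copy the result back.
 */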
static void
aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authbuf[16] __aligned(16);
	uint8_t *auth = auth0;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(authbuf, auth0, 16);
		auth = authbuf;
		cbcmac_unaligned_evcnt.ev_count++;
	} else {
		cbcmac_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16) {
		xor128(auth, auth, in);
		aesvia_encN(enc, auth, auth, 1, cw0);
	}
	fpu_kern_leave();

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(auth0, authbuf, 16);
		explicit_memset(authbuf, 0, sizeof authbuf);
	}
}

static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc aligned");
EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc unaligned");
EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);

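/*
 * aesvia_ccm_enc1(enc, in, out, nbytes, authctr0, nrounds)
 *
 *	CCM encryption step: authctr0[0..15] is the CBC-MAC state and
 *	authctr0[16..31] is the CTR block, whose last 32-bit word is a
 *	big-endian block counter.  Each iteration absorbs a plaintext
 *	block into the CBC-MAC, advances the counter, encrypts both
 *	blocks with one xcrypt-ecb, and XORs the counter output into
 *	the plaintext to form the ciphertext.
 */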
static void
aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		ccmenc_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmenc_aligned_evcnt.ev_count++;
	}
	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	/*
	 * In principle we could use REP XCRYPTCTR here, but that
	 * doesn't help to compute the CBC-MAC step, and certain VIA
	 * CPUs have some weird errata with REP XCRYPTCTR that make it
	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
	 * simultaneously compute the CBC-MAC step and the CTR step.
	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
	 * who knows...)
	 */
	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		xor128(authctr, authctr, in);
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
		xor128(out, in, authctr + 16);
	}
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec aligned");
EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec unaligned");
EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);

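/*
 * aesvia_ccm_dec1(enc, in, out, nbytes, authctr0, nrounds)
 *
 *	CCM decryption step: same layout as aesvia_ccm_enc1, but the
 *	CBC-MAC must absorb the plaintext, which is only available
 *	after the CTR pad has been computed.  So the counter block is
 *	encrypted one iteration ahead: decrypt a block with the
 *	current pad, absorb it into the MAC state, then encrypt the
 *	MAC state together with the next counter block.
 */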
static void
aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		ccmdec_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmdec_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	be32enc(authctr + 16 + 4*3, ++c3);
	aesvia_encN(enc, authctr + 16, authctr + 16, 1, cw0);
	for (;; in += 16, out += 16) {
		xor128(out, authctr + 16, in);
		xor128(authctr, authctr, out);
		if ((nbytes -= 16) == 0)
			break;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
	}
	aesvia_encN(enc, authctr, authctr, 1, cw0);
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * https://web.archive.org/web/20220104214041/http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	if (eax < 0xc0000000)
		return -1;
	if (!__get_cpuid(0xc0000000, &eax, &ebx, &ecx, &edx))
		return -1;
	if (eax < 0xc0000001)
		return -1;
	if (!__get_cpuid(0xc0000001, &eax, &ebx, &ecx, &edx))
		return -1;
	/* Check whether ACE and ACE2 are both supported and enabled.  */
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
	.ai_cbcmac_update1 = aesvia_cbcmac_update1,
	.ai_ccm_enc1 = aesvia_ccm_enc1,
	.ai_ccm_dec1 = aesvia_ccm_dec1,
};