/*	$NetBSD: aes_via.c,v 1.9 2024/06/16 16:30:52 rillig Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.9 2024/06/16 16:30:52 rillig Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d) {0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define C3_CRYPT_CWLO_ALG_M		0x00000070
#define C3_CRYPT_CWLO_ALG_AES		0x00000000
#define C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define C3_CRYPT_CWLO_NORMAL		0x00000000
#define C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define C3_CRYPT_CWLO_DECRYPT		0x00000200
#define C3_CRYPT_CWLO_KEY128		0x0000000a      /* 128bit, 10 rds */
#define C3_CRYPT_CWLO_KEY192		0x0000040c      /* 192bit, 12 rds */
#define C3_CRYPT_CWLO_KEY256		0x0000080e      /* 256bit, 14 rds */
#endif

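/*
 * Force the ACE unit to reload the key schedule before the next
 * xcrypt operation.  Per VIA's PadLock documentation, the hardware
 * caches the most recently loaded key material, and a write to
 * EFLAGS (here a no-op pushf/popf pair) is what invalidates that
 * cache after the round keys in memory have changed.
 */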
static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

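/*
 * ECB helpers.  REP XCRYPTECB takes the block count in %ecx, the
 * source in %esi, the destination in %edi, the key schedule in %ebx,
 * and a 16-byte-aligned control word in %edx; the key schedule,
 * input, and output must likewise be 16-byte aligned, which the
 * KASSERTs below check.
 */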
static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

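/*
 * XOR the 16-byte block at a with the 16-byte block at b and store
 * the result at x.  The operands are accessed as uint32_t, so they
 * are assumed to be at least 4-byte aligned; unaligned accesses
 * happen to work on x86, the only place this code runs.
 */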
static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

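/*
 * Advance the XTS tweak by one block: multiply the 128-bit tweak,
 * stored as four little-endian 32-bit words t0..t3 with t0 least
 * significant, by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1.
 * The constant 0x87 is the low byte of the reduction polynomial.
 */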
static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac aligned");
EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac unaligned");
EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);

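/*
 * Update the 16-byte CBC-MAC state auth0 over nbytes of data:
 * auth <- E_k(auth ^ block) for each 16-byte block of in.  If auth0
 * is not 16-byte aligned it is bounced through an aligned buffer;
 * the data blocks are only ever read by xor128, so they need no
 * bouncing of their own.
 */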
static void
aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authbuf[16] __aligned(16);
	uint8_t *auth = auth0;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(authbuf, auth0, 16);
		auth = authbuf;
		cbcmac_unaligned_evcnt.ev_count++;
	} else {
		cbcmac_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16) {
		xor128(auth, auth, in);
		aesvia_encN(enc, auth, auth, 1, cw0);
	}
	fpu_kern_leave();

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(auth0, authbuf, 16);
		explicit_memset(authbuf, 0, sizeof authbuf);
	}
}

static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc aligned");
EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc unaligned");
EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);

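/*
 * CCM state layout: the 32-byte authctr0 buffer holds the running
 * CBC-MAC state in bytes 0-15 and the CTR block in bytes 16-31,
 * whose last four bytes are a big-endian block counter.  Each
 * iteration below pushes the CBC-MAC block and the CTR block through
 * the hardware together as a single two-block ECB operation.
 */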
static void
aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		ccmenc_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmenc_aligned_evcnt.ev_count++;
	}
	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	/*
	 * In principle we could use REP XCRYPTCTR here, but that
	 * doesn't help to compute the CBC-MAC step, and certain VIA
	 * CPUs have some weird errata with REP XCRYPTCTR that make it
	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
	 * simultaneously compute the CBC-MAC step and the CTR step.
	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
	 * who knows...)
	 */
	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		xor128(authctr, authctr, in);
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
		xor128(out, in, authctr + 16);
	}
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec aligned");
EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec unaligned");
EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);

static void
aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		ccmdec_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmdec_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	be32enc(authctr + 16 + 4*3, ++c3);
	aesvia_encN(enc, authctr + 16, authctr + 16, 1, cw0);
	for (;; in += 16, out += 16) {
		xor128(out, authctr + 16, in);
		xor128(authctr, authctr, out);
		if ((nbytes -= 16) == 0)
			break;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
	}
	aesvia_encN(enc, authctr, authctr, 1, cw0);
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * https://web.archive.org/web/20220104214041/http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	if (eax < 0xc0000000)
		return -1;
	if (!__get_cpuid(0xc0000000, &eax, &ebx, &ecx, &edx))
		return -1;
	if (eax < 0xc0000001)
		return -1;
	if (!__get_cpuid(0xc0000001, &eax, &ebx, &ecx, &edx))
		return -1;
	/* Check whether ACE and ACE2 are both supported and enabled.  */
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

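/*
 * Method table consumed by the machine-independent AES glue; ai_probe
 * is expected to return 0 only when the VIA ACE instructions are
 * usable, so the remaining entry points can assume the hardware is
 * present.
 */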
struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
	.ai_cbcmac_update1 = aesvia_cbcmac_update1,
	.ai_ccm_enc1 = aesvia_ccm_enc1,
	.ai_ccm_dec1 = aesvia_ccm_dec1,
};
906