/*      $NetBSD: vfp_init.c,v 1.76 2021/10/31 16:23:48 skrll Exp $ */

/*
 * Copyright (c) 2008 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cputypes.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfp_init.c,v 1.76 2021/10/31 16:23:48 skrll Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/cpu.h>

#include <arm/locore.h>
#include <arm/pcb.h>
#include <arm/undefined.h>
#include <arm/vfpreg.h>
#include <arm/mcontext.h>
#include <arm/fpu.h>

#include <uvm/uvm_extern.h>		/* for pmap.h */

#include <crypto/aes/aes_impl.h>
#include <crypto/aes/arch/arm/aes_neon.h>
#include <crypto/chacha/arch/arm/chacha_neon.h>
#include <crypto/chacha/chacha_impl.h>

#ifdef FPU_VFP

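/*
 * Tell the assembler to accept VFP/NEON mnemonics.  The kernel itself
 * is built without floating-point code generation, so the .fpu
 * directives below only widen what gas will assemble, not what the
 * compiler emits.
 */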
#ifdef CPU_CORTEX
#define SETFPU __asm(".fpu\tvfpv4")
#else
#define SETFPU __asm(".fpu\tvfp")
#endif
SETFPU;

/* FLDMD <X>, {d0-d15} */
static inline void
load_vfpregs_lo(const uint64_t *p)
{
	SETFPU;
	__asm __volatile("vldmia\t%0, {d0-d15}" :: "r" (p) : "memory");
}

/* FSTMD <X>, {d0-d15} */
static inline void
save_vfpregs_lo(uint64_t *p)
{
	SETFPU;
	__asm __volatile("vstmia\t%0, {d0-d15}" :: "r" (p) : "memory");
}

#ifdef CPU_CORTEX
/* FLDMD <X>, {d16-d31} */
static inline void
load_vfpregs_hi(const uint64_t *p)
{
	SETFPU;
	__asm __volatile("vldmia\t%0, {d16-d31}" :: "r" (&p[16]) : "memory");
}

/* FSTMD <X>, {d16-d31} */
static inline void
save_vfpregs_hi(uint64_t *p)
{
	SETFPU;
	__asm __volatile("vstmia\t%0, {d16-d31}" :: "r" (&p[16]) : "memory");
}
#endif

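/*
 * d16-d31 only exist on cores with the 32-double-register VFP bank.
 * When both CPU_ARM11 and CPU_CORTEX are configured, the switch below
 * restricts the hi-bank transfer to the known Cortex FPU IDs; in a
 * Cortex-only kernel the preprocessor strips the switch away and the
 * transfer is unconditional.
 */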
static inline void
load_vfpregs(const struct vfpreg *fregs)
{
	load_vfpregs_lo(fregs->vfp_regs);
#ifdef CPU_CORTEX
#ifdef CPU_ARM11
	switch (curcpu()->ci_vfp_id) {
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
	case FPU_VFP_CORTEXA15:
	case FPU_VFP_CORTEXA15_QEMU:
	case FPU_VFP_CORTEXA53:
	case FPU_VFP_CORTEXA57:
#endif
		load_vfpregs_hi(fregs->vfp_regs);
#ifdef CPU_ARM11
		break;
	}
#endif
#endif
}

static inline void
save_vfpregs(struct vfpreg *fregs)
{
	save_vfpregs_lo(fregs->vfp_regs);
#ifdef CPU_CORTEX
#ifdef CPU_ARM11
	switch (curcpu()->ci_vfp_id) {
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
	case FPU_VFP_CORTEXA15:
	case FPU_VFP_CORTEXA15_QEMU:
	case FPU_VFP_CORTEXA53:
	case FPU_VFP_CORTEXA57:
#endif
		save_vfpregs_hi(fregs->vfp_regs);
#ifdef CPU_ARM11
		break;
	}
#endif
#endif
}

/* The real handler for VFP bounces.  */
static int vfp_handler(u_int, u_int, trapframe_t *, int);
#ifdef CPU_CORTEX
static int neon_handler(u_int, u_int, trapframe_t *, int);
#endif

static void vfp_state_load(lwp_t *, u_int);
static void vfp_state_save(lwp_t *);
static void vfp_state_release(lwp_t *);

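/*
 * pcu(9) hooks: the VFP is managed as a per-CPU unit, so its state is
 * loaded lazily on first use and saved or released when the owning
 * LWP migrates, sleeps, or exits.
 */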
const pcu_ops_t arm_vfp_ops = {
	.pcu_id = PCU_FPU,
	.pcu_state_save = vfp_state_save,
	.pcu_state_load = vfp_state_load,
	.pcu_state_release = vfp_state_release,
};

/* determine what bits can be changed */
uint32_t vfp_fpscr_changable = VFP_FPSCR_CSUM;
/* default to run fast */
uint32_t vfp_fpscr_default = (VFP_FPSCR_DN | VFP_FPSCR_FZ | VFP_FPSCR_RN);

#else
/* determine what bits can be changed */
uint32_t vfp_fpscr_changable = VFP_FPSCR_CSUM|VFP_FPSCR_ESUM|VFP_FPSCR_RMODE;
#endif /* FPU_VFP */

static int
vfp_fpscr_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{
	struct lwp * const l = curlwp;
	const u_int regno = (insn >> 12) & 0xf;
	/*
	 * Only match moves to/from the FPSCR register, and reject
	 * SP, LR, and PC as the transfer register.
	 */
	if ((insn & 0xffef0fff) != 0xeee10a10 || regno > 12)
		return 1;

	struct pcb * const pcb = lwp_getpcb(l);

#ifdef FPU_VFP
	/*
	 * If the LWP's FPU state is live somewhere, just reenable the
	 * VFP and retry the instruction (the only safe thing to do,
	 * since the pcb copy is stale).
	 */
	if (pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN)
		return 1;

	if (__predict_false(!vfp_used_p(l))) {
		pcb->pcb_vfp.vfp_fpscr = vfp_fpscr_default;
	}
#endif

	/*
	 * We now know the pcb has the saved copy.
	 */
	register_t * const regp = &frame->tf_r0 + regno;
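	/*
	 * Bit 20 is the L bit of VMRS/VMSR: set for a read of FPSCR
	 * into a core register, clear for a write.  Only the bits in
	 * vfp_fpscr_changable may be modified from userland.
	 */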
	if (insn & 0x00100000) {
		*regp = pcb->pcb_vfp.vfp_fpscr;
	} else {
		pcb->pcb_vfp.vfp_fpscr &= ~vfp_fpscr_changable;
		pcb->pcb_vfp.vfp_fpscr |= *regp & vfp_fpscr_changable;
	}

	curcpu()->ci_vfp_evs[0].ev_count++;

	frame->tf_pc += INSN_SIZE;
	return 0;
}

#ifndef FPU_VFP
void
vfp_detect(struct cpu_info *ci)
{
	ci->ci_vfp_id = 0;
}

/*
 * If we don't want VFP support, we still need to handle emulating VFP FPSCR
 * instructions.
 */
void
vfp_attach(struct cpu_info *ci)
{
	if (CPU_IS_PRIMARY(ci)) {
		install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
	}
	evcnt_attach_dynamic(&ci->ci_vfp_evs[0], EVCNT_TYPE_TRAP, NULL,
	    ci->ci_cpuname, "vfp fpscr traps");
}

#else
245 vfp_detect(struct cpu_info *ci)
246 {
247 
248 	if (CPU_ID_ARM11_P(ci->ci_arm_cpuid)
249 	    || CPU_ID_MV88SV58XX_P(ci->ci_arm_cpuid)
250 	    || CPU_ID_CORTEX_P(ci->ci_arm_cpuid)) {
251 #if 0
252 		const uint32_t nsacr = armreg_nsacr_read();
253 		const uint32_t nsacr_vfp = __BITS(VFP_COPROC,VFP_COPROC2);
254 		if ((nsacr & nsacr_vfp) != nsacr_vfp) {
255 			ci->ci_fp_id = 0;
256 			return;
257 		}
258 #endif
259 		const uint32_t cpacr_vfp = CPACR_CPn(VFP_COPROC);
260 		const uint32_t cpacr_vfp2 = CPACR_CPn(VFP_COPROC2);
261 
262 		/*
263 		 * We first need to enable access to the coprocessors.
264 		 */
265 		uint32_t cpacr = armreg_cpacr_read();
266 		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp);
267 		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp2);
268 		armreg_cpacr_write(cpacr);
269 
270 		isb();
271 
272 		/*
273 		 * If we could enable them, then they exist.
274 		 */
275 		cpacr = armreg_cpacr_read();
276 		bool vfp_p = __SHIFTOUT(cpacr, cpacr_vfp2) == CPACR_ALL
277 		    && __SHIFTOUT(cpacr, cpacr_vfp) == CPACR_ALL;
278 		if (!vfp_p) {
279 			ci->ci_vfp_id = 0;
280 			return;
281 		}
282 	}
283 
284 	/* borrow the ci_vfd_id field for VFP detection */
285 	ci->ci_vfp_id = -1;
286 
287 	const uint32_t fpsid = armreg_fpsid_read();
288 	if (ci->ci_vfp_id == 0) {
289 		return;
290 	}
291 
292 	ci->ci_vfp_id = fpsid;
293 
294 	ci->ci_mvfr[0] = armreg_mvfr0_read();
295 	ci->ci_mvfr[1] = armreg_mvfr1_read();
296 
297 }
298 
void
vfp_attach(struct cpu_info *ci)
{
	const char *model = NULL;

	switch (ci->ci_vfp_id & ~VFP_FPSID_REV_MSK) {
	case FPU_VFP10_ARM10E:
		model = "VFP10 R1";
		break;
	case FPU_VFP11_ARM11:
		model = "VFP11";
		break;
	case FPU_VFP_MV88SV58XX:
		model = "VFP3";
		break;
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
	case FPU_VFP_CORTEXA12:
	case FPU_VFP_CORTEXA15:
	case FPU_VFP_CORTEXA15_QEMU:
	case FPU_VFP_CORTEXA17:
	case FPU_VFP_CORTEXA53:
	case FPU_VFP_CORTEXA57:
		if (armreg_cpacr_read() & CPACR_V7_ASEDIS) {
			model = "VFP 4.0+";
		} else {
			model = "NEON MPE (VFP 3.0+)";
			cpu_neon_present = 1;
		}
		break;
	default:
		aprint_normal_dev(ci->ci_dev, "unrecognized VFP version %#x\n",
		    ci->ci_vfp_id);
		if (CPU_IS_PRIMARY(ci))
			install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
		vfp_fpscr_changable = VFP_FPSCR_CSUM|VFP_FPSCR_ESUM
		    |VFP_FPSCR_RMODE;
		vfp_fpscr_default = 0;
		return;
	}

	cpu_fpu_present = 1;

	const uint32_t f0 = ci->ci_mvfr[0];
	const uint32_t f1 = ci->ci_mvfr[1];
	aprint_normal("vfp%d at %s: %s%s%s%s%s\n",
	    device_unit(ci->ci_dev),
	    device_xname(ci->ci_dev),
	    model,
	    ((f0 & ARM_MVFR0_ROUNDING_MASK) ? ", rounding" : ""),
	    ((f0 & ARM_MVFR0_EXCEPT_MASK) ? ", exceptions" : ""),
	    ((f1 & ARM_MVFR1_D_NAN_MASK) ? ", NaN propagation" : ""),
	    ((f1 & ARM_MVFR1_FTZ_MASK) ? ", denormals" : ""));

	aprint_debug("vfp%d: mvfr: [0]=%#x [1]=%#x\n",
	    device_unit(ci->ci_dev), f0, f1);

	if (CPU_IS_PRIMARY(ci)) {
		cpu_media_and_vfp_features[0] = f0;
		cpu_media_and_vfp_features[1] = f1;

		if (f0 & ARM_MVFR0_ROUNDING_MASK) {
			vfp_fpscr_changable |= VFP_FPSCR_RMODE;
		}
		if (f0 & ARM_MVFR0_EXCEPT_MASK) {
			vfp_fpscr_changable |= VFP_FPSCR_ESUM;
		}
		// If hardware supports propagation of NaNs, select it.
		if (f1 & ARM_MVFR1_D_NAN_MASK) {
			vfp_fpscr_default &= ~VFP_FPSCR_DN;
			vfp_fpscr_changable |= VFP_FPSCR_DN;
		}
		// If hardware supports denormalized numbers, use it.
		if (f1 & ARM_MVFR1_FTZ_MASK) {
			vfp_fpscr_default &= ~VFP_FPSCR_FZ;
			vfp_fpscr_changable |= VFP_FPSCR_FZ;
		}

		install_coproc_handler(VFP_COPROC, vfp_handler);
		install_coproc_handler(VFP_COPROC2, vfp_handler);
#ifdef CPU_CORTEX
		if (cpu_neon_present) {
			install_coproc_handler(CORE_UNKNOWN_HANDLER,
			    neon_handler);
			aes_md_init(&aes_neon_impl);
			chacha_md_init(&chacha_neon_impl);
		}
#endif
	}

	evcnt_attach_dynamic(&ci->ci_vfp_evs[0], EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp coproc use");
	evcnt_attach_dynamic(&ci->ci_vfp_evs[1], EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp coproc re-use");
	evcnt_attach_dynamic(&ci->ci_vfp_evs[2], EVCNT_TYPE_TRAP, NULL,
	    ci->ci_cpuname, "vfp coproc fault");
}

/* The real handler for VFP bounces.  */
static int
vfp_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{
	struct cpu_info * const ci = curcpu();
	uint32_t fpexc;

	/* This shouldn't ever happen.  */
	if (fault_code != FAULT_USER &&
	    (curlwp->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) == LW_SYSTEM)
		panic("VFP fault at %#x in non-user mode", frame->tf_pc);

	if (ci->ci_vfp_id == 0) {
		/* No VFP detected, just fault.  */
		return 1;
	}

	/*
	 * If we already own the FPU and it's enabled (and there's no
	 * exception), raise SIGILL.  If there is an exception, raise
	 * SIGFPE.
	 */
	if (curlwp->l_pcu_cpu[PCU_FPU] == ci) {
		KASSERT(ci->ci_pcu_curlwp[PCU_FPU] == curlwp);

		fpexc = armreg_fpexc_read();
		if (fpexc & VFP_FPEXC_EN) {
			if ((fpexc & VFP_FPEXC_EX) == 0) {
				return 1;	/* SIGILL */
			} else {
				goto fpe;	/* SIGFPE; skip pcu_load(9) */
			}
		}
	}

	/*
	 * Make sure we own the FP.
	 */
	pcu_load(&arm_vfp_ops);

	fpexc = armreg_fpexc_read();
	if (fpexc & VFP_FPEXC_EX) {
		ksiginfo_t ksi;
		KASSERT(fpexc & VFP_FPEXC_EN);

fpe:
		curcpu()->ci_vfp_evs[2].ev_count++;

		/*
		 * Need to clear the exception condition so any signal
		 * and future use can proceed.
		 */
		armreg_fpexc_write(fpexc & ~(VFP_FPEXC_EX|VFP_FPEXC_FSUM));

		pcu_save(&arm_vfp_ops, curlwp);

		/*
		 * XXX Need to emulate bounce instructions here to get correct
		 * XXX exception codes, etc.
		 */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGFPE;
		if (fpexc & VFP_FPEXC_IXF)
			ksi.ksi_code = FPE_FLTRES;
		else if (fpexc & VFP_FPEXC_UFF)
			ksi.ksi_code = FPE_FLTUND;
		else if (fpexc & VFP_FPEXC_OFF)
			ksi.ksi_code = FPE_FLTOVF;
		else if (fpexc & VFP_FPEXC_DZF)
			ksi.ksi_code = FPE_FLTDIV;
		else if (fpexc & VFP_FPEXC_IOF)
			ksi.ksi_code = FPE_FLTINV;
		ksi.ksi_addr = (uint32_t *)address;
		ksi.ksi_trap = 0;
		trapsignal(curlwp, &ksi);
		return 0;
	}

	/* Need to restart the faulted instruction.  */
//	frame->tf_pc -= INSN_SIZE;
	return 0;
}

#ifdef CPU_CORTEX
/* The real handler for NEON bounces.  */
static int
neon_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{
	struct cpu_info * const ci = curcpu();

	if (ci->ci_vfp_id == 0)
		/* No VFP detected, just fault.  */
		return 1;

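	/*
	 * NEON data-processing instructions occupy the unconditional
	 * 0xf2/0xf3 encoding space, and NEON element/structure loads
	 * and stores the 0xf4 space; anything else is not NEON.
	 */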
	if ((insn & 0xfe000000) != 0xf2000000
	    && (insn & 0xfe000000) != 0xf4000000)
		/* Not a NEON instruction, just fault.  */
		return 1;

	/* This shouldn't ever happen.  */
	if (fault_code != FAULT_USER &&
	    (curlwp->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) == LW_SYSTEM)
		panic("NEON fault in non-user mode");

	/* If we already own the FPU and it's enabled, raise SIGILL.  */
	if (curcpu()->ci_pcu_curlwp[PCU_FPU] == curlwp
	    && (armreg_fpexc_read() & VFP_FPEXC_EN) != 0)
		return 1;

	pcu_load(&arm_vfp_ops);

	/* Need to restart the faulted instruction.  */
//	frame->tf_pc -= INSN_SIZE;
	return 0;
}
#endif

static void
vfp_state_load(lwp_t *l, u_int flags)
{
	struct pcb * const pcb = lwp_getpcb(l);
	struct vfpreg * const fregs = &pcb->pcb_vfp;

	/*
	 * Instrument VFP usage -- if a process has not previously
	 * used the VFP, mark it as having used VFP for the first time,
	 * and count this event.
	 *
	 * If a process has used the VFP, count a "used VFP, and took
	 * a trap to use it again" event.
	 */
	if (__predict_false((flags & PCU_VALID) == 0)) {
		curcpu()->ci_vfp_evs[0].ev_count++;
		pcb->pcb_vfp.vfp_fpscr = vfp_fpscr_default;
	} else {
		curcpu()->ci_vfp_evs[1].ev_count++;
	}

	KASSERT((armreg_fpexc_read() & VFP_FPEXC_EN) == 0);
	/*
	 * If the VFP state is still resident on this CPU we only need
	 * to re-enable the unit: we must be bouncing an instruction.
	 */
	if (flags & PCU_REENABLE) {
		uint32_t fpexc = armreg_fpexc_read();
		armreg_fpexc_write(fpexc | VFP_FPEXC_EN);
		fregs->vfp_fpexc |= VFP_FPEXC_EN;
		return;
	}
	KASSERT((fregs->vfp_fpexc & VFP_FPEXC_EN) == 0);

	/*
	 * Load and enable the VFP (so that we can write the registers).
	 */
	fregs->vfp_fpexc |= VFP_FPEXC_EN;
	armreg_fpexc_write(fregs->vfp_fpexc);
	KASSERT(curcpu()->ci_pcu_curlwp[PCU_FPU] == NULL);
	KASSERT(l->l_pcu_cpu[PCU_FPU] == NULL);

	load_vfpregs(fregs);
	armreg_fpscr_write(fregs->vfp_fpscr);

	if (fregs->vfp_fpexc & VFP_FPEXC_EX) {
		/* Need to restore the exception handling state.  */
		armreg_fpinst_write(fregs->vfp_fpinst);
		if (fregs->vfp_fpexc & VFP_FPEXC_FP2V)
			armreg_fpinst2_write(fregs->vfp_fpinst2);
	}
}

static void
vfp_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);
	struct vfpreg * const fregs = &pcb->pcb_vfp;
	uint32_t fpexc = armreg_fpexc_read();

	KASSERT(curcpu()->ci_pcu_curlwp[PCU_FPU] == l);
	KASSERT(curcpu() == l->l_pcu_cpu[PCU_FPU]);
	KASSERT(curlwp == l || curlwp->l_pcu_cpu[PCU_FPU] != curcpu());
	/*
	 * Enable the VFP (so we can read the registers).
	 * Make sure the exception bit is cleared so that we can
	 * safely dump the registers.
	 */
	armreg_fpexc_write((fpexc | VFP_FPEXC_EN) & ~VFP_FPEXC_EX);

	fregs->vfp_fpexc = fpexc;
	if (fpexc & VFP_FPEXC_EX) {
		/* Need to save the exception handling state.  */
		fregs->vfp_fpinst = armreg_fpinst_read();
		if (fpexc & VFP_FPEXC_FP2V)
			fregs->vfp_fpinst2 = armreg_fpinst2_read();
	}
	fregs->vfp_fpscr = armreg_fpscr_read();
	save_vfpregs(fregs);

	/* Disable the VFP.  */
	armreg_fpexc_write(fpexc & ~VFP_FPEXC_EN);
}

static void
vfp_state_release(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	/*
	 * Now mark the VFP as disabled (our state has already been
	 * saved or is being discarded).
	 */
	pcb->pcb_vfp.vfp_fpexc &= ~VFP_FPEXC_EN;

	/*
	 * Turn off the FPU so the next time a VFP instruction is issued
	 * an exception happens.  We don't know if this LWP's state was
	 * loaded, but if we turned off the FPU for some other LWP, when
	 * pcu_load invokes vfp_state_load it will see that VFP_FPEXC_EN
	 * is still set, so it just restores fpexc and returns since its
	 * contents are still sitting in the VFP.
	 */
	armreg_fpexc_write(armreg_fpexc_read() & ~VFP_FPEXC_EN);
}

void
vfp_savecontext(lwp_t *l)
{
	pcu_save(&arm_vfp_ops, l);
}

void
vfp_discardcontext(lwp_t *l, bool used_p)
{
	pcu_discard(&arm_vfp_ops, l, used_p);
}

bool
vfp_used_p(const lwp_t *l)
{
	return pcu_valid_p(&arm_vfp_ops, l);
}

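/*
 * Copy the VFP state out to, or back in from, an mcontext, e.g. for
 * signal delivery and getcontext/setcontext.  pcu_save() flushes any
 * live state to the PCB first; pcu_discard() marks the old state
 * stale before it is overwritten.
 */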
void
vfp_getcontext(struct lwp *l, mcontext_t *mcp, int *flagsp)
{
	if (vfp_used_p(l)) {
		const struct pcb * const pcb = lwp_getpcb(l);

		pcu_save(&arm_vfp_ops, l);
		mcp->__fpu.__vfpregs.__vfp_fpscr = pcb->pcb_vfp.vfp_fpscr;
		memcpy(mcp->__fpu.__vfpregs.__vfp_fstmx, pcb->pcb_vfp.vfp_regs,
		    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
		*flagsp |= _UC_FPU|_UC_ARM_VFP;
	}
}

void
vfp_setcontext(struct lwp *l, const mcontext_t *mcp)
{
	struct pcb * const pcb = lwp_getpcb(l);

	pcu_discard(&arm_vfp_ops, l, true);
	pcb->pcb_vfp.vfp_fpscr = mcp->__fpu.__vfpregs.__vfp_fpscr;
	memcpy(pcb->pcb_vfp.vfp_regs, mcp->__fpu.__vfpregs.__vfp_fstmx,
	    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
}

/*
 * True if this is a system thread with its own private FPU state.
 */
static inline bool
lwp_system_fpu_p(struct lwp *l)
{

	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
	    (LW_SYSTEM|LW_SYSTEM_FPU);
}

static const struct vfpreg zero_vfpreg;

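/*
 * fpu_kern_enter()/fpu_kern_leave() bracket in-kernel use of the VFP,
 * e.g. by the NEON AES and ChaCha implementations registered above.
 * Enter saves any user FPU state on this CPU and enables the unit;
 * leave zeroes the registers (so nothing can leak) and disables it
 * again.  Code between the two must not sleep or be preempted.
 */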
void
fpu_kern_enter(void)
{
	struct cpu_info *ci;
	uint32_t fpexc;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	/*
	 * Block interrupts up to IPL_VM.  We must block preemption
	 * since -- if this is a user thread -- there is nowhere to
	 * save the kernel fpu state, and if we want this to be usable
	 * in interrupts, we can't let interrupts interfere with the
	 * fpu state in use since there's nowhere for them to save it.
	 */
	s = splvm();
	ci = curcpu();
	KASSERTMSG(ci->ci_cpl <= IPL_VM, "cpl=%d", ci->ci_cpl);
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/* Save any fpu state on the current CPU.  */
	pcu_save_all_on_cpu();

	/* Enable the fpu.  */
	fpexc = armreg_fpexc_read();
	fpexc |= VFP_FPEXC_EN;
	fpexc &= ~VFP_FPEXC_EX;
	armreg_fpexc_write(fpexc);
}

void
fpu_kern_leave(void)
{
	struct cpu_info *ci = curcpu();
	int s;
	uint32_t fpexc;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	KASSERT(ci->ci_cpl == IPL_VM);
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	load_vfpregs(&zero_vfpreg);

	/*
	 * Disable the fpu so that the kernel can't accidentally use
	 * it again.
	 */
	fpexc = armreg_fpexc_read();
	fpexc &= ~VFP_FPEXC_EN;
	armreg_fpexc_write(fpexc);

	/* Restore interrupts.  */
	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}

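/*
 * MD backends for kthread_fpu_enter/exit(9): kernel threads running
 * with LW_SYSTEM_FPU keep a private VFP state, loaded lazily through
 * pcu_load() and zeroed on exit so nothing leaks to the next owner
 * of the registers.
 */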
void
kthread_fpu_enter_md(void)
{

	pcu_load(&arm_vfp_ops);
}

void
kthread_fpu_exit_md(void)
{

	/* XXX Should vfp_state_release zero the registers itself?  */
	load_vfpregs(&zero_vfpreg);
	vfp_discardcontext(curlwp, false);
}

#endif /* FPU_VFP */