xref: /netbsd-src/sys/arch/aarch64/aarch64/fpu.c (revision 040f3e2bc695bf105f2ec65860ff68212c7f8e71)
1 /* $NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $ */
2 
3 /*-
4  * Copyright (c) 2014 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Matt Thomas of 3am Software Foundry.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 
34 __KERNEL_RCSID(1, "$NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $");
35 
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/cpu.h>
39 #include <sys/kernel.h>
40 #include <sys/kthread.h>
41 #include <sys/lwp.h>
42 #include <sys/evcnt.h>
43 
44 #include <arm/cpufunc.h>
45 #include <arm/fpu.h>
46 #include <arm/cpufunc.h>
47 
48 #include <aarch64/locore.h>
49 #include <aarch64/reg.h>
50 #include <aarch64/pcb.h>
51 #include <aarch64/armreg.h>
52 #include <aarch64/machdep.h>
53 
54 static void fpu_state_load(lwp_t *, unsigned int);
55 static void fpu_state_save(lwp_t *);
56 static void fpu_state_release(lwp_t *);
57 
/*
 * pcu(9) callbacks for lazy FP/SIMD context management: the MI PCU
 * framework calls these to load, save, and release an lwp's FPU
 * register state on the current CPU.
 */
const pcu_ops_t pcu_fpu_ops = {
	.pcu_id = PCU_FPU,
	.pcu_state_load = fpu_state_load,
	.pcu_state_save = fpu_state_save,
	.pcu_state_release = fpu_state_release
};
64 
65 void
fpu_attach(struct cpu_info * ci)66 fpu_attach(struct cpu_info *ci)
67 {
68 	evcnt_attach_dynamic(&ci->ci_vfp_use, EVCNT_TYPE_MISC, NULL,
69 	    ci->ci_cpuname, "vfp use");
70 	evcnt_attach_dynamic(&ci->ci_vfp_reuse, EVCNT_TYPE_MISC, NULL,
71 	    ci->ci_cpuname, "vfp reuse");
72 	evcnt_attach_dynamic(&ci->ci_vfp_save, EVCNT_TYPE_MISC, NULL,
73 	    ci->ci_cpuname, "vfp save");
74 	evcnt_attach_dynamic(&ci->ci_vfp_release, EVCNT_TYPE_MISC, NULL,
75 	    ci->ci_cpuname, "vfp release");
76 }
77 
78 static void
fpu_state_load(lwp_t * l,unsigned int flags)79 fpu_state_load(lwp_t *l, unsigned int flags)
80 {
81 	struct pcb * const pcb = lwp_getpcb(l);
82 
83 	KASSERT(l == curlwp);
84 
85 	if (__predict_false((flags & PCU_VALID) == 0)) {
86 		uint64_t mvfr1 = reg_mvfr1_el1_read();
87 		bool fp16 = false;
88 		uint32_t fpcr = 0;
89 
90 		/*
91 		 * Determine whether ARMv8.2-FP16 binary16
92 		 * floating-point arithmetic is supported.
93 		 */
94 		switch (__SHIFTOUT(mvfr1, MVFR1_FPHP)) {
95 		case MVFR1_FPHP_HALF_ARITH:
96 			fp16 = true;
97 			break;
98 		}
99 
100 		/* Rounding mode: round to nearest, ties to even.  */
101 		fpcr |= __SHIFTIN(FPCR_RN, FPCR_RMODE);
102 
103 		/* NaN propagation or default NaN.   */
104 		switch (__SHIFTOUT(mvfr1, MVFR1_FPDNAN)) {
105 		case MVFR1_FPDNAN_NAN:
106 			/*
107 			 * IEEE 754 NaN propagation supported.  Don't
108 			 * enable default NaN mode.
109 			 */
110 			break;
111 		default:
112 			/*
113 			 * IEEE 754 NaN propagation not supported, so
114 			 * enable default NaN mode.
115 			 */
116 			fpcr |= FPCR_DN;
117 		}
118 
119 		/* Subnormal arithmetic or flush-to-zero.  */
120 		switch (__SHIFTOUT(mvfr1, MVFR1_FPFTZ)) {
121 		case MVFR1_FPFTZ_DENORMAL:
122 			/*
123 			 * IEEE 754 subnormal arithmetic supported.
124 			 * Don't enable flush-to-zero mode.
125 			 */
126 			break;
127 		default:
128 			/*
129 			 * IEEE 754 subnormal arithmetic not supported,
130 			 * so enable flush-to-zero mode.  If FP16 is
131 			 * supported, also enable flush-to-zero for
132 			 * binary16 arithmetic.
133 			 */
134 			fpcr |= FPCR_FZ;
135 			if (fp16)
136 				fpcr |= FPCR_FZ16;
137 		}
138 
139 		/* initialize fpregs */
140 		memset(&pcb->pcb_fpregs, 0, sizeof(pcb->pcb_fpregs));
141 		pcb->pcb_fpregs.fpcr = fpcr;
142 
143 		curcpu()->ci_vfp_use.ev_count++;
144 	} else {
145 		curcpu()->ci_vfp_reuse.ev_count++;
146 	}
147 
148 	/* allow user process to use FP */
149 	l->l_md.md_cpacr = CPACR_FPEN_ALL;
150 	reg_cpacr_el1_write(CPACR_FPEN_ALL);
151 	isb();
152 
153 	if ((flags & PCU_REENABLE) == 0)
154 		load_fpregs(&pcb->pcb_fpregs);
155 }
156 
/*
 * pcu(9) "save" hook: write lwp l's live FP/SIMD registers back into
 * its PCB.  Register access is enabled just long enough to perform
 * the save and disabled again afterward; each CPACR write is followed
 * by an isb so the new permission takes effect before the next
 * FP instruction.
 */
static void
fpu_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	curcpu()->ci_vfp_save.ev_count++;

	reg_cpacr_el1_write(CPACR_FPEN_EL1);	/* fpreg access enable */
	isb();

	save_fpregs(&pcb->pcb_fpregs);

	reg_cpacr_el1_write(CPACR_FPEN_NONE);	/* fpreg access disable */
	isb();
}
172 
/*
 * pcu(9) "release" hook: revoke FP/SIMD access for lwp l by clearing
 * the enable bits in CPACR_EL1, and record the setting in the lwp's
 * cached md_cpacr as well.
 */
static void
fpu_state_release(lwp_t *l)
{
	curcpu()->ci_vfp_release.ev_count++;

	/* disallow user process to use FP */
	l->l_md.md_cpacr = CPACR_FPEN_NONE;
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();
}
183 
/*
 * All-zero FP register set, loaded to scrub the FPU so stale contents
 * cannot leak (see fpu_kern_leave and kthread_fpu_exit_md below).
 */
static const struct fpreg zero_fpreg;
185 
186 /*
187  * True if this is a system thread with its own private FPU state.
188  */
189 static inline bool
lwp_system_fpu_p(struct lwp * l)190 lwp_system_fpu_p(struct lwp *l)
191 {
192 
193 	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
194 	    (LW_SYSTEM|LW_SYSTEM_FPU);
195 }
196 
/*
 * Begin a section in which the kernel may use the FPU on this CPU.
 * Raises the IPL (via splvm) to keep interrupts from clobbering the
 * in-kernel FPU state, saves any user FPU state live on the CPU, and
 * enables FPU access.  Must be paired with fpu_kern_leave().
 */
void
fpu_kern_enter(void)
{
	struct cpu_info *ci;
	int s;

	/*
	 * System threads with a private FPU state (LW_SYSTEM_FPU) need
	 * none of this in thread context -- their state is already
	 * theirs to use.  Softints are not allowed here either.
	 */
	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	/*
	 * Block interrupts up to IPL_VM.  We must block preemption
	 * since -- if this is a user thread -- there is nowhere to
	 * save the kernel fpu state, and if we want this to be usable
	 * in interrupts, we can't let interrupts interfere with the
	 * fpu state in use since there's nowhere for them to save it.
	 */
	s = splvm();
	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_cpl <= IPL_VM || cold, "cpl=%d", ci->ci_cpl);
#endif
	/* Nesting is not allowed: -1 means "not in fpu_kern".  */
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/* Save any fpu state on the current CPU.  */
	pcu_save_all_on_cpu();

	/*
	 * Enable the fpu, and wait until it is enabled before
	 * executing any further instructions.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_ALL);
	isb();
}
238 
/*
 * End a fpu_kern_enter() section: scrub the FPU registers, disable
 * kernel FPU access, and restore the IPL saved at enter time.
 */
void
fpu_kern_leave(void)
{
	struct cpu_info *ci;
	int s;

	/* Mirror the fpu_kern_enter() early return for LW_SYSTEM_FPU.  */
	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	ci = curcpu();

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_cpl == IPL_VM || cold);
#endif
	/* Must be inside a fpu_kern_enter() section.  */
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	load_fpregs(&zero_fpreg);

	/*
	 * Disable the fpu so that the kernel can't accidentally use
	 * it again.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();

	/* Restore the IPL that fpu_kern_enter() stashed.  */
	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}
280 
/*
 * MD part of kthread_fpu_enter: load the current (system) lwp's FPU
 * state onto this CPU so the thread can use the FPU.
 */
void
kthread_fpu_enter_md(void)
{

	fpu_load(curlwp);
}
287 
/*
 * MD part of kthread_fpu_exit: scrub the FPU registers so no state
 * leaks, then discard the current lwp's FPU state.
 */
void
kthread_fpu_exit_md(void)
{

	/* XXX Should fpu_state_release zero the registers itself?  */
	load_fpregs(&zero_fpreg);
	fpu_discard(curlwp, 0);
}
296