/* $NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $ */

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lwp.h>
#include <sys/evcnt.h>

#include <arm/cpufunc.h>
#include <arm/fpu.h>

#include <aarch64/locore.h>
#include <aarch64/reg.h>
#include <aarch64/pcb.h>
#include <aarch64/armreg.h>
#include <aarch64/machdep.h>

static void fpu_state_load(lwp_t *, unsigned int);
static void fpu_state_save(lwp_t *);
static void fpu_state_release(lwp_t *);

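/*
 * Hooks for the machine-independent PCU (per-CPU unit) framework.
 * The FPU is bound to an lwp lazily: fpu_state_load() runs on the
 * first FP/SIMD trap, fpu_state_save() writes the live registers back
 * to the PCB when they are needed elsewhere, and fpu_state_release()
 * revokes access so the next use traps again.
 */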
const pcu_ops_t pcu_fpu_ops = {
	.pcu_id = PCU_FPU,
	.pcu_state_load = fpu_state_load,
	.pcu_state_save = fpu_state_save,
	.pcu_state_release = fpu_state_release
};

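/*
 * Attach per-CPU event counters for FPU use, reuse, save, and
 * release; they are visible with vmstat -e.
 */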
void
fpu_attach(struct cpu_info *ci)
{
	evcnt_attach_dynamic(&ci->ci_vfp_use, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp use");
	evcnt_attach_dynamic(&ci->ci_vfp_reuse, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp reuse");
	evcnt_attach_dynamic(&ci->ci_vfp_save, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp save");
	evcnt_attach_dynamic(&ci->ci_vfp_release, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp release");
}

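/*
 * Load the lwp's FPU state onto the current CPU.  PCU_VALID is clear
 * on the very first use, in which case a fresh register set is
 * initialized with FPCR defaults chosen from what MVFR1_EL1 says the
 * hardware supports.  PCU_REENABLE means the state is still resident
 * on this CPU and only access needs to be re-enabled.
 */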
static void
fpu_state_load(lwp_t *l, unsigned int flags)
{
	struct pcb * const pcb = lwp_getpcb(l);

	KASSERT(l == curlwp);

	if (__predict_false((flags & PCU_VALID) == 0)) {
		uint64_t mvfr1 = reg_mvfr1_el1_read();
		bool fp16 = false;
		uint32_t fpcr = 0;

		/*
		 * Determine whether ARMv8.2-FP16 binary16
		 * floating-point arithmetic is supported.
		 */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPHP)) {
		case MVFR1_FPHP_HALF_ARITH:
			fp16 = true;
			break;
		}

		/* Rounding mode: round to nearest, ties to even. */
		fpcr |= __SHIFTIN(FPCR_RN, FPCR_RMODE);

		/* NaN propagation or default NaN. */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPDNAN)) {
		case MVFR1_FPDNAN_NAN:
			/*
			 * IEEE 754 NaN propagation supported.  Don't
			 * enable default NaN mode.
			 */
			break;
		default:
			/*
			 * IEEE 754 NaN propagation not supported, so
			 * enable default NaN mode.
			 */
			fpcr |= FPCR_DN;
		}

		/* Subnormal arithmetic or flush-to-zero. */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPFTZ)) {
		case MVFR1_FPFTZ_DENORMAL:
			/*
			 * IEEE 754 subnormal arithmetic supported.
			 * Don't enable flush-to-zero mode.
			 */
			break;
		default:
			/*
			 * IEEE 754 subnormal arithmetic not supported,
			 * so enable flush-to-zero mode.  If FP16 is
			 * supported, also enable flush-to-zero for
			 * binary16 arithmetic.
			 */
			fpcr |= FPCR_FZ;
			if (fp16)
				fpcr |= FPCR_FZ16;
		}

		/* initialize fpregs */
		memset(&pcb->pcb_fpregs, 0, sizeof(pcb->pcb_fpregs));
		pcb->pcb_fpregs.fpcr = fpcr;

		curcpu()->ci_vfp_use.ev_count++;
	} else {
		curcpu()->ci_vfp_reuse.ev_count++;
	}

	/* allow user process to use FP */
	l->l_md.md_cpacr = CPACR_FPEN_ALL;
	reg_cpacr_el1_write(CPACR_FPEN_ALL);
	isb();

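	/*
	 * If the state was merely disabled on this CPU (PCU_REENABLE),
	 * the registers are still loaded; otherwise bring them in from
	 * the PCB.
	 */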
	if ((flags & PCU_REENABLE) == 0)
		load_fpregs(&pcb->pcb_fpregs);
}

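/*
 * Save the lwp's live FPU registers back into its PCB.  FPU access
 * may currently be disabled, so enable it for EL1 around the save and
 * disable it again afterwards.
 */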
static void
fpu_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	curcpu()->ci_vfp_save.ev_count++;

	reg_cpacr_el1_write(CPACR_FPEN_EL1);	/* fpreg access enable */
	isb();

	save_fpregs(&pcb->pcb_fpregs);

	reg_cpacr_el1_write(CPACR_FPEN_NONE);	/* fpreg access disable */
	isb();
}

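/*
 * Revoke the lwp's FPU access.  The next FP/SIMD instruction it
 * executes will trap and go through fpu_state_load() again.
 */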
static void
fpu_state_release(lwp_t *l)
{
	curcpu()->ci_vfp_release.ev_count++;

	/* disallow FP use by the user process */
	l->l_md.md_cpacr = CPACR_FPEN_NONE;
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();
}

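/*
 * An all-zero FPU register set (static, so zero-initialized), used to
 * scrub the registers so no stale state leaks between contexts.
 */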
static const struct fpreg zero_fpreg;

/*
 * True if this is a system thread with its own private FPU state.
 */
static inline bool
lwp_system_fpu_p(struct lwp *l)
{

	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
	    (LW_SYSTEM|LW_SYSTEM_FPU);
}

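/*
 * fpu_kern_enter()/fpu_kern_leave() bracket a region of in-kernel
 * FP/SIMD use, along the lines of (a hypothetical caller, not part of
 * this file):
 *
 *	fpu_kern_enter();
 *	...FP/SIMD instructions...
 *	fpu_kern_leave();
 *
 * fpu_kern_enter() saves any user FPU state on the CPU and blocks
 * preemption and interrupts up to IPL_VM while the kernel owns the
 * FPU; fpu_kern_leave() scrubs the registers and restores the IPL.
 * System threads marked LW_SYSTEM_FPU keep their own FPU state and
 * skip all of this outside of interrupt context.
 */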
void
fpu_kern_enter(void)
{
	struct cpu_info *ci;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	/*
	 * Block interrupts up to IPL_VM.  We must block preemption
	 * since -- if this is a user thread -- there is nowhere to
	 * save the kernel fpu state, and if we want this to be usable
	 * in interrupts, we can't let interrupts interfere with the
	 * fpu state in use since there's nowhere for them to save it.
	 */
	s = splvm();
	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_cpl <= IPL_VM || cold, "cpl=%d", ci->ci_cpl);
#endif
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/* Save any fpu state on the current CPU. */
	pcu_save_all_on_cpu();

	/*
	 * Enable the fpu, and wait until it is enabled before
	 * executing any further instructions.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_ALL);
	isb();
}

void
fpu_kern_leave(void)
{
	struct cpu_info *ci;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	ci = curcpu();

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_cpl == IPL_VM || cold);
#endif
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	load_fpregs(&zero_fpreg);

	/*
	 * Disable the fpu so that the kernel can't accidentally use
	 * it again.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();

	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}

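/*
 * MD backends for the kthread_fpu_enter()/kthread_fpu_exit() interface,
 * which lets a kernel thread marked LW_SYSTEM_FPU use the FPU for
 * extended periods: enter loads the thread's private FPU state, and
 * exit scrubs the registers and discards the state.
 */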
void
kthread_fpu_enter_md(void)
{

	fpu_load(curlwp);
}

void
kthread_fpu_exit_md(void)
{

	/* XXX Should fpu_state_release zero the registers itself? */
	load_fpregs(&zero_fpreg);
	fpu_discard(curlwp, 0);
}