1 /*	$NetBSD: fpu.c,v 1.89 2024/06/21 17:24:08 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
5  * rights reserved.
6  *
7  * This code is derived from software developed for The NetBSD Foundation
8  * by Andrew Doran and Maxime Villard.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1991 The Regents of the University of California.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
61  */
62 
63 /*
64  * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
65  * Copyright (c) 1990 William Jolitz.
66  *
67  * Redistribution and use in source and binary forms, with or without
68  * modification, are permitted provided that the following conditions
69  * are met:
70  * 1. Redistributions of source code must retain the above copyright
71  *    notice, this list of conditions and the following disclaimer.
72  * 2. Redistributions in binary form must reproduce the above copyright
73  *    notice, this list of conditions and the following disclaimer in the
74  *    documentation and/or other materials provided with the distribution.
75  * 3. All advertising materials mentioning features or use of this software
76  *    must display the following acknowledgement:
77  *	This product includes software developed by the University of
78  *	California, Berkeley and its contributors.
79  * 4. Neither the name of the University nor the names of its contributors
80  *    may be used to endorse or promote products derived from this software
81  *    without specific prior written permission.
82  *
83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93  * SUCH DAMAGE.
94  *
95  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
96  */
97 
98 #include <sys/cdefs.h>
99 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.89 2024/06/21 17:24:08 riastradh Exp $");
100 
101 #include "opt_ddb.h"
102 #include "opt_multiprocessor.h"
103 
104 #include <sys/param.h>
105 #include <sys/systm.h>
106 #include <sys/conf.h>
107 #include <sys/cpu.h>
108 #include <sys/file.h>
109 #include <sys/proc.h>
110 #include <sys/kernel.h>
111 #include <sys/sysctl.h>
112 #include <sys/xcall.h>
113 
114 #include <machine/cpu.h>
115 #include <machine/cpuvar.h>
116 #include <machine/cputypes.h>
117 #include <machine/intr.h>
118 #include <machine/cpufunc.h>
119 #include <machine/pcb.h>
120 #include <machine/trap.h>
121 #include <machine/specialreg.h>
122 #include <x86/cpu.h>
123 #include <x86/fpu.h>
124 
125 #ifdef DDB
126 #include <ddb/ddb.h>
127 #endif
128 
129 #ifdef XENPV
130 #define clts() HYPERVISOR_fpu_taskswitch(0)
131 #define stts() HYPERVISOR_fpu_taskswitch(1)
132 #endif
133 
134 void fpu_handle_deferred(void);
135 void fpu_switch(struct lwp *, struct lwp *);
136 
137 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
138 
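/*
 * Return a pointer to the FPU save area in the lwp's PCB.  If the lwp
 * is curlwp, its live FPU state is first flushed to memory so the area
 * is up to date.  Only valid for user lwps.
 */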
139 static inline union savefpu *
140 fpu_lwp_area(struct lwp *l)
141 {
142 	struct pcb *pcb = lwp_getpcb(l);
143 	union savefpu *area = &pcb->pcb_savefpu;
144 
145 	KASSERT((l->l_flag & LW_SYSTEM) == 0);
146 	if (l == curlwp) {
147 		fpu_save();
148 	}
149 	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));
150 
151 	return area;
152 }
153 
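/*
 * If the lwp's FPU state is currently loaded in the CPU, save it into
 * the lwp's PCB and clear MDL_FPU_IN_CPU.
 */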
154 static inline void
155 fpu_save_lwp(struct lwp *l)
156 {
157 	struct pcb *pcb = lwp_getpcb(l);
158 	union savefpu *area = &pcb->pcb_savefpu;
159 	int s;
160 
161 	s = splvm();
162 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
163 		KASSERT((l->l_flag & LW_SYSTEM) == 0);
164 		fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
165 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
166 	}
167 	splx(s);
168 }
169 
170 /*
171  * Bring curlwp's FPU state into memory. It will get installed back in the CPU
172  * when returning to userland.
173  */
174 void
175 fpu_save(void)
176 {
177 	fpu_save_lwp(curlwp);
178 }
179 
180 void
181 fpuinit(struct cpu_info *ci)
182 {
183 	/*
184 	 * This might not be strictly necessary since it will be initialized
185 	 * for each process. However it does no harm.
186 	 */
187 	clts();
188 	fninit();
189 	stts();
190 }
191 
192 void
193 fpuinit_mxcsr_mask(void)
194 {
195 #ifndef XENPV
196 	union savefpu fpusave __aligned(64);
197 	u_long psl;
198 
199 	memset(&fpusave, 0, sizeof(fpusave));
200 
201 	/* Disable interrupts, and enable FPU */
202 	psl = x86_read_psl();
203 	x86_disable_intr();
204 	clts();
205 
206 	/* Fill in the FPU area */
207 	fxsave(&fpusave);
208 
209 	/* Restore previous state */
210 	stts();
211 	x86_write_psl(psl);
212 
213 	if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
214 		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
215 	} else {
216 		x86_fpu_mxcsr_mask = fpusave.sv_xmm.fx_mxcsr_mask;
217 	}
218 #else
219 	/*
220 	 * XXX XXX XXX: On Xen the FXSAVE above faults. That's because
221 	 * &fpusave is not 16-byte aligned. Stack alignment problem
222 	 * somewhere, it seems.
223 	 */
224 	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
225 #endif
226 }
227 
228 static inline void
229 fpu_errata_amd(void)
230 {
231 	uint16_t sw;
232 
233 	/*
234 	 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
235 	 * when FSW.ES=0, leaking other threads' execution history.
236 	 *
237 	 * Clear them manually by loading a zero (fldummy). We do this
238 	 * unconditionally, regardless of FSW.ES.
239 	 *
240 	 * Before that, clear the ES bit in the x87 status word if it is
241 	 * currently set, in order to avoid causing a fault in the
242 	 * upcoming load.
243 	 *
244 	 * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
245 	 * which indicates that FIP/FDP/FOP are restored (same behavior
246 	 * as Intel). We're not using it though.
247 	 */
248 	fnstsw(&sw);
249 	if (sw & 0x80)
250 		fnclex();
251 	fldummy();
252 }
253 
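/*
 * XS64() picks the 64-bit form of a save/restore instruction (e.g.
 * fxsave64) for 64-bit processes on amd64, so that the full 64-bit
 * instruction and operand pointers are preserved.
 */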
254 #ifdef __x86_64__
255 #define XS64(x) (is_64bit ? x##64 : x)
256 #else
257 #define XS64(x) x
258 #endif
259 
260 void
261 fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
262 {
263 	switch (x86_fpu_save) {
264 	case FPU_SAVE_FSAVE:
265 		fnsave(area);
266 		break;
267 	case FPU_SAVE_FXSAVE:
268 		XS64(fxsave)(area);
269 		break;
270 	case FPU_SAVE_XSAVE:
271 		XS64(xsave)(area, xsave_features);
272 		break;
273 	case FPU_SAVE_XSAVEOPT:
274 		XS64(xsaveopt)(area, xsave_features);
275 		break;
276 	}
277 
278 	stts();
279 }
280 
281 void
282 fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
283 {
284 	clts();
285 
286 	switch (x86_fpu_save) {
287 	case FPU_SAVE_FSAVE:
288 		frstor(area);
289 		break;
290 	case FPU_SAVE_FXSAVE:
291 		if (cpu_vendor == CPUVENDOR_AMD)
292 			fpu_errata_amd();
293 		XS64(fxrstor)(area);
294 		break;
295 	case FPU_SAVE_XSAVE:
296 	case FPU_SAVE_XSAVEOPT:
297 		if (cpu_vendor == CPUVENDOR_AMD)
298 			fpu_errata_amd();
299 		XS64(xrstor)(area, xsave_features);
300 		break;
301 	}
302 }
303 
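/*
 * Install curlwp's saved FPU state into the CPU.  Called on the way
 * back to userland when the restore was deferred at context-switch
 * time (see fpu_switch() below).
 */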
304 void
305 fpu_handle_deferred(void)
306 {
307 	struct pcb *pcb = lwp_getpcb(curlwp);
308 	fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features,
309 	    !(curlwp->l_proc->p_flag & PK_32));
310 }
311 
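/*
 * Context-switch hook: save the outgoing lwp's FPU state if it is
 * loaded in the CPU.  The incoming lwp's state is not restored here;
 * that happens lazily when it returns to userland.
 */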
312 void
313 fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
314 {
315 	struct cpu_info *ci __diagused = curcpu();
316 	struct pcb *pcb;
317 
318 	KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
319 	    cpu_index(ci), ci->ci_ilevel);
320 
321 	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
322 		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
323 		pcb = lwp_getpcb(oldlwp);
324 		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
325 		    !(oldlwp->l_proc->p_flag & PK_32));
326 		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
327 	}
328 	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
329 }
330 
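/*
 * Fork hook: copy the parent lwp's FPU state into the child's PCB.
 * Kernel threads carry no FPU state, and a child forked from a kernel
 * thread (init(8)) starts with a zeroed save area.
 */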
331 void
332 fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
333 {
334 	struct pcb *pcb2 = lwp_getpcb(l2);
335 	union savefpu *fpu_save;
336 
337 	/* Kernel threads have no FPU. */
338 	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
339 		return;
340 	}
341 	/* For init(8). */
342 	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
343 		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
344 		return;
345 	}
346 
347 	fpu_save = fpu_lwp_area(l1);
348 	memcpy(&pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
349 	l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
350 }
351 
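/*
 * Discard the FPU state the lwp has loaded in the CPU: clear
 * MDL_FPU_IN_CPU without saving the registers and set CR0_TS again.
 * Typically used when the in-memory state is about to be replaced.
 */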
352 void
353 fpu_lwp_abandon(struct lwp *l)
354 {
355 	int s;
356 
357 	KASSERT(l == curlwp);
358 	s = splvm();
359 	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
360 	stts();
361 	splx(s);
362 }
363 
364 /* -------------------------------------------------------------------------- */
365 
366 /*
367  * fpu_kern_enter()
368  *
369  *	Begin using the FPU.  Raises to splvm, disabling most
370  *	interrupts and rendering the thread non-preemptible; caller
371  *	should not use this for long periods of time, and must call
372  *	fpu_kern_leave() afterward.  Non-recursive -- you cannot call
373  *	fpu_kern_enter() again without calling fpu_kern_leave() first.
374  *
375  *	Must be used only at IPL_VM or below -- never in IPL_SCHED or
376  *	IPL_HIGH interrupt handlers.
377  */
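/*
 * Typical use (illustrative sketch only -- the SIMD routine named here
 * is a hypothetical stand-in, not something defined in this file):
 *
 *	fpu_kern_enter();
 *	some_sse2_cipher_block(key, in, out);
 *	fpu_kern_leave();
 */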
378 void
379 fpu_kern_enter(void)
380 {
381 	static const union savefpu safe_fpu __aligned(64) = {
382 		.sv_xmm = {
383 			.fx_mxcsr = __SAFE_MXCSR__,
384 		},
385 	};
386 	struct lwp *l = curlwp;
387 	struct cpu_info *ci;
388 	int s;
389 
390 	s = splvm();
391 
392 	ci = curcpu();
393 #if 0
394 	/*
395 	 * Can't assert this because if the caller holds a spin lock at
396 	 * IPL_VM, and previously held and released a spin lock at
397 	 * higher IPL, the IPL remains raised above IPL_VM.
398 	 */
399 	KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d",
400 	    ci->ci_ilevel);
401 #endif
402 	KASSERT(ci->ci_kfpu_spl == -1);
403 	ci->ci_kfpu_spl = s;
404 
405 	/*
406 	 * If we are in a softint and have a pinned lwp, the fpu state is that
407 	 * of the pinned lwp, so save it there.
408 	 */
409 	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
410 		l = l->l_switchto;
411 	fpu_save_lwp(l);
412 
413 	/*
414 	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
415 	 * otherwise the CPU will trap if we try to use the FPU under
416 	 * the false impression that there has been a task switch since
417 	 * the last FPU usage requiring that we save the FPU state.
418 	 */
419 	clts();
420 
421 	/*
422 	 * Zero the FPU registers and install safe control words.
423 	 */
424 	fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
425 }
426 
427 /*
428  * fpu_kern_leave()
429  *
430  *	End using the FPU after fpu_kern_enter().
431  */
432 void
433 fpu_kern_leave(void)
434 {
435 	static const union savefpu zero_fpu __aligned(64);
436 	struct cpu_info *ci = curcpu();
437 	int s;
438 
439 #if 0
440 	/*
441 	 * Can't assert this because if the caller holds a spin lock at
442 	 * IPL_VM, and previously held and released a spin lock at
443 	 * higher IPL, the IPL remains raised above IPL_VM.
444 	 */
445 	KASSERT(ci->ci_ilevel == IPL_VM || cold);
446 #endif
447 	KASSERT(ci->ci_kfpu_spl != -1);
448 
449 	/*
450 	 * Zero the fpu registers; otherwise we might leak secrets
451 	 * through Spectre-class attacks to userland, even if there are
452 	 * no bugs in fpu state management.
453 	 */
454 	fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);
455 
456 	/*
457 	 * Set CR0_TS again so that the kernel can't accidentally use
458 	 * the FPU.
459 	 */
460 	stts();
461 
462 	s = ci->ci_kfpu_spl;
463 	ci->ci_kfpu_spl = -1;
464 	splx(s);
465 }
466 
467 /* -------------------------------------------------------------------------- */
468 
469 /*
470  * The following table is used to ensure that the FPE_... value
471  * that is passed as a trapcode to the signal handler of the user
472  * process does not have more than one bit set.
473  *
474  * Multiple bits may be set if SSE simd instructions generate errors
475  * on more than one value or if the user process modifies the control
476  * word while a status word bit is already set (which is a sign
477  * of bad coding).
478  * We have no choice but to narrow them down to one bit, since we must
479  * not send a trapcode that is not exactly one of the FPE_ macros.
480  *
481  * The mechanism has a static table with 128 entries.  Each combination
482  * of the 7 FPU status word exception bits directly translates to a
483  * position in this table, where a single FPE_... value is stored.
484  * This FPE_... value stored there is considered the "most important"
485  * of the exception bits and will be sent as the signal code.  The
486  * precedence of the bits is based upon Intel Document "Numerical
487  * Applications", Chapter "Special Computational Situations".
488  *
489  * The code to choose one of these values does these steps:
490  * 1) Throw away status word bits that cannot be masked.
491  * 2) Throw away the bits currently masked in the control word,
492  *    assuming the user isn't interested in them anymore.
493  * 3) Reinsert status word bit 7 (stack fault) if it is set, which
494  *    cannot be masked but must be preserved.
495  *    'Stack fault' is a sub-class of 'invalid operation'.
496  * 4) Use the remaining bits to point into the trapcode table.
497  *
498  * The 6 maskable bits in order of their preference, as stated in the
499  * above referenced Intel manual:
500  * 1  Invalid operation (FP_X_INV)
501  * 1a   Stack underflow
502  * 1b   Stack overflow
503  * 1c   Operand of unsupported format
504  * 1d   SNaN operand.
505  * 2  QNaN operand (not an exception, irrelevant here)
506  * 3  Any other invalid-operation not mentioned above or zero divide
507  *      (FP_X_INV, FP_X_DZ)
508  * 4  Denormal operand (FP_X_DNML)
509  * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
510  * 6  Inexact result (FP_X_IMP)
511  *
512  * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
513  * They are in the same order, but there is no EN_SW_STACK_FAULT in the
514  * mxcsr status.
515  *
516  * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
517  * are swapped).
518  *
519  * This table assumes that any stack fault is cleared - so that an INVOP
520  * fault will only be reported as FLTSUB once.
521  * This might not happen if the mask is being changed.
522  */
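/*
 * Worked example of the table below (the values follow directly from
 * the FPE_xxx1() selection macro): a status word with only
 * EN_SW_ZERODIV (0x04) set maps to fpetable[0x04] == FPE_FLTDIV.  With
 * EN_SW_INVOP and EN_SW_PRECLOSS both set (index 0x21), invalid
 * operation takes precedence and the result is FPE_FLTINV; if the
 * invalid operation is a stack fault (index 0x41), the result is
 * FPE_FLTSUB instead.
 */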
523 #define FPE_xxx1(f) (f & EN_SW_INVOP \
524 		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
525 	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
526 	: f & EN_SW_DENORM ? FPE_FLTUND \
527 	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
528 	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
529 	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
530 	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
531 #define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
532 #define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
533 #define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
534 #define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
535 #define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
536 static const uint8_t fpetable[128] = {
537 	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
538 };
539 #undef FPE_xxx1
540 #undef FPE_xxx2
541 #undef FPE_xxx4
542 #undef FPE_xxx8
543 #undef FPE_xxx16
544 #undef FPE_xxx32
545 
546 /*
547  * This is a synchronous trap on either an x87 instruction (due to an unmasked
548  * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
549  * to an error on the instruction itself.
550  *
551  * If the trap actually generates a signal, the fpu state is saved, copied
552  * onto the lwp's user-stack, and recovered from there when the signal
553  * returns.
554  *
555  * All this code needs to do is save the reason for the trap. For x87 traps the
556  * status word bits need clearing to stop the trap re-occurring. For SSE traps
557  * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
558  *
559  * We come here with interrupts disabled.
560  */
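/*
 * Example of the SSE (T_XMM) path below, assuming the default MXCSR of
 * 0x1f80: unmasking the zero-divide exception gives 0x1d80, and a SIMD
 * divide by zero then sets the ZE flag, so MXCSR reads 0x1d84.
 * "statbits >> 7" recovers the mask bits (0x3b), "~(statbits >> 7) &
 * 0x3f" keeps only the unmasked, raised flags (0x04), and
 * fpetable[0x04] yields FPE_FLTDIV for the signal code.
 */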
561 void
562 fputrap(struct trapframe *frame)
563 {
564 	uint32_t statbits;
565 	ksiginfo_t ksi;
566 
567 	if (__predict_false(!USERMODE(frame->tf_cs))) {
568 		register_t ip = X86_TF_RIP(frame);
569 		char where[128];
570 
571 #ifdef DDB
572 		db_symstr(where, sizeof(where), (db_expr_t)ip, DB_STGY_PROC);
573 #else
574 		snprintf(where, sizeof(where), "%p", (void *)ip);
575 #endif
576 		panic("fpu trap from kernel at %s, trapframe %p\n", where,
577 		    frame);
578 	}
579 
580 	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);
581 
582 	if (frame->tf_trapno == T_XMM) {
583 		uint32_t mxcsr;
584 		x86_stmxcsr(&mxcsr);
585 		statbits = mxcsr;
586 		/* Clear the sticky status bits */
587 		mxcsr &= ~0x3f;
588 		x86_ldmxcsr(&mxcsr);
589 
590 		/* Remove masked interrupts and non-status bits */
591 		statbits &= ~(statbits >> 7) & 0x3f;
592 		/* Mark this as an XMM status */
593 		statbits |= 0x10000;
594 	} else {
595 		uint16_t cw, sw;
596 		/* Get current control and status words */
597 		fnstcw(&cw);
598 		fnstsw(&sw);
599 		/* Clear any pending exceptions from status word */
600 		fnclex();
601 
602 		/* Remove masked interrupts */
603 		statbits = sw & ~(cw & 0x3f);
604 	}
605 
606 	/* Doesn't matter now if we get pre-empted */
607 	x86_enable_intr();
608 
609 	KSI_INIT_TRAP(&ksi);
610 	ksi.ksi_signo = SIGFPE;
611 	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
612 	ksi.ksi_code = fpetable[statbits & 0x7f];
613 	ksi.ksi_trap = statbits;
614 	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
615 }
616 
617 void
618 fpudna(struct trapframe *frame)
619 {
620 #ifdef XENPV
621 	/*
622 	 * Xen produces spurious fpudna traps; just do nothing.
623 	 */
624 	if (USERMODE(frame->tf_cs)) {
625 		clts();
626 		return;
627 	}
628 #endif
629 	panic("fpudna from %s, ip %p, trapframe %p",
630 	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
631 	    (void *)X86_TF_RIP(frame), frame);
632 }
633 
634 /* -------------------------------------------------------------------------- */
635 
636 static inline void
637 fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
638 {
639 	/*
640 	 * Force a reload of the given xstate during the next XRSTOR.
641 	 */
642 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
643 		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
644 	}
645 }
646 
647 void
648 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
649 {
650 	union savefpu *fpu_save = fpu_lwp_area(l);
651 	struct pcb *pcb = lwp_getpcb(l);
652 
653 	if (i386_use_fxsave) {
654 		fpu_save->sv_xmm.fx_cw = x87_cw;
655 		if (x87_cw != __INITIAL_NPXCW__) {
656 			fpu_xstate_reload(fpu_save, XCR0_X87);
657 		}
658 	} else {
659 		fpu_save->sv_87.s87_cw = x87_cw;
660 	}
661 	pcb->pcb_fpu_dflt_cw = x87_cw;
662 }
663 
664 void
665 fpu_clear(struct lwp *l, unsigned int x87_cw)
666 {
667 	union savefpu *fpu_save;
668 	struct pcb *pcb;
669 
670 	KASSERT(l == curlwp);
671 	fpu_save = fpu_lwp_area(l);
672 
673 	switch (x86_fpu_save) {
674 	case FPU_SAVE_FSAVE:
675 		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
676 		fpu_save->sv_87.s87_tw = 0xffff;
677 		fpu_save->sv_87.s87_cw = x87_cw;
678 		break;
679 	case FPU_SAVE_FXSAVE:
680 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
681 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
682 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
683 		fpu_save->sv_xmm.fx_cw = x87_cw;
684 		break;
685 	case FPU_SAVE_XSAVE:
686 	case FPU_SAVE_XSAVEOPT:
687 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
688 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
689 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
690 		fpu_save->sv_xmm.fx_cw = x87_cw;
691 		if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
692 			fpu_xstate_reload(fpu_save, XCR0_X87);
693 		}
694 		break;
695 	}
696 
697 	pcb = lwp_getpcb(l);
698 	pcb->pcb_fpu_dflt_cw = x87_cw;
699 }
700 
701 void
fpu_sigreset(struct lwp * l)702 fpu_sigreset(struct lwp *l)
703 {
704 	union savefpu *fpu_save = fpu_lwp_area(l);
705 	struct pcb *pcb = lwp_getpcb(l);
706 
707 	/*
708 	 * For signal handlers the register values don't matter. Just reset
709 	 * a few fields.
710 	 */
711 	if (i386_use_fxsave) {
712 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
713 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
714 		fpu_save->sv_xmm.fx_tw = 0;
715 		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
716 	} else {
717 		fpu_save->sv_87.s87_tw = 0xffff;
718 		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
719 	}
720 }
721 
722 void
723 process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
724 {
725 	union savefpu *fpu_save = fpu_lwp_area(l);
726 
727 	if (i386_use_fxsave) {
728 		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
729 
730 		/*
731 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
732 		 */
733 		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
734 		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
735 
736 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
737 	} else {
738 		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
739 	}
740 }
741 
742 void
743 process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
744 {
745 	union savefpu *fpu_save = fpu_lwp_area(l);
746 
747 	if (i386_use_fxsave) {
748 		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
749 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
750 	} else {
751 		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
752 	}
753 }
754 
755 void
756 process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
757 {
758 	union savefpu *fpu_save = fpu_lwp_area(l);
759 
760 	if (i386_use_fxsave) {
761 		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
762 	} else {
763 		memset(fpregs, 0, sizeof(*fpregs));
764 		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
765 	}
766 }
767 
768 void
769 process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
770 {
771 	union savefpu *fpu_save = fpu_lwp_area(l);
772 
773 	if (i386_use_fxsave) {
774 		memset(fpregs, 0, sizeof(*fpregs));
775 		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
776 	} else {
777 		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
778 	}
779 }
780 
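/*
 * Copy the lwp's extended FPU state (x87, SSE and any supported XSAVE
 * components) into *xstate, converting from the legacy save formats
 * when XSAVE is not in use.
 */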
781 int
782 process_read_xstate(struct lwp *l, struct xstate *xstate)
783 {
784 	union savefpu *fpu_save = fpu_lwp_area(l);
785 
786 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
787 		/* Convert from legacy FSAVE format. */
788 		memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
789 		process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);
790 
791 		/* We only got x87 data. */
792 		xstate->xs_rfbm = XCR0_X87;
793 		xstate->xs_xstate_bv = XCR0_X87;
794 		return 0;
795 	}
796 
797 	/* Copy the legacy area. */
798 	memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
799 	    sizeof(xstate->xs_fxsave));
800 
801 	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
802 		/* FXSAVE means we've got x87 + SSE data. */
803 		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
804 		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
805 		return 0;
806 	}
807 
808 	/* Copy the bitmap indicating which states are available. */
809 	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
810 	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
811 	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));
812 
813 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
814 	if (xstate->xs_xstate_bv & xcr0_val) {				\
815 		KASSERT(x86_xsave_offsets[xsave_val]			\
816 		    >= sizeof(struct xsave_header));			\
817 		KASSERT(x86_xsave_sizes[xsave_val]			\
818 		    >= sizeof(xstate->field));				\
819 		memcpy(&xstate->field,					\
820 		    (char*)fpu_save + x86_xsave_offsets[xsave_val],	\
821 		    sizeof(xstate->field));				\
822 	}
823 
824 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
825 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
826 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
827 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
828 
829 #undef COPY_COMPONENT
830 
831 	return 0;
832 }
833 
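/*
 * Check that an xstate about to be written back selects only
 * components supported by the FPU save method in use.
 */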
834 int
835 process_verify_xstate(const struct xstate *xstate)
836 {
837 	/* xstate_bv must be a subset of RFBM */
838 	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
839 		return EINVAL;
840 
841 	switch (x86_fpu_save) {
842 	case FPU_SAVE_FSAVE:
843 		if ((xstate->xs_rfbm & ~XCR0_X87))
844 			return EINVAL;
845 		break;
846 	case FPU_SAVE_FXSAVE:
847 		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
848 			return EINVAL;
849 		break;
850 	default:
851 		/* Verify whether no unsupported features are enabled */
852 		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
853 			return EINVAL;
854 	}
855 
856 	return 0;
857 }
858 
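/*
 * Install the given extended FPU state into the lwp's save area.  Only
 * the components selected by xs_rfbm are touched; with XSAVE,
 * components selected in xs_rfbm but absent from xs_xstate_bv revert
 * to their initial state on the next XRSTOR.
 */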
859 int
860 process_write_xstate(struct lwp *l, const struct xstate *xstate)
861 {
862 	union savefpu *fpu_save = fpu_lwp_area(l);
863 
864 	/* Convert data into legacy FSAVE format. */
865 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
866 		if (xstate->xs_xstate_bv & XCR0_X87)
867 			process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
868 		return 0;
869 	}
870 
871 	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
872 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
873 		/*
874 		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
875 		 *           fpu_save->sv_xsave_hdr.xsh_xstate_bv"
876 		 */
877 		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
878 		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
879 		    xstate->xs_xstate_bv;
880 	}
881 
882 	if (xstate->xs_xstate_bv & XCR0_X87) {
883 		/*
884 		 * X87 state is split into two areas, interspersed with SSE
885 		 * data.
886 		 */
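		/*
		 * The 24 bytes copied here are the x87 control, status and
		 * tag words, opcode and instruction/operand pointers, i.e.
		 * the part of the legacy fxsave image that precedes MXCSR;
		 * the accumulators are copied separately via fx_87_ac.
		 */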
887 		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
888 		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
889 		    sizeof(xstate->xs_fxsave.fx_87_ac));
890 	}
891 
892 	/*
893 	 * Copy MXCSR if either SSE or AVX state is requested, to match the
894 	 * XSAVE behavior for those flags.
895 	 */
896 	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
897 		/*
898 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
899 		 */
900 		fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
901 		    & x86_fpu_mxcsr_mask;
902 		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
903 		    fpu_save->sv_xmm.fx_mxcsr_mask;
904 	}
905 
906 	if (xstate->xs_xstate_bv & XCR0_SSE) {
907 		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
908 		    xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
909 	}
910 
911 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
912 	if (xstate->xs_xstate_bv & xcr0_val) {				\
913 		KASSERT(x86_xsave_offsets[xsave_val]			\
914 		    >= sizeof(struct xsave_header));			\
915 		KASSERT(x86_xsave_sizes[xsave_val]			\
916 		    >= sizeof(xstate->field));				\
917 		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val],	\
918 		    &xstate->field, sizeof(xstate->field));		\
919 	}
920 
921 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
922 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
923 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
924 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
925 
926 #undef COPY_COMPONENT
927 
928 	return 0;
929 }
930