xref: /netbsd-src/sys/arch/x86/x86/fpu.c (revision 7d62b00eb9ad855ffcd7da46b41e23feb5476fac)
1 /*	$NetBSD: fpu.c,v 1.86 2023/03/03 14:40:16 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
5  * rights reserved.
6  *
7  * This code is derived from software developed for The NetBSD Foundation
8  * by Andrew Doran and Maxime Villard.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1991 The Regents of the University of California.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
61  */
62 
63 /*
64  * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
65  * Copyright (c) 1990 William Jolitz.
66  *
67  * Redistribution and use in source and binary forms, with or without
68  * modification, are permitted provided that the following conditions
69  * are met:
70  * 1. Redistributions of source code must retain the above copyright
71  *    notice, this list of conditions and the following disclaimer.
72  * 2. Redistributions in binary form must reproduce the above copyright
73  *    notice, this list of conditions and the following disclaimer in the
74  *    documentation and/or other materials provided with the distribution.
75  * 3. All advertising materials mentioning features or use of this software
76  *    must display the following acknowledgement:
77  *	This product includes software developed by the University of
78  *	California, Berkeley and its contributors.
79  * 4. Neither the name of the University nor the names of its contributors
80  *    may be used to endorse or promote products derived from this software
81  *    without specific prior written permission.
82  *
83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93  * SUCH DAMAGE.
94  *
95  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
96  */
97 
98 #include <sys/cdefs.h>
99 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.86 2023/03/03 14:40:16 riastradh Exp $");
100 
101 #include "opt_multiprocessor.h"
102 
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/conf.h>
106 #include <sys/cpu.h>
107 #include <sys/file.h>
108 #include <sys/proc.h>
109 #include <sys/kernel.h>
110 #include <sys/sysctl.h>
111 #include <sys/xcall.h>
112 
113 #include <machine/cpu.h>
114 #include <machine/cpuvar.h>
115 #include <machine/cputypes.h>
116 #include <machine/intr.h>
117 #include <machine/cpufunc.h>
118 #include <machine/pcb.h>
119 #include <machine/trap.h>
120 #include <machine/specialreg.h>
121 #include <x86/cpu.h>
122 #include <x86/fpu.h>
123 
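/*
 * Under Xen PV the guest cannot write %cr0 itself, so CR0_TS is
 * toggled through the hypervisor instead of with clts/stts.
 */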
124 #ifdef XENPV
125 #define clts() HYPERVISOR_fpu_taskswitch(0)
126 #define stts() HYPERVISOR_fpu_taskswitch(1)
127 #endif
128 
129 void fpu_handle_deferred(void);
130 void fpu_switch(struct lwp *, struct lwp *);
131 
132 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
133 
134 static inline union savefpu *
135 fpu_lwp_area(struct lwp *l)
136 {
137 	struct pcb *pcb = lwp_getpcb(l);
138 	union savefpu *area = &pcb->pcb_savefpu;
139 
140 	KASSERT((l->l_flag & LW_SYSTEM) == 0);
141 	if (l == curlwp) {
142 		fpu_save();
143 	}
144 	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));
145 
146 	return area;
147 }
148 
149 static inline void
150 fpu_save_lwp(struct lwp *l)
151 {
152 	struct pcb *pcb = lwp_getpcb(l);
153 	union savefpu *area = &pcb->pcb_savefpu;
154 	int s;
155 
156 	s = splvm();
157 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
158 		KASSERT((l->l_flag & LW_SYSTEM) == 0);
159 		fpu_area_save(area, x86_xsave_features, !(l->l_proc->p_flag & PK_32));
160 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
161 	}
162 	splx(s);
163 }
164 
165 /*
166  * Bring curlwp's FPU state into memory. It will be installed back in the
167  * CPU when returning to userland.
168  */
169 void
170 fpu_save(void)
171 {
172 	fpu_save_lwp(curlwp);
173 }
174 
175 void
176 fpuinit(struct cpu_info *ci)
177 {
178 	/*
179 	 * This might not be strictly necessary, since the FPU state will be
180 	 * initialized for each process anyway. However, it does no harm.
181 	 */
182 	clts();
183 	fninit();
184 	stts();
185 }
186 
187 void
188 fpuinit_mxcsr_mask(void)
189 {
190 #ifndef XENPV
191 	union savefpu fpusave __aligned(64);
192 	u_long psl;
193 
194 	memset(&fpusave, 0, sizeof(fpusave));
195 
196 	/* Disable interrupts, and enable FPU */
197 	psl = x86_read_psl();
198 	x86_disable_intr();
199 	clts();
200 
201 	/* Fill in the FPU area */
202 	fxsave(&fpusave);
203 
204 	/* Restore previous state */
205 	stts();
206 	x86_write_psl(psl);
207 
208 	if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
209 		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
210 	} else {
211 		x86_fpu_mxcsr_mask = fpusave.sv_xmm.fx_mxcsr_mask;
212 	}
213 #else
214 	/*
215 	 * XXX XXX XXX: On Xen the FXSAVE above faults. That's because
216 	 * &fpusave is not 16-byte aligned. Stack alignment problem
217 	 * somewhere, it seems.
218 	 */
219 	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
220 #endif
221 }
222 
223 static inline void
224 fpu_errata_amd(void)
225 {
226 	uint16_t sw;
227 
228 	/*
229 	 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
230 	 * when FSW.ES=0, leaking other threads' execution history.
231 	 *
232 	 * Clear them manually by loading a zero (fldummy). We do this
233 	 * unconditionally, regardless of FSW.ES.
234 	 *
235 	 * Before that, clear the ES bit in the x87 status word if it is
236 	 * currently set, in order to avoid causing a fault in the
237 	 * upcoming load.
238 	 *
239 	 * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
240 	 * which indicates that FIP/FDP/FOP are restored (same behavior
241 	 * as Intel). We're not using it though.
242 	 */
243 	fnstsw(&sw);
244 	if (sw & 0x80)
245 		fnclex();
246 	fldummy();
247 }
248 
249 #ifdef __x86_64__
250 #define XS64(x) (is_64bit ? x##64 : x)
251 #else
252 #define XS64(x) x
253 #endif
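/*
 * For example, on amd64 XS64(fxsave)(area) expands to
 *
 *	(is_64bit ? fxsave64 : fxsave)(area);
 *
 * i.e. native (non-PK_32) processes use the REX.W variants, which save
 * and restore FIP/FDP as full 64-bit pointers, while 32-bit processes
 * keep the legacy 32-bit layout.  On i386 the macro is the identity.
 */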
254 
255 void
256 fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
257 {
258 	switch (x86_fpu_save) {
259 	case FPU_SAVE_FSAVE:
260 		fnsave(area);
261 		break;
262 	case FPU_SAVE_FXSAVE:
263 		XS64(fxsave)(area);
264 		break;
265 	case FPU_SAVE_XSAVE:
266 		XS64(xsave)(area, xsave_features);
267 		break;
268 	case FPU_SAVE_XSAVEOPT:
269 		XS64(xsaveopt)(area, xsave_features);
270 		break;
271 	}
272 
273 	stts();
274 }
275 
276 void
277 fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
278 {
279 	clts();
280 
281 	switch (x86_fpu_save) {
282 	case FPU_SAVE_FSAVE:
283 		frstor(area);
284 		break;
285 	case FPU_SAVE_FXSAVE:
286 		if (cpu_vendor == CPUVENDOR_AMD)
287 			fpu_errata_amd();
288 		XS64(fxrstor)(area);
289 		break;
290 	case FPU_SAVE_XSAVE:
291 	case FPU_SAVE_XSAVEOPT:
292 		if (cpu_vendor == CPUVENDOR_AMD)
293 			fpu_errata_amd();
294 		XS64(xrstor)(area, xsave_features);
295 		break;
296 	}
297 }
298 
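/*
 * Reinstall curlwp's FPU state in the CPU.  As noted above fpu_save(),
 * the state saved in the PCB is loaded back lazily: the machine-dependent
 * return-to-userland path calls this when MDL_FPU_IN_CPU is not set.
 */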
299 void
300 fpu_handle_deferred(void)
301 {
302 	struct pcb *pcb = lwp_getpcb(curlwp);
303 	fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features,
304 	    !(curlwp->l_proc->p_flag & PK_32));
305 }
306 
307 void
308 fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
309 {
310 	struct cpu_info *ci __diagused = curcpu();
311 	struct pcb *pcb;
312 
313 	KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
314 	    cpu_index(ci), ci->ci_ilevel);
315 
316 	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
317 		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
318 		pcb = lwp_getpcb(oldlwp);
319 		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
320 		    !(oldlwp->l_proc->p_flag & PK_32));
321 		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
322 	}
323 	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
324 }
325 
326 void
327 fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
328 {
329 	struct pcb *pcb2 = lwp_getpcb(l2);
330 	union savefpu *fpu_save;
331 
332 	/* Kernel threads have no FPU. */
333 	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
334 		return;
335 	}
336 	/* For init(8). */
337 	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
338 		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
339 		return;
340 	}
341 
342 	fpu_save = fpu_lwp_area(l1);
343 	memcpy(&pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
344 	l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
345 }
346 
347 void
348 fpu_lwp_abandon(struct lwp *l)
349 {
350 	int s;
351 
352 	KASSERT(l == curlwp);
353 	s = splvm();
354 	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
355 	stts();
356 	splx(s);
357 }
358 
359 /* -------------------------------------------------------------------------- */
360 
361 /*
362  * fpu_kern_enter()
363  *
364  *	Begin using the FPU.  Raises to splvm, disabling most
365  *	interrupts and rendering the thread non-preemptible; caller
366  *	should not use this for long periods of time, and must call
367  *	fpu_kern_leave() afterward.  Non-recursive -- you cannot call
368  *	fpu_kern_enter() again without calling fpu_kern_leave() first.
369  *
370  *	Must be used only at IPL_VM or below -- never in IPL_SCHED or
371  *	IPL_HIGH interrupt handlers.
372  */
373 void
374 fpu_kern_enter(void)
375 {
376 	static const union savefpu safe_fpu __aligned(64) = {
377 		.sv_xmm = {
378 			.fx_mxcsr = __SAFE_MXCSR__,
379 		},
380 	};
381 	struct lwp *l = curlwp;
382 	struct cpu_info *ci;
383 	int s;
384 
385 	s = splvm();
386 
387 	ci = curcpu();
388 #if 0
389 	/*
390 	 * Can't assert this because if the caller holds a spin lock at
391 	 * IPL_VM, and previously held and released a spin lock at
392 	 * higher IPL, the IPL remains raised above IPL_VM.
393 	 */
394 	KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d",
395 	    ci->ci_ilevel);
396 #endif
397 	KASSERT(ci->ci_kfpu_spl == -1);
398 	ci->ci_kfpu_spl = s;
399 
400 	/*
401 	 * If we are in a softint and have a pinned lwp, the fpu state is that
402 	 * of the pinned lwp, so save it there.
403 	 */
404 	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
405 		l = l->l_switchto;
406 	fpu_save_lwp(l);
407 
408 	/*
409 	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
410 	 * otherwise the CPU will trap if we try to use the FPU under
411 	 * the false impression that there has been a task switch since
412 	 * the last FPU usage requiring that we save the FPU state.
413 	 */
414 	clts();
415 
416 	/*
417 	 * Zero the FPU registers and install safe control words.
418 	 */
419 	fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
420 }
421 
422 /*
423  * fpu_kern_leave()
424  *
425  *	End using the FPU after fpu_kern_enter().
426  */
427 void
428 fpu_kern_leave(void)
429 {
430 	static const union savefpu zero_fpu __aligned(64);
431 	struct cpu_info *ci = curcpu();
432 	int s;
433 
434 #if 0
435 	/*
436 	 * Can't assert this because if the caller holds a spin lock at
437 	 * IPL_VM, and previously held and released a spin lock at
438 	 * higher IPL, the IPL remains raised above IPL_VM.
439 	 */
440 	KASSERT(ci->ci_ilevel == IPL_VM || cold);
441 #endif
442 	KASSERT(ci->ci_kfpu_spl != -1);
443 
444 	/*
445 	 * Zero the fpu registers; otherwise we might leak secrets
446 	 * through Spectre-class attacks to userland, even if there are
447 	 * no bugs in fpu state management.
448 	 */
449 	fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);
450 
451 	/*
452 	 * Set CR0_TS again so that the kernel can't accidentally use
453 	 * the FPU.
454 	 */
455 	stts();
456 
457 	s = ci->ci_kfpu_spl;
458 	ci->ci_kfpu_spl = -1;
459 	splx(s);
460 }
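/*
 * Typical bracketing of kernel FPU use (sketch only; kern_simd_op() is
 * a hypothetical routine that issues SSE/AVX instructions, not a symbol
 * defined here):
 *
 *	fpu_kern_enter();
 *	kern_simd_op(dst, src, len);
 *	fpu_kern_leave();
 *
 * The region between enter and leave runs at (at least) splvm and is
 * not preemptible, so it must be kept short and must not sleep or call
 * fpu_kern_enter() again.
 */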
461 
462 /* -------------------------------------------------------------------------- */
463 
464 /*
465  * The following table is used to ensure that the FPE_... value
466  * that is passed as a trapcode to the signal handler of the user
467  * process does not have more than one bit set.
468  *
469  * Multiple bits may be set if SSE SIMD instructions generate errors
470  * on more than one value or if the user process modifies the control
471  * word while a status word bit is already set (which is a sign of
472  * bad coding).
473  * We have no choice but to narrow them down to one bit, since we must
474  * not send a trapcode that is not exactly one of the FPE_ macros.
475  *
476  * The mechanism has a static table with 128 entries.  Each combination
477  * of the 7 FPU status word exception bits directly translates to a
478  * position in this table, where a single FPE_... value is stored.
479  * This FPE_... value stored there is considered the "most important"
480  * of the exception bits and will be sent as the signal code.  The
481  * precedence of the bits is based upon Intel Document "Numerical
482  * Applications", Chapter "Special Computational Situations".
483  *
484  * The code to choose one of these values does these steps:
485  * 1) Throw away status word bits that cannot be masked.
486  * 2) Throw away the bits currently masked in the control word,
487  *    assuming the user isn't interested in them anymore.
488  * 3) Reinsert status word bit 6 (stack fault) if it is set, which
489  *    cannot be masked but must be preserved.
490  *    'Stack fault' is a sub-class of 'invalid operation'.
491  * 4) Use the remaining bits to point into the trapcode table.
492  *
493  * The 6 maskable bits in order of their preference, as stated in the
494  * above referenced Intel manual:
495  * 1  Invalid operation (FP_X_INV)
496  * 1a   Stack underflow
497  * 1b   Stack overflow
498  * 1c   Operand of unsupported format
499  * 1d   SNaN operand.
500  * 2  QNaN operand (not an exception, irrelevant here)
501  * 3  Any other invalid-operation not mentioned above or zero divide
502  *      (FP_X_INV, FP_X_DZ)
503  * 4  Denormal operand (FP_X_DNML)
504  * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
505  * 6  Inexact result (FP_X_IMP)
506  *
507  * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
508  * They are in the same order, but there is no EN_SW_STACK_FAULT in the
509  * mxcsr status.
510  *
511  * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
512  * are swapped).
513  *
514  * This table assumes that any stack fault is cleared - so that an INVOP
515  * fault will only be reported as FLTSUB once.
516  * This might not happen if the mask is being changed.
517  */
518 #define FPE_xxx1(f) (f & EN_SW_INVOP \
519 		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
520 	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
521 	: f & EN_SW_DENORM ? FPE_FLTUND \
522 	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
523 	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
524 	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
525 	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
526 #define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
527 #define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
528 #define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
529 #define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
530 #define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
531 static const uint8_t fpetable[128] = {
532 	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
533 };
534 #undef FPE_xxx1
535 #undef FPE_xxx2
536 #undef FPE_xxx4
537 #undef FPE_xxx8
538 #undef FPE_xxx16
539 #undef FPE_xxx32
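/*
 * Worked example (values chosen for illustration): suppose the x87
 * status word reads 0x21 (invalid operation and precision both set)
 * while the control word masks only precision exceptions (PM, 0x20).
 * fputrap() below computes
 *
 *	statbits = sw & ~(cw & 0x3f) = 0x21 & ~0x20 = 0x01
 *
 * and fpetable[0x01] is FPE_FLTINV, so a single well-defined code is
 * delivered even though two status bits were set.
 */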
540 
541 /*
542  * This is a synchronous trap on either an x87 instruction (due to an unmasked
543  * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
544  * to an error on the instruction itself.
545  *
546  * If the trap actually generates a signal, the fpu state is saved and
547  * copied onto the lwp's user stack, then recovered from there when the
548  * signal returns.
549  *
550  * All this code needs to do is save the reason for the trap. For x87 traps the
551  * status word bits need clearing to stop the trap re-occurring. For SSE traps
552  * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
553  *
554  * We come here with interrupts disabled.
555  */
556 void
557 fputrap(struct trapframe *frame)
558 {
559 	uint32_t statbits;
560 	ksiginfo_t ksi;
561 
562 	if (__predict_false(!USERMODE(frame->tf_cs))) {
563 		panic("fpu trap from kernel, trapframe %p\n", frame);
564 	}
565 
566 	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);
567 
568 	if (frame->tf_trapno == T_XMM) {
569 		uint32_t mxcsr;
570 		x86_stmxcsr(&mxcsr);
571 		statbits = mxcsr;
572 		/* Clear the sticky status bits */
573 		mxcsr &= ~0x3f;
574 		x86_ldmxcsr(&mxcsr);
575 
576 		/* Remove masked interrupts and non-status bits */
577 		statbits &= ~(statbits >> 7) & 0x3f;
578 		/* Mark this is an XMM status */
579 		statbits |= 0x10000;
580 	} else {
581 		uint16_t cw, sw;
582 		/* Get current control and status words */
583 		fnstcw(&cw);
584 		fnstsw(&sw);
585 		/* Clear any pending exceptions from status word */
586 		fnclex();
587 
588 		/* Remove masked interrupts */
589 		statbits = sw & ~(cw & 0x3f);
590 	}
591 
592 	/* Doesn't matter now if we get pre-empted */
593 	x86_enable_intr();
594 
595 	KSI_INIT_TRAP(&ksi);
596 	ksi.ksi_signo = SIGFPE;
597 	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
598 	ksi.ksi_code = fpetable[statbits & 0x7f];
599 	ksi.ksi_trap = statbits;
600 	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
601 }
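/*
 * Worked example for the T_XMM branch above (illustrative values):
 * MXCSR keeps its exception flags in bits 0-5 and the corresponding
 * mask bits in bits 7-12, so "statbits >> 7" lines each mask bit up
 * with its flag.  With mxcsr = 0x1d84 (divide-by-zero unmasked, i.e.
 * ZM bit 9 clear, the ZE flag bit 2 set, everything else masked),
 *
 *	statbits = 0x1d84 & ~(0x1d84 >> 7) & 0x3f
 *	         = 0x1d84 & ~0x3b & 0x3f = 0x04
 *
 * so fpetable[statbits & 0x7f] selects FPE_FLTDIV; the 0x10000 bit only
 * marks the value as an XMM status in ksi_trap.
 */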
602 
603 void
604 fpudna(struct trapframe *frame)
605 {
606 	panic("fpudna from %s, ip %p, trapframe %p",
607 	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
608 	    (void *)X86_TF_RIP(frame), frame);
609 }
610 
611 /* -------------------------------------------------------------------------- */
612 
613 static inline void
614 fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
615 {
616 	/*
617 	 * Force a reload of the given xstate during the next XRSTOR.
618 	 */
619 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
620 		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
621 	}
622 }
623 
624 void
625 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
626 {
627 	union savefpu *fpu_save = fpu_lwp_area(l);
628 	struct pcb *pcb = lwp_getpcb(l);
629 
630 	if (i386_use_fxsave) {
631 		fpu_save->sv_xmm.fx_cw = x87_cw;
632 		if (x87_cw != __INITIAL_NPXCW__) {
633 			fpu_xstate_reload(fpu_save, XCR0_X87);
634 		}
635 	} else {
636 		fpu_save->sv_87.s87_cw = x87_cw;
637 	}
638 	pcb->pcb_fpu_dflt_cw = x87_cw;
639 }
640 
641 void
642 fpu_clear(struct lwp *l, unsigned int x87_cw)
643 {
644 	union savefpu *fpu_save;
645 	struct pcb *pcb;
646 
647 	KASSERT(l == curlwp);
648 	fpu_save = fpu_lwp_area(l);
649 
650 	switch (x86_fpu_save) {
651 	case FPU_SAVE_FSAVE:
652 		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
653 		fpu_save->sv_87.s87_tw = 0xffff;
654 		fpu_save->sv_87.s87_cw = x87_cw;
655 		break;
656 	case FPU_SAVE_FXSAVE:
657 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
658 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
659 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
660 		fpu_save->sv_xmm.fx_cw = x87_cw;
661 		break;
662 	case FPU_SAVE_XSAVE:
663 	case FPU_SAVE_XSAVEOPT:
664 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
665 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
666 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
667 		fpu_save->sv_xmm.fx_cw = x87_cw;
668 		if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
669 			fpu_xstate_reload(fpu_save, XCR0_X87);
670 		}
671 		break;
672 	}
673 
674 	pcb = lwp_getpcb(l);
675 	pcb->pcb_fpu_dflt_cw = x87_cw;
676 }
677 
678 void
679 fpu_sigreset(struct lwp *l)
680 {
681 	union savefpu *fpu_save = fpu_lwp_area(l);
682 	struct pcb *pcb = lwp_getpcb(l);
683 
684 	/*
685 	 * For signal handlers the register values don't matter. Just reset
686 	 * a few fields.
687 	 */
688 	if (i386_use_fxsave) {
689 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
690 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
691 		fpu_save->sv_xmm.fx_tw = 0;
692 		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
693 	} else {
694 		fpu_save->sv_87.s87_tw = 0xffff;
695 		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
696 	}
697 }
698 
699 void
700 process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
701 {
702 	union savefpu *fpu_save = fpu_lwp_area(l);
703 
704 	if (i386_use_fxsave) {
705 		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
706 
707 		/*
708 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
709 		 */
710 		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
711 		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
712 
713 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
714 	} else {
715 		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
716 	}
717 }
718 
719 void
720 process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
721 {
722 	union savefpu *fpu_save = fpu_lwp_area(l);
723 
724 	if (i386_use_fxsave) {
725 		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
726 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
727 	} else {
728 		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
729 	}
730 }
731 
732 void
733 process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
734 {
735 	union savefpu *fpu_save = fpu_lwp_area(l);
736 
737 	if (i386_use_fxsave) {
738 		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
739 	} else {
740 		memset(fpregs, 0, sizeof(*fpregs));
741 		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
742 	}
743 }
744 
745 void
746 process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
747 {
748 	union savefpu *fpu_save = fpu_lwp_area(l);
749 
750 	if (i386_use_fxsave) {
751 		memset(fpregs, 0, sizeof(*fpregs));
752 		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
753 	} else {
754 		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
755 	}
756 }
757 
758 int
759 process_read_xstate(struct lwp *l, struct xstate *xstate)
760 {
761 	union savefpu *fpu_save = fpu_lwp_area(l);
762 
763 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
764 		/* Convert from legacy FSAVE format. */
765 		memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
766 		process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);
767 
768 		/* We only got x87 data. */
769 		xstate->xs_rfbm = XCR0_X87;
770 		xstate->xs_xstate_bv = XCR0_X87;
771 		return 0;
772 	}
773 
774 	/* Copy the legacy area. */
775 	memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
776 	    sizeof(xstate->xs_fxsave));
777 
778 	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
779 		/* FXSAVE means we've got x87 + SSE data. */
780 		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
781 		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
782 		return 0;
783 	}
784 
785 	/* Copy the bitmap indicating which states are available. */
786 	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
787 	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
788 	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));
789 
790 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
791 	if (xstate->xs_xstate_bv & xcr0_val) {				\
792 		KASSERT(x86_xsave_offsets[xsave_val]			\
793 		    >= sizeof(struct xsave_header));			\
794 		KASSERT(x86_xsave_sizes[xsave_val]			\
795 		    >= sizeof(xstate->field));				\
796 		memcpy(&xstate->field,					\
797 		    (char*)fpu_save + x86_xsave_offsets[xsave_val],	\
798 		    sizeof(xstate->field));				\
799 	}
800 
801 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
802 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
803 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
804 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
805 
806 #undef COPY_COMPONENT
807 
808 	return 0;
809 }
810 
811 int
812 process_verify_xstate(const struct xstate *xstate)
813 {
814 	/* xstate_bv must be a subset of RFBM */
815 	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
816 		return EINVAL;
817 
818 	switch (x86_fpu_save) {
819 	case FPU_SAVE_FSAVE:
820 		if ((xstate->xs_rfbm & ~XCR0_X87))
821 			return EINVAL;
822 		break;
823 	case FPU_SAVE_FXSAVE:
824 		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
825 			return EINVAL;
826 		break;
827 	default:
828 		/* Verify whether no unsupported features are enabled */
829 		/* Verify that no unsupported features are enabled */
830 			return EINVAL;
831 	}
832 
833 	return 0;
834 }
835 
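/*
 * process_write_xstate() does not itself re-check xs_rfbm against the
 * supported feature set, so a caller handing it user-supplied data is
 * expected to run process_verify_xstate() above first, roughly (sketch
 * of the expected call order, not code from this file):
 *
 *	if ((error = process_verify_xstate(&xs)) == 0)
 *		error = process_write_xstate(l, &xs);
 */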
836 int
837 process_write_xstate(struct lwp *l, const struct xstate *xstate)
838 {
839 	union savefpu *fpu_save = fpu_lwp_area(l);
840 
841 	/* Convert data into legacy FSAVE format. */
842 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
843 		if (xstate->xs_xstate_bv & XCR0_X87)
844 			process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
845 		return 0;
846 	}
847 
848 	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
849 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
850 		/*
851 		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
852 		 *           fpu_save->sv_xsave_hdr.xsh_xstate_bv"
853 		 */
854 		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
855 		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
856 		    xstate->xs_xstate_bv;
857 	}
858 
859 	if (xstate->xs_xstate_bv & XCR0_X87) {
860 		/*
861 		 * X87 state is split into two areas, interspersed with SSE
862 		 * data.
863 		 */
864 		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
865 		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
866 		    sizeof(xstate->xs_fxsave.fx_87_ac));
867 	}
868 
869 	/*
870 	 * Copy MXCSR if either SSE or AVX state is requested, to match the
871 	 * XSAVE behavior for those flags.
872 	 */
873 	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
874 		/*
875 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
876 		 */
877 		fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
878 		    & x86_fpu_mxcsr_mask;
879 		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
880 		    fpu_save->sv_xmm.fx_mxcsr_mask;
881 	}
882 
883 	if (xstate->xs_xstate_bv & XCR0_SSE) {
884 		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
885 		    xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
886 	}
887 
888 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
889 	if (xstate->xs_xstate_bv & xcr0_val) {				\
890 		KASSERT(x86_xsave_offsets[xsave_val]			\
891 		    >= sizeof(struct xsave_header));			\
892 		KASSERT(x86_xsave_sizes[xsave_val]			\
893 		    >= sizeof(xstate->field));				\
894 		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val],	\
895 		    &xstate->field, sizeof(xstate->field));		\
896 	}
897 
898 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
899 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
900 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
901 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
902 
903 #undef COPY_COMPONENT
904 
905 	return 0;
906 }
907