xref: /netbsd-src/sys/arch/x86/x86/fpu.c (revision d90047b5d07facf36e6c01dcc0bded8997ce9cc2)
1 /*	$NetBSD: fpu.c,v 1.68 2020/07/13 16:51:51 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
5  * rights reserved.
6  *
7  * This code is derived from software developed for The NetBSD Foundation
8  * by Andrew Doran and Maxime Villard.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1991 The Regents of the University of California.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
61  */
62 
63 /*
64  * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
65  * Copyright (c) 1990 William Jolitz.
66  *
67  * Redistribution and use in source and binary forms, with or without
68  * modification, are permitted provided that the following conditions
69  * are met:
70  * 1. Redistributions of source code must retain the above copyright
71  *    notice, this list of conditions and the following disclaimer.
72  * 2. Redistributions in binary form must reproduce the above copyright
73  *    notice, this list of conditions and the following disclaimer in the
74  *    documentation and/or other materials provided with the distribution.
75  * 3. All advertising materials mentioning features or use of this software
76  *    must display the following acknowledgement:
77  *	This product includes software developed by the University of
78  *	California, Berkeley and its contributors.
79  * 4. Neither the name of the University nor the names of its contributors
80  *    may be used to endorse or promote products derived from this software
81  *    without specific prior written permission.
82  *
83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93  * SUCH DAMAGE.
94  *
95  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
96  */
97 
98 #include <sys/cdefs.h>
99 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.68 2020/07/13 16:51:51 riastradh Exp $");
100 
101 #include "opt_multiprocessor.h"
102 
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/conf.h>
106 #include <sys/cpu.h>
107 #include <sys/file.h>
108 #include <sys/proc.h>
109 #include <sys/kernel.h>
110 #include <sys/sysctl.h>
111 #include <sys/xcall.h>
112 
113 #include <machine/cpu.h>
114 #include <machine/cpuvar.h>
115 #include <machine/cputypes.h>
116 #include <machine/intr.h>
117 #include <machine/cpufunc.h>
118 #include <machine/pcb.h>
119 #include <machine/trap.h>
120 #include <machine/specialreg.h>
121 #include <x86/cpu.h>
122 #include <x86/fpu.h>
123 
124 #ifdef XENPV
125 #define clts() HYPERVISOR_fpu_taskswitch(0)
126 #define stts() HYPERVISOR_fpu_taskswitch(1)
127 #endif
128 
129 static void fpu_area_do_save(void *, uint64_t);
130 
131 void fpu_handle_deferred(void);
132 void fpu_switch(struct lwp *, struct lwp *);
133 
134 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
135 
136 static inline union savefpu *
137 fpu_lwp_area(struct lwp *l)
138 {
139 	struct pcb *pcb = lwp_getpcb(l);
140 	union savefpu *area = &pcb->pcb_savefpu;
141 
142 	KASSERT((l->l_flag & LW_SYSTEM) == 0);
143 	if (l == curlwp) {
144 		fpu_save();
145 	}
146 	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));
147 
148 	return area;
149 }
150 
151 static inline void
152 fpu_save_lwp(struct lwp *l)
153 {
154 	struct pcb *pcb = lwp_getpcb(l);
155 	union savefpu *area = &pcb->pcb_savefpu;
156 
157 	kpreempt_disable();
158 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
159 		KASSERT((l->l_flag & LW_SYSTEM) == 0);
160 
161 		/*
162 		 * Order is important, in case we are interrupted and
163 		 * the interrupt calls fpu_kern_enter, triggering
164 		 * reentry of fpu_save_lwp:
165 		 *
166 		 * 1. Save FPU state.
167 		 * 2. Note FPU state has been saved.
168 		 * 3. Disable FPU access so the kernel doesn't
169 		 *    accidentally use it.
170 		 *
171 		 * Steps (1) and (2) are both idempotent until step
172 		 * (3), after which point attempting to save the FPU
173 		 * state will trigger a #NM/fpudna fault.
174 		 */
175 		fpu_area_do_save(area, x86_xsave_features);
176 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
177 		stts();
178 	}
179 	kpreempt_enable();
180 }
181 
182 /*
183  * Bring curlwp's FPU state into memory. It will get installed back in the CPU
184  * when returning to userland.
185  */
186 void
187 fpu_save(void)
188 {
189 	fpu_save_lwp(curlwp);
190 }
191 
192 void
193 fpuinit(struct cpu_info *ci)
194 {
195 	/*
196 	 * This might not be strictly necessary since it will be initialized
197 	 * for each process. However it does no harm.
198 	 */
199 	clts();
200 	fninit();
201 	stts();
202 }
203 
204 void
205 fpuinit_mxcsr_mask(void)
206 {
207 #ifndef XENPV
208 	union savefpu fpusave __aligned(16);
209 	u_long psl;
210 
211 	memset(&fpusave, 0, sizeof(fpusave));
212 
213 	/* Disable interrupts, and enable FPU */
214 	psl = x86_read_psl();
215 	x86_disable_intr();
216 	clts();
217 
218 	/* Fill in the FPU area */
219 	fxsave(&fpusave);
220 
221 	/* Restore previous state */
222 	stts();
223 	x86_write_psl(psl);
224 
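	/*
	 * FXSAVE stores 0 in the mask field on CPUs that do not report a
	 * mask of their own; in that case the architecturally defined
	 * default (__INITIAL_MXCSR_MASK__) applies.
	 */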
225 	if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
226 		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
227 	} else {
228 		x86_fpu_mxcsr_mask = fpusave.sv_xmm.fx_mxcsr_mask;
229 	}
230 #else
231 	/*
232 	 * XXX XXX XXX: On Xen the FXSAVE above faults. That's because
233 	 * &fpusave is not 16-byte aligned. Stack alignment problem
234 	 * somewhere, it seems.
235 	 */
236 	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
237 #endif
238 }
239 
240 static inline void
241 fpu_errata_amd(void)
242 {
243 	uint16_t sw;
244 
245 	/*
246 	 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
247 	 * when FSW.ES=0, leaking other threads' execution history.
248 	 *
249 	 * Clear them manually by loading a zero (fldummy). We do this
250 	 * unconditionally, regardless of FSW.ES.
251 	 *
252 	 * Before that, clear the ES bit in the x87 status word if it is
253 	 * currently set, in order to avoid causing a fault in the
254 	 * upcoming load.
255 	 *
256 	 * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
257 	 * which indicates that FIP/FDP/FOP are restored (same behavior
258 	 * as Intel). We're not using it though.
259 	 */
260 	fnstsw(&sw);
261 	if (sw & 0x80)
262 		fnclex();
263 	fldummy();
264 }
265 
266 static void
267 fpu_area_do_save(void *area, uint64_t xsave_features)
268 {
269 	switch (x86_fpu_save) {
270 	case FPU_SAVE_FSAVE:
271 		fnsave(area);
272 		break;
273 	case FPU_SAVE_FXSAVE:
274 		fxsave(area);
275 		break;
276 	case FPU_SAVE_XSAVE:
277 		xsave(area, xsave_features);
278 		break;
279 	case FPU_SAVE_XSAVEOPT:
280 		xsaveopt(area, xsave_features);
281 		break;
282 	}
283 }
284 
285 void
286 fpu_area_save(void *area, uint64_t xsave_features)
287 {
288 
289 	fpu_area_do_save(area, xsave_features);
290 	stts();
291 }
292 
293 void
294 fpu_area_restore(const void *area, uint64_t xsave_features)
295 {
296 	clts();
297 
298 	switch (x86_fpu_save) {
299 	case FPU_SAVE_FSAVE:
300 		frstor(area);
301 		break;
302 	case FPU_SAVE_FXSAVE:
303 		if (cpu_vendor == CPUVENDOR_AMD)
304 			fpu_errata_amd();
305 		fxrstor(area);
306 		break;
307 	case FPU_SAVE_XSAVE:
308 	case FPU_SAVE_XSAVEOPT:
309 		if (cpu_vendor == CPUVENDOR_AMD)
310 			fpu_errata_amd();
311 		xrstor(area, xsave_features);
312 		break;
313 	}
314 }
315 
316 void
317 fpu_handle_deferred(void)
318 {
319 	struct pcb *pcb = lwp_getpcb(curlwp);
320 	fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features);
321 }
322 
323 void
324 fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
325 {
326 	struct pcb *pcb;
327 
328 	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
329 		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
330 		pcb = lwp_getpcb(oldlwp);
331 		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features);
332 		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
333 	}
334 	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
335 }
336 
337 void
338 fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
339 {
340 	struct pcb *pcb2 = lwp_getpcb(l2);
341 	union savefpu *fpu_save;
342 
343 	/* Kernel threads have no FPU. */
344 	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
345 		return;
346 	}
347 	/* For init(8). */
348 	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
349 		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
350 		return;
351 	}
352 
353 	fpu_save = fpu_lwp_area(l1);
354 	memcpy(&pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
355 	l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
356 }
357 
358 void
359 fpu_lwp_abandon(struct lwp *l)
360 {
361 	KASSERT(l == curlwp);
362 	kpreempt_disable();
363 	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
364 	stts();
365 	kpreempt_enable();
366 }
367 
368 /* -------------------------------------------------------------------------- */
369 
370 /*
371  * fpu_kern_enter()
372  *
373  *	Begin using the FPU.  Raises to splvm, disabling most
374  *	interrupts and rendering the thread non-preemptible; caller
375  *	should not use this for long periods of time, and must call
376  *	fpu_kern_leave() afterward.  Non-recursive -- you cannot call
377  *	fpu_kern_enter() again without calling fpu_kern_leave() first.
378  *
379  *	Must be used only at IPL_VM or below -- never in IPL_SCHED or
380  *	IPL_HIGH interrupt handlers.
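 *
 *	Typical (illustrative) usage:
 *
 *		fpu_kern_enter();
 *		...code that executes FPU/SIMD instructions...
 *		fpu_kern_leave();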
381  */
382 void
383 fpu_kern_enter(void)
384 {
385 	struct lwp *l = curlwp;
386 	struct cpu_info *ci;
387 	int s;
388 
389 	s = splvm();
390 
391 	ci = curcpu();
392 	KASSERTMSG(ci->ci_ilevel <= IPL_VM, "ilevel=%d", ci->ci_ilevel);
393 	KASSERT(ci->ci_kfpu_spl == -1);
394 	ci->ci_kfpu_spl = s;
395 
396 	/*
397 	 * If we are in a softint and have a pinned lwp, the fpu state is that
398 	 * of the pinned lwp, so save it there.
399 	 */
400 	if ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL)) {
401 		fpu_save_lwp(l->l_switchto);
402 	} else {
403 		fpu_save_lwp(l);
404 	}
405 
406 	/*
407 	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
408 	 * otherwise the CPU will trap if we try to use the FPU under
409 	 * the false impression that there has been a task switch since
410 	 * the last FPU usage requiring that we save the FPU state.
411 	 */
412 	clts();
413 }
414 
415 /*
416  * fpu_kern_leave()
417  *
418  *	End using the FPU after fpu_kern_enter().
419  */
420 void
421 fpu_kern_leave(void)
422 {
423 	static const union savefpu zero_fpu __aligned(64);
424 	const union savefpu *savefpu;
425 	struct lwp *l = curlwp;
426 	struct pcb *pcb;
427 	struct cpu_info *ci = curcpu();
428 	int s;
429 
430 	KASSERT(ci->ci_ilevel == IPL_VM);
431 	KASSERT(ci->ci_kfpu_spl != -1);
432 
433 	/*
434 	 * Restore the FPU state immediately to avoid leaking any
435 	 * kernel secrets, or zero it if this is a kthread.
436 	 */
437 	if ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
438 		l = l->l_switchto;
439 	if (l->l_flag & LW_SYSTEM) {
440 		savefpu = &zero_fpu;
441 	} else {
442 		pcb = lwp_getpcb(l);
443 		savefpu = &pcb->pcb_savefpu;
444 	}
445 	fpu_area_restore(savefpu, x86_xsave_features);
446 
447 	s = ci->ci_kfpu_spl;
448 	ci->ci_kfpu_spl = -1;
449 	splx(s);
450 }
451 
452 /* -------------------------------------------------------------------------- */
453 
454 /*
455  * The following table is used to ensure that the FPE_... value
456  * that is passed as a trapcode to the signal handler of the user
457  * process does not have more than one bit set.
458  *
459  * Multiple bits may be set if SSE simd instructions generate errors
460  * on more than one value or if the user process modifies the control
461  * word while a status word bit is already set (which is a sign
462  * of bad coding).
463  * We have no choice but to narrow them down to one bit, since we must
464  * not send a trapcode that is not exactly one of the FPE_ macros.
465  *
466  * The mechanism has a static table with 128 entries.  Each combination
467  * of the 7 FPU status word exception bits directly translates to a
468  * position in this table, where a single FPE_... value is stored.
469  * This FPE_... value stored there is considered the "most important"
470  * of the exception bits and will be sent as the signal code.  The
471  * precedence of the bits is based upon Intel Document "Numerical
472  * Applications", Chapter "Special Computational Situations".
473  *
474  * The code to choose one of these values does these steps:
475  * 1) Throw away status word bits that cannot be masked.
476  * 2) Throw away the bits currently masked in the control word,
477  *    assuming the user isn't interested in them anymore.
478  * 3) Reinsert status word bit 7 (stack fault) if it is set, which
479  *    cannot be masked but must be preserved.
480  *    'Stack fault' is a sub-class of 'invalid operation'.
481  * 4) Use the remaining bits to point into the trapcode table.
482  *
483  * The 6 maskable bits in order of their preference, as stated in the
484  * above referenced Intel manual:
485  * 1  Invalid operation (FP_X_INV)
486  * 1a   Stack underflow
487  * 1b   Stack overflow
488  * 1c   Operand of unsupported format
489  * 1d   SNaN operand.
490  * 2  QNaN operand (not an exception, irrelevant here)
491  * 3  Any other invalid-operation not mentioned above or zero divide
492  *      (FP_X_INV, FP_X_DZ)
493  * 4  Denormal operand (FP_X_DNML)
494  * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
495  * 6  Inexact result (FP_X_IMP)
496  *
497  * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
498  * They are in the same order, but there is no EN_SW_STACK_FAULT in the mxcsr
499  * status.
500  *
501  * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
502  * are swapped).
503  *
504  * This table assumes that any stack fault is cleared - so that an INVOP
505  * fault will only be reported as FLTSUB once.
506  * This might not happen if the mask is being changed.
507  */
508 #define FPE_xxx1(f) (f & EN_SW_INVOP \
509 		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
510 	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
511 	: f & EN_SW_DENORM ? FPE_FLTUND \
512 	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
513 	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
514 	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
515 	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
516 #define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
517 #define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
518 #define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
519 #define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
520 #define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
521 static const uint8_t fpetable[128] = {
522 	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
523 };
524 #undef FPE_xxx1
525 #undef FPE_xxx2
526 #undef FPE_xxx4
527 #undef FPE_xxx8
528 #undef FPE_xxx16
529 #undef FPE_xxx32
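
/*
 * Illustrative example: an unmasked divide-by-zero leaves only
 * EN_SW_ZERODIV (0x04) among the bits that fputrap() keeps, so
 * fpetable[0x04] == FPE_FLTDIV.  If the invalid-operation bit is set as
 * well (index 0x05), the table yields FPE_FLTINV instead, reflecting the
 * precedence described above.
 */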
530 
531 /*
532  * This is a synchronous trap on either an x87 instruction (due to an unmasked
533  * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
534  * to an error on the instruction itself.
535  *
536  * If the trap actually generates a signal, then the fpu state is saved and then
537  * copied onto the lwp's user-stack, and then recovered from there when the
538  * signal returns.
539  *
540  * All this code needs to do is save the reason for the trap. For x87 traps the
541  * status word bits need clearing to stop the trap re-occurring. For SSE traps
542  * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
543  *
544  * We come here with interrupts disabled.
545  */
546 void
547 fputrap(struct trapframe *frame)
548 {
549 	uint32_t statbits;
550 	ksiginfo_t ksi;
551 
552 	if (__predict_false(!USERMODE(frame->tf_cs))) {
553 		panic("fpu trap from kernel, trapframe %p\n", frame);
554 	}
555 
556 	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);
557 
558 	if (frame->tf_trapno == T_XMM) {
559 		uint32_t mxcsr;
560 		x86_stmxcsr(&mxcsr);
561 		statbits = mxcsr;
562 		/* Clear the sticky status bits */
563 		mxcsr &= ~0x3f;
564 		x86_ldmxcsr(&mxcsr);
565 
566 		/* Remove masked interrupts and non-status bits */
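		/*
		 * The MXCSR mask bits sit 7 bits above the corresponding
		 * status bits, so the shift lines each mask up with the flag
		 * it masks.
		 */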
567 		statbits &= ~(statbits >> 7) & 0x3f;
568 		/* Mark this as an XMM status */
569 		statbits |= 0x10000;
570 	} else {
571 		uint16_t cw, sw;
572 		/* Get current control and status words */
573 		fnstcw(&cw);
574 		fnstsw(&sw);
575 		/* Clear any pending exceptions from status word */
576 		fnclex();
577 
578 		/* Remove masked interrupts */
579 		statbits = sw & ~(cw & 0x3f);
580 	}
581 
582 	/* Doesn't matter now if we get pre-empted */
583 	x86_enable_intr();
584 
585 	KSI_INIT_TRAP(&ksi);
586 	ksi.ksi_signo = SIGFPE;
587 	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
588 	ksi.ksi_code = fpetable[statbits & 0x7f];
589 	ksi.ksi_trap = statbits;
590 	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
591 }
592 
593 void
594 fpudna(struct trapframe *frame)
595 {
596 	panic("fpudna from %s, ip %p, trapframe %p",
597 	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
598 	    (void *)X86_TF_RIP(frame), frame);
599 }
600 
601 /* -------------------------------------------------------------------------- */
602 
603 static inline void
604 fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
605 {
606 	/*
607 	 * Force a reload of the given xstate during the next XRSTOR.
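	 * XRSTOR reinitializes any state component whose bit is clear in the
	 * save area's XSTATE_BV instead of loading it from memory, so the
	 * bit must be set for a modified in-memory copy to take effect.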
608 	 */
609 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
610 		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
611 	}
612 }
613 
614 void
615 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
616 {
617 	union savefpu *fpu_save = fpu_lwp_area(l);
618 	struct pcb *pcb = lwp_getpcb(l);
619 
620 	if (i386_use_fxsave) {
621 		fpu_save->sv_xmm.fx_cw = x87_cw;
622 		if (x87_cw != __INITIAL_NPXCW__) {
623 			fpu_xstate_reload(fpu_save, XCR0_X87);
624 		}
625 	} else {
626 		fpu_save->sv_87.s87_cw = x87_cw;
627 	}
628 	pcb->pcb_fpu_dflt_cw = x87_cw;
629 }
630 
631 void
632 fpu_clear(struct lwp *l, unsigned int x87_cw)
633 {
634 	union savefpu *fpu_save;
635 	struct pcb *pcb;
636 
637 	KASSERT(l == curlwp);
638 	fpu_save = fpu_lwp_area(l);
639 
640 	switch (x86_fpu_save) {
641 	case FPU_SAVE_FSAVE:
642 		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
643 		fpu_save->sv_87.s87_tw = 0xffff;
644 		fpu_save->sv_87.s87_cw = x87_cw;
645 		break;
646 	case FPU_SAVE_FXSAVE:
647 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
648 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
649 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
650 		fpu_save->sv_xmm.fx_cw = x87_cw;
651 		break;
652 	case FPU_SAVE_XSAVE:
653 	case FPU_SAVE_XSAVEOPT:
654 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
655 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
656 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
657 		fpu_save->sv_xmm.fx_cw = x87_cw;
658 		if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
659 			fpu_xstate_reload(fpu_save, XCR0_X87);
660 		}
661 		break;
662 	}
663 
664 	pcb = lwp_getpcb(l);
665 	pcb->pcb_fpu_dflt_cw = x87_cw;
666 }
667 
668 void
669 fpu_sigreset(struct lwp *l)
670 {
671 	union savefpu *fpu_save = fpu_lwp_area(l);
672 	struct pcb *pcb = lwp_getpcb(l);
673 
674 	/*
675 	 * For signal handlers the register values don't matter. Just reset
676 	 * a few fields.
677 	 */
678 	if (i386_use_fxsave) {
679 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
680 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
681 		fpu_save->sv_xmm.fx_tw = 0;
682 		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
683 	} else {
684 		fpu_save->sv_87.s87_tw = 0xffff;
685 		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
686 	}
687 }
688 
689 /* -------------------------------------------------------------------------- */
690 
691 static void
692 process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
693 {
694 	unsigned int tag, ab_tag;
695 	const struct fpaccfx *fx_reg;
696 	struct fpacc87 *s87_reg;
697 	int i;
698 
699 	/*
700 	 * For historic reasons core dumps and ptrace all use the old save87
701 	 * layout.  Convert the important parts.
702 	 * getucontext gets what we give it.
703 	 * setucontext should return something given by getucontext, but
704 	 * we are (at the moment) willing to change it.
705 	 *
706 	 * It really isn't worth setting the 'tag' bits to 01 (zero) or
707 	 * 10 (NaN etc) since the processor will set any internal bits
708 	 * correctly when the value is loaded (the 287 believed them).
709 	 *
710 	 * Additionally the s87_tw and fx_tw tag words are 'indexed' by the actual
711 	 * register numbers, whereas the registers themselves have ST(0)
712 	 * first. Pairing the values and tags can only be done with
713 	 * reference to the 'top of stack'.
714 	 *
715 	 * If any x87 registers are used, they will typically be from
716 	 * r7 downwards - so the high bits of the tag register indicate
717 	 * used registers. The conversions are not optimised for this.
718 	 *
719 	 * The ABI we use requires the FP stack to be empty on every
720 	 * function call. I think this means that the stack isn't expected
721 	 * to overflow - overflow doesn't drop a core in my testing.
722 	 *
723 	 * Note that this code writes to all of the 's87' structure that
724 	 * actually gets written to userspace.
725 	 */
726 
727 	/* FPU control/status */
728 	s87->s87_cw = sxmm->fx_cw;
729 	s87->s87_sw = sxmm->fx_sw;
730 	/* tag word handled below */
731 	s87->s87_ip = sxmm->fx_ip;
732 	s87->s87_opcode = sxmm->fx_opcode;
733 	s87->s87_dp = sxmm->fx_dp;
734 
735 	/* FP registers (in stack order) */
736 	fx_reg = sxmm->fx_87_ac;
737 	s87_reg = s87->s87_ac;
738 	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
739 		*s87_reg = fx_reg->r;
740 
741 	/* Tag word and registers. */
742 	ab_tag = sxmm->fx_tw & 0xff;	/* Bits set if valid */
743 	if (ab_tag == 0) {
744 		/* none used */
745 		s87->s87_tw = 0xffff;
746 		return;
747 	}
748 
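	/*
	 * Worked example (illustrative): ab_tag = 0x80, i.e. only physical
	 * register 7 in use, is spread to tag = 0x8000 by the loop below;
	 * the replicate-and-invert step then yields s87_tw = 0x3fff, i.e.
	 * register 7 tagged 00 (valid) and the other seven registers 11
	 * (empty).
	 */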
749 	tag = 0;
750 	/* Separate bits of abridged tag word with zeros */
751 	for (i = 0x80; i != 0; tag <<= 1, i >>= 1)
752 		tag |= ab_tag & i;
753 	/* Replicate and invert so that 0 => 0b11 and 1 => 0b00 */
754 	s87->s87_tw = (tag | tag >> 1) ^ 0xffff;
755 }
756 
757 static void
758 process_s87_to_xmm(const struct save87 *s87, struct fxsave *sxmm)
759 {
760 	unsigned int tag, ab_tag;
761 	struct fpaccfx *fx_reg;
762 	const struct fpacc87 *s87_reg;
763 	int i;
764 
765 	/*
766 	 * ptrace gives us registers in the save87 format and
767 	 * we must convert them to the correct format.
768 	 *
769 	 * This code is normally used when overwriting the process's
770 	 * registers (in the pcb), so it mustn't change any other fields.
771 	 *
772 	 * There is a lot of padding in 'struct fxsave'; if the destination
773 	 * is written to userspace, it must be zeroed first.
774 	 */
775 
776 	/* FPU control/status */
777 	sxmm->fx_cw = s87->s87_cw;
778 	sxmm->fx_sw = s87->s87_sw;
779 	/* tag word handled below */
780 	sxmm->fx_ip = s87->s87_ip;
781 	sxmm->fx_opcode = s87->s87_opcode;
782 	sxmm->fx_dp = s87->s87_dp;
783 
784 	/* Tag word */
785 	tag = s87->s87_tw;	/* 0b11 => unused */
786 	if (tag == 0xffff) {
787 		/* All unused - values don't matter, zero for safety */
788 		sxmm->fx_tw = 0;
789 		memset(&sxmm->fx_87_ac, 0, sizeof sxmm->fx_87_ac);
790 		return;
791 	}
792 
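	/*
	 * Worked example (illustrative): s87_tw = 0x3fff (only register 7
	 * valid) becomes 0xc000 after the invert and 0xe000 after folding in
	 * the odd bits; the loop below then extracts the even bits, giving
	 * fx_tw = 0x80.
	 */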
793 	tag ^= 0xffff;		/* So 0b00 is unused */
794 	tag |= tag >> 1;	/* Look at even bits */
795 	ab_tag = 0;
796 	i = 1;
797 	do
798 		ab_tag |= tag & i;
799 	while ((tag >>= 1) >= (i <<= 1));
800 	sxmm->fx_tw = ab_tag;
801 
802 	/* FP registers (in stack order) */
803 	fx_reg = sxmm->fx_87_ac;
804 	s87_reg = s87->s87_ac;
805 	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
806 		fx_reg->r = *s87_reg;
807 }
808 
809 void
810 process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
811 {
812 	union savefpu *fpu_save = fpu_lwp_area(l);
813 
814 	if (i386_use_fxsave) {
815 		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
816 
817 		/*
818 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
819 		 */
820 		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
821 		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
822 
823 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
824 	} else {
825 		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
826 	}
827 }
828 
829 void
830 process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
831 {
832 	union savefpu *fpu_save = fpu_lwp_area(l);
833 
834 	if (i386_use_fxsave) {
835 		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
836 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
837 	} else {
838 		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
839 	}
840 }
841 
842 void
843 process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
844 {
845 	union savefpu *fpu_save = fpu_lwp_area(l);
846 
847 	if (i386_use_fxsave) {
848 		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
849 	} else {
850 		memset(fpregs, 0, sizeof(*fpregs));
851 		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
852 	}
853 }
854 
855 void
856 process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
857 {
858 	union savefpu *fpu_save = fpu_lwp_area(l);
859 
860 	if (i386_use_fxsave) {
861 		memset(fpregs, 0, sizeof(*fpregs));
862 		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
863 	} else {
864 		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
865 	}
866 }
867 
868 int
869 process_read_xstate(struct lwp *l, struct xstate *xstate)
870 {
871 	union savefpu *fpu_save = fpu_lwp_area(l);
872 
873 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
874 		/* Convert from legacy FSAVE format. */
875 		memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
876 		process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);
877 
878 		/* We only got x87 data. */
879 		xstate->xs_rfbm = XCR0_X87;
880 		xstate->xs_xstate_bv = XCR0_X87;
881 		return 0;
882 	}
883 
884 	/* Copy the legacy area. */
885 	memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
886 	    sizeof(xstate->xs_fxsave));
887 
888 	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
889 		/* FXSAVE means we've got x87 + SSE data. */
890 		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
891 		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
892 		return 0;
893 	}
894 
895 	/* Copy the bitmap indicating which states are available. */
896 	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
897 	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
898 	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));
899 
900 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
901 	if (xstate->xs_xstate_bv & xcr0_val) {				\
902 		KASSERT(x86_xsave_offsets[xsave_val]			\
903 		    >= sizeof(struct xsave_header));			\
904 		KASSERT(x86_xsave_sizes[xsave_val]			\
905 		    >= sizeof(xstate->field));				\
906 		memcpy(&xstate->field,					\
907 		    (char*)fpu_save + x86_xsave_offsets[xsave_val],	\
908 		    sizeof(xstate->field));				\
909 	}
910 
911 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
912 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
913 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
914 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
915 
916 #undef COPY_COMPONENT
917 
918 	return 0;
919 }
920 
921 int
922 process_verify_xstate(const struct xstate *xstate)
923 {
924 	/* xstate_bv must be a subset of RFBM */
925 	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
926 		return EINVAL;
927 
928 	switch (x86_fpu_save) {
929 	case FPU_SAVE_FSAVE:
930 		if ((xstate->xs_rfbm & ~XCR0_X87))
931 			return EINVAL;
932 		break;
933 	case FPU_SAVE_FXSAVE:
934 		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
935 			return EINVAL;
936 		break;
937 	default:
938 		/* Verify whether no unsupported features are enabled */
939 		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
940 			return EINVAL;
941 	}
942 
943 	return 0;
944 }
945 
946 int
947 process_write_xstate(struct lwp *l, const struct xstate *xstate)
948 {
949 	union savefpu *fpu_save = fpu_lwp_area(l);
950 
951 	/* Convert data into legacy FSAVE format. */
952 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
953 		if (xstate->xs_xstate_bv & XCR0_X87)
954 			process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
955 		return 0;
956 	}
957 
958 	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
959 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
960 		/*
961 		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
962 		 *           fpu_save->sv_xsave_hdr.xsh_xstate_bv"
963 		 */
964 		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
965 		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
966 		    xstate->xs_xstate_bv;
967 	}
968 
969 	if (xstate->xs_xstate_bv & XCR0_X87) {
970 		/*
971 		 * X87 state is split into two areas, interspersed with SSE
972 		 * data.
973 		 */
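		/*
		 * In the fxsave image, bytes 0-23 hold the x87 control,
		 * status, tag, opcode and instruction/operand pointer
		 * fields, bytes 24-31 hold MXCSR and its mask, the ST
		 * registers start at byte 32 and the XMM registers at byte
		 * 160; hence the constants 24 and 160 used here and below.
		 */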
974 		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
975 		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
976 		    sizeof(xstate->xs_fxsave.fx_87_ac));
977 	}
978 
979 	/*
980 	 * Copy MXCSR if either SSE or AVX state is requested, to match the
981 	 * XSAVE behavior for those flags.
982 	 */
983 	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
984 		/*
985 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
986 		 */
987 		fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
988 		    & x86_fpu_mxcsr_mask;
989 		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
990 		    fpu_save->sv_xmm.fx_mxcsr_mask;
991 	}
992 
993 	if (xstate->xs_xstate_bv & XCR0_SSE) {
994 		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
995 		    xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
996 	}
997 
998 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
999 	if (xstate->xs_xstate_bv & xcr0_val) {				\
1000 		KASSERT(x86_xsave_offsets[xsave_val]			\
1001 		    >= sizeof(struct xsave_header));			\
1002 		KASSERT(x86_xsave_sizes[xsave_val]			\
1003 		    >= sizeof(xstate->field));				\
1004 		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val],	\
1005 		    &xstate->field, sizeof(xstate->field));		\
1006 	}
1007 
1008 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
1009 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
1010 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
1011 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
1012 
1013 #undef COPY_COMPONENT
1014 
1015 	return 0;
1016 }
1017