xref: /netbsd-src/sys/arch/x86/x86/fpu.c (revision cef8759bd76c1b621f8eab8faa6f208faabc2e15)
1 /*	$NetBSD: fpu.c,v 1.74 2020/08/02 18:23:33 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
5  * rights reserved.
6  *
7  * This code is derived from software developed for The NetBSD Foundation
8  * by Andrew Doran and Maxime Villard.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1991 The Regents of the University of California.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
61  */
62 
63 /*
64  * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
65  * Copyright (c) 1990 William Jolitz.
66  *
67  * Redistribution and use in source and binary forms, with or without
68  * modification, are permitted provided that the following conditions
69  * are met:
70  * 1. Redistributions of source code must retain the above copyright
71  *    notice, this list of conditions and the following disclaimer.
72  * 2. Redistributions in binary form must reproduce the above copyright
73  *    notice, this list of conditions and the following disclaimer in the
74  *    documentation and/or other materials provided with the distribution.
75  * 3. All advertising materials mentioning features or use of this software
76  *    must display the following acknowledgement:
77  *	This product includes software developed by the University of
78  *	California, Berkeley and its contributors.
79  * 4. Neither the name of the University nor the names of its contributors
80  *    may be used to endorse or promote products derived from this software
81  *    without specific prior written permission.
82  *
83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93  * SUCH DAMAGE.
94  *
95  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
96  */
97 
98 #include <sys/cdefs.h>
99 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.74 2020/08/02 18:23:33 riastradh Exp $");
100 
101 #include "opt_multiprocessor.h"
102 
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/conf.h>
106 #include <sys/cpu.h>
107 #include <sys/file.h>
108 #include <sys/proc.h>
109 #include <sys/kernel.h>
110 #include <sys/sysctl.h>
111 #include <sys/xcall.h>
112 
113 #include <machine/cpu.h>
114 #include <machine/cpuvar.h>
115 #include <machine/cputypes.h>
116 #include <machine/intr.h>
117 #include <machine/cpufunc.h>
118 #include <machine/pcb.h>
119 #include <machine/trap.h>
120 #include <machine/specialreg.h>
121 #include <x86/cpu.h>
122 #include <x86/fpu.h>
123 
124 #ifdef XENPV
125 #define clts() HYPERVISOR_fpu_taskswitch(0)
126 #define stts() HYPERVISOR_fpu_taskswitch(1)
127 #endif
128 
129 void fpu_handle_deferred(void);
130 void fpu_switch(struct lwp *, struct lwp *);
131 
132 uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
133 
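/*
 * Descriptive note (added): return a pointer to the given LWP's FPU save
 * area in its PCB.  If the LWP is curlwp, flush the live FPU state to
 * memory first so that the area is up to date.
 */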
134 static inline union savefpu *
135 fpu_lwp_area(struct lwp *l)
136 {
137 	struct pcb *pcb = lwp_getpcb(l);
138 	union savefpu *area = &pcb->pcb_savefpu;
139 
140 	KASSERT((l->l_flag & LW_SYSTEM) == 0);
141 	if (l == curlwp) {
142 		fpu_save();
143 	}
144 	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));
145 
146 	return area;
147 }
148 
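/*
 * Descriptive note (added): if the given LWP's FPU state is currently
 * loaded in the CPU, save it into the LWP's PCB and mark the state as no
 * longer resident.
 */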
149 static inline void
150 fpu_save_lwp(struct lwp *l)
151 {
152 	struct pcb *pcb = lwp_getpcb(l);
153 	union savefpu *area = &pcb->pcb_savefpu;
154 	int s;
155 
156 	s = splvm();
157 	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
158 		KASSERT((l->l_flag & LW_SYSTEM) == 0);
159 		fpu_area_save(area, x86_xsave_features);
160 		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
161 	}
162 	splx(s);
163 }
164 
165 /*
166  * Bring curlwp's FPU state into memory. It will get installed back in the CPU
167  * when returning to userland.
168  */
169 void
170 fpu_save(void)
171 {
172 	fpu_save_lwp(curlwp);
173 }
174 
175 void
176 fpuinit(struct cpu_info *ci)
177 {
178 	/*
179 	 * This might not be strictly necessary since it will be initialized
180 	 * for each process. However it does no harm.
181 	 */
182 	clts();
183 	fninit();
184 	stts();
185 }
186 
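/*
 * Descriptive note (added): probe the set of MXCSR bits the CPU actually
 * implements by executing FXSAVE into a scratch area.  A zero
 * fx_mxcsr_mask field means the CPU reports no mask, in which case
 * __INITIAL_MXCSR_MASK__ is used instead.
 */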
187 void
188 fpuinit_mxcsr_mask(void)
189 {
190 #ifndef XENPV
191 	union savefpu fpusave __aligned(16);
192 	u_long psl;
193 
194 	memset(&fpusave, 0, sizeof(fpusave));
195 
196 	/* Disable interrupts, and enable FPU */
197 	psl = x86_read_psl();
198 	x86_disable_intr();
199 	clts();
200 
201 	/* Fill in the FPU area */
202 	fxsave(&fpusave);
203 
204 	/* Restore previous state */
205 	stts();
206 	x86_write_psl(psl);
207 
208 	if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
209 		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
210 	} else {
211 		x86_fpu_mxcsr_mask = fpusave.sv_xmm.fx_mxcsr_mask;
212 	}
213 #else
214 	/*
215 	 * XXX XXX XXX: On Xen the FXSAVE above faults. That's because
216 	 * &fpusave is not 16-byte aligned. Stack alignment problem
217 	 * somewhere, it seems.
218 	 */
219 	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
220 #endif
221 }
222 
223 static inline void
224 fpu_errata_amd(void)
225 {
226 	uint16_t sw;
227 
228 	/*
229 	 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
230 	 * when FSW.ES=0, leaking other threads' execution history.
231 	 *
232 	 * Clear them manually by loading a zero (fldummy). We do this
233 	 * unconditionally, regardless of FSW.ES.
234 	 *
235 	 * Before that, clear the ES bit in the x87 status word if it is
236 	 * currently set, in order to avoid causing a fault in the
237 	 * upcoming load.
238 	 *
239 	 * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
240 	 * which indicates that FIP/FDP/FOP are restored (same behavior
241 	 * as Intel). We're not using it though.
242 	 */
243 	fnstsw(&sw);
244 	if (sw & 0x80)
245 		fnclex();
246 	fldummy();
247 }
248 
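/*
 * Descriptive note (added): save the FPU state into the given area using
 * whichever save instruction this CPU supports, then set CR0_TS so that
 * any subsequent FPU use traps.
 */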
249 void
250 fpu_area_save(void *area, uint64_t xsave_features)
251 {
252 	switch (x86_fpu_save) {
253 	case FPU_SAVE_FSAVE:
254 		fnsave(area);
255 		break;
256 	case FPU_SAVE_FXSAVE:
257 		fxsave(area);
258 		break;
259 	case FPU_SAVE_XSAVE:
260 		xsave(area, xsave_features);
261 		break;
262 	case FPU_SAVE_XSAVEOPT:
263 		xsaveopt(area, xsave_features);
264 		break;
265 	}
266 
267 	stts();
268 }
269 
270 void
271 fpu_area_restore(const void *area, uint64_t xsave_features)
272 {
273 	clts();
274 
275 	switch (x86_fpu_save) {
276 	case FPU_SAVE_FSAVE:
277 		frstor(area);
278 		break;
279 	case FPU_SAVE_FXSAVE:
280 		if (cpu_vendor == CPUVENDOR_AMD)
281 			fpu_errata_amd();
282 		fxrstor(area);
283 		break;
284 	case FPU_SAVE_XSAVE:
285 	case FPU_SAVE_XSAVEOPT:
286 		if (cpu_vendor == CPUVENDOR_AMD)
287 			fpu_errata_amd();
288 		xrstor(area, xsave_features);
289 		break;
290 	}
291 }
292 
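/*
 * Descriptive note (added): reload curlwp's saved FPU state into the CPU,
 * completing a restore that was deferred until the return to userland
 * (see fpu_save() above).
 */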
293 void
294 fpu_handle_deferred(void)
295 {
296 	struct pcb *pcb = lwp_getpcb(curlwp);
297 	fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features);
298 }
299 
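/*
 * Descriptive note (added): context-switch hook.  If the outgoing LWP
 * still owns the FPU, save its state into its PCB and mark it as no
 * longer resident.
 */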
300 void
301 fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
302 {
303 	struct cpu_info *ci __diagused = curcpu();
304 	struct pcb *pcb;
305 
306 	KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
307 	    cpu_index(ci), ci->ci_ilevel);
308 
309 	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
310 		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
311 		pcb = lwp_getpcb(oldlwp);
312 		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features);
313 		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
314 	}
315 	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
316 }
317 
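/*
 * Descriptive note (added): fork hook.  Give the child a copy of the
 * parent's FPU save area, unless the child is a kernel thread (which has
 * no FPU state).
 */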
318 void
319 fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
320 {
321 	struct pcb *pcb2 = lwp_getpcb(l2);
322 	union savefpu *fpu_save;
323 
324 	/* Kernel threads have no FPU. */
325 	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
326 		return;
327 	}
328 	/* For init(8). */
329 	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
330 		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
331 		return;
332 	}
333 
334 	fpu_save = fpu_lwp_area(l1);
335 	memcpy(&pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
336 	l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
337 }
338 
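/*
 * Descriptive note (added): discard curlwp's live FPU state without
 * saving it, and set CR0_TS so that any further FPU use traps.
 */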
339 void
340 fpu_lwp_abandon(struct lwp *l)
341 {
342 	int s;
343 
344 	KASSERT(l == curlwp);
345 	s = splvm();
346 	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
347 	stts();
348 	splx(s);
349 }
350 
351 /* -------------------------------------------------------------------------- */
352 
353 /*
354  * fpu_kern_enter()
355  *
356  *	Begin using the FPU.  Raises to splvm, disabling most
357  *	interrupts and rendering the thread non-preemptible; caller
358  *	should not use this for long periods of time, and must call
359  *	fpu_kern_leave() afterward.  Non-recursive -- you cannot call
360  *	fpu_kern_enter() again without calling fpu_kern_leave() first.
361  *
362  *	Must be used only at IPL_VM or below -- never in IPL_SCHED or
363  *	IPL_HIGH interrupt handlers.
364  */
365 void
366 fpu_kern_enter(void)
367 {
368 	struct lwp *l = curlwp;
369 	struct cpu_info *ci;
370 	int s;
371 
372 	s = splvm();
373 
374 	ci = curcpu();
375 	KASSERTMSG(ci->ci_ilevel <= IPL_VM, "ilevel=%d", ci->ci_ilevel);
376 	KASSERT(ci->ci_kfpu_spl == -1);
377 	ci->ci_kfpu_spl = s;
378 
379 	/*
380 	 * If we are in a softint and have a pinned lwp, the fpu state is that
381 	 * of the pinned lwp, so save it there.
382 	 */
383 	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
384 		l = l->l_switchto;
385 	fpu_save_lwp(l);
386 
387 	/*
388 	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
389 	 * otherwise the CPU will trap if we try to use the FPU under
390 	 * the false impression that there has been a task switch since
391 	 * the last FPU usage requiring that we save the FPU state.
392 	 */
393 	clts();
394 }
395 
396 /*
397  * fpu_kern_leave()
398  *
399  *	End using the FPU after fpu_kern_enter().
400  */
401 void
402 fpu_kern_leave(void)
403 {
404 	static const union savefpu zero_fpu __aligned(64);
405 	struct cpu_info *ci = curcpu();
406 	int s;
407 
408 	KASSERT(ci->ci_ilevel == IPL_VM);
409 	KASSERT(ci->ci_kfpu_spl != -1);
410 
411 	/*
412 	 * Zero the fpu registers; otherwise we might leak secrets
413 	 * through Spectre-class attacks to userland, even if there are
414 	 * no bugs in fpu state management.
415 	 */
416 	fpu_area_restore(&zero_fpu, x86_xsave_features);
417 
418 	/*
419 	 * Set CR0_TS again so that the kernel can't accidentally use
420 	 * the FPU.
421 	 */
422 	stts();
423 
424 	s = ci->ci_kfpu_spl;
425 	ci->ci_kfpu_spl = -1;
426 	splx(s);
427 }
428 
429 /* -------------------------------------------------------------------------- */
430 
431 /*
432  * The following table is used to ensure that the FPE_... value
433  * that is passed as a trapcode to the signal handler of the user
434  * process does not have more than one bit set.
435  *
436  * Multiple bits may be set if SSE simd instructions generate errors
437  * on more than one value or if the user process modifies the control
438  * word while a status word bit is already set (which is a sign
439  * of bad coding).
440  * We have no choice but to narrow them down to one bit, since we must
441  * not send a trapcode that is not exactly one of the FPE_ macros.
442  *
443  * The mechanism has a static table with 128 entries.  Each combination
444  * of the 7 FPU status word exception bits directly translates to a
445  * position in this table, where a single FPE_... value is stored.
446  * This FPE_... value stored there is considered the "most important"
447  * of the exception bits and will be sent as the signal code.  The
448  * precedence of the bits is based upon Intel Document "Numerical
449  * Applications", Chapter "Special Computational Situations".
450  *
451  * The code to choose one of these values does these steps:
452  * 1) Throw away status word bits that cannot be masked.
453  * 2) Throw away the bits currently masked in the control word,
454  *    assuming the user isn't interested in them anymore.
455  * 3) Reinsert status word bit 7 (stack fault) if it is set, which
456  *    cannot be masked but must be preserved.
457  *    'Stack fault' is a sub-class of 'invalid operation'.
458  * 4) Use the remaining bits to point into the trapcode table.
459  *
460  * The 6 maskable bits in order of their preference, as stated in the
461  * above referenced Intel manual:
462  * 1  Invalid operation (FP_X_INV)
463  * 1a   Stack underflow
464  * 1b   Stack overflow
465  * 1c   Operand of unsupported format
466  * 1d   SNaN operand.
467  * 2  QNaN operand (not an exception, irrelevant here)
468  * 3  Any other invalid-operation not mentioned above or zero divide
469  *      (FP_X_INV, FP_X_DZ)
470  * 4  Denormal operand (FP_X_DNML)
471  * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
472  * 6  Inexact result (FP_X_IMP)
473  *
474  * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
475  * They are in the same order, but there is no EN_SW_STACK_FAULT in the
476  * mxcsr status.
477  *
478  * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
479  * are swapped).
480  *
481  * This table assumes that any stack fault is cleared - so that an INVOP
482  * fault will only be reported as FLTSUB once.
483  * This might not happen if the mask is being changed.
484  */
485 #define FPE_xxx1(f) (f & EN_SW_INVOP \
486 		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
487 	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
488 	: f & EN_SW_DENORM ? FPE_FLTUND \
489 	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
490 	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
491 	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
492 	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
493 #define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
494 #define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
495 #define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
496 #define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
497 #define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
498 static const uint8_t fpetable[128] = {
499 	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
500 };
501 #undef FPE_xxx1
502 #undef FPE_xxx2
503 #undef FPE_xxx4
504 #undef FPE_xxx8
505 #undef FPE_xxx16
506 #undef FPE_xxx32
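/*
 * Worked example (illustrative, not part of the original code): suppose an
 * x87 instruction leaves sw = 0x0021 (invalid operation + precision loss)
 * while cw = 0x0020 (only precision loss masked).  fputrap() below computes
 *
 *	statbits = sw & ~(cw & 0x3f) = 0x0021 & ~0x20 = 0x0001
 *
 * and fpetable[0x0001 & 0x7f] yields FPE_FLTINV, which becomes the signal
 * code delivered with the SIGFPE.
 */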
507 
508 /*
509  * This is a synchronous trap on either an x87 instruction (due to an unmasked
510  * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
511  * to an error on the instruction itself.
512  *
513  * If the trap actually generates a signal, then the fpu state is saved and then
514  * copied onto the lwp's user-stack, and then recovered from there when the
515  * signal returns.
516  *
517  * All this code needs to do is save the reason for the trap. For x87 traps the
518  * status word bits need clearing to stop the trap re-occurring. For SSE traps
519  * the mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
520  *
521  * We come here with interrupts disabled.
522  */
523 void
524 fputrap(struct trapframe *frame)
525 {
526 	uint32_t statbits;
527 	ksiginfo_t ksi;
528 
529 	if (__predict_false(!USERMODE(frame->tf_cs))) {
530 		panic("fpu trap from kernel, trapframe %p\n", frame);
531 	}
532 
533 	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);
534 
535 	if (frame->tf_trapno == T_XMM) {
536 		uint32_t mxcsr;
537 		x86_stmxcsr(&mxcsr);
538 		statbits = mxcsr;
539 		/* Clear the sticky status bits */
540 		mxcsr &= ~0x3f;
541 		x86_ldmxcsr(&mxcsr);
542 
543 		/* Remove masked interrupts and non-status bits */
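		/* (in mxcsr, each exception's mask bit sits 7 bits above its flag bit) */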
544 		statbits &= ~(statbits >> 7) & 0x3f;
545 		/* Mark this as an XMM status */
546 		statbits |= 0x10000;
547 	} else {
548 		uint16_t cw, sw;
549 		/* Get current control and status words */
550 		fnstcw(&cw);
551 		fnstsw(&sw);
552 		/* Clear any pending exceptions from status word */
553 		fnclex();
554 
555 		/* Remove masked interrupts */
556 		statbits = sw & ~(cw & 0x3f);
557 	}
558 
559 	/* Doesn't matter now if we get pre-empted */
560 	x86_enable_intr();
561 
562 	KSI_INIT_TRAP(&ksi);
563 	ksi.ksi_signo = SIGFPE;
564 	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
565 	ksi.ksi_code = fpetable[statbits & 0x7f];
566 	ksi.ksi_trap = statbits;
567 	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
568 }
569 
570 void
571 fpudna(struct trapframe *frame)
572 {
573 	panic("fpudna from %s, ip %p, trapframe %p",
574 	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
575 	    (void *)X86_TF_RIP(frame), frame);
576 }
577 
578 /* -------------------------------------------------------------------------- */
579 
580 static inline void
581 fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
582 {
583 	/*
584 	 * Force a reload of the given xstate during the next XRSTOR.
585 	 */
586 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
587 		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
588 	}
589 }
590 
591 void
592 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
593 {
594 	union savefpu *fpu_save = fpu_lwp_area(l);
595 	struct pcb *pcb = lwp_getpcb(l);
596 
597 	if (i386_use_fxsave) {
598 		fpu_save->sv_xmm.fx_cw = x87_cw;
599 		if (x87_cw != __INITIAL_NPXCW__) {
600 			fpu_xstate_reload(fpu_save, XCR0_X87);
601 		}
602 	} else {
603 		fpu_save->sv_87.s87_cw = x87_cw;
604 	}
605 	pcb->pcb_fpu_dflt_cw = x87_cw;
606 }
607 
608 void
609 fpu_clear(struct lwp *l, unsigned int x87_cw)
610 {
611 	union savefpu *fpu_save;
612 	struct pcb *pcb;
613 
614 	KASSERT(l == curlwp);
615 	fpu_save = fpu_lwp_area(l);
616 
617 	switch (x86_fpu_save) {
618 	case FPU_SAVE_FSAVE:
619 		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
620 		fpu_save->sv_87.s87_tw = 0xffff;
621 		fpu_save->sv_87.s87_cw = x87_cw;
622 		break;
623 	case FPU_SAVE_FXSAVE:
624 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
625 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
626 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
627 		fpu_save->sv_xmm.fx_cw = x87_cw;
628 		break;
629 	case FPU_SAVE_XSAVE:
630 	case FPU_SAVE_XSAVEOPT:
631 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
632 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
633 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
634 		fpu_save->sv_xmm.fx_cw = x87_cw;
635 		if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
636 			fpu_xstate_reload(fpu_save, XCR0_X87);
637 		}
638 		break;
639 	}
640 
641 	pcb = lwp_getpcb(l);
642 	pcb->pcb_fpu_dflt_cw = x87_cw;
643 }
644 
645 void
646 fpu_sigreset(struct lwp *l)
647 {
648 	union savefpu *fpu_save = fpu_lwp_area(l);
649 	struct pcb *pcb = lwp_getpcb(l);
650 
651 	/*
652 	 * For signal handlers the register values don't matter. Just reset
653 	 * a few fields.
654 	 */
655 	if (i386_use_fxsave) {
656 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
657 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
658 		fpu_save->sv_xmm.fx_tw = 0;
659 		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
660 	} else {
661 		fpu_save->sv_87.s87_tw = 0xffff;
662 		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
663 	}
664 }
665 
666 /* -------------------------------------------------------------------------- */
667 
668 static void
669 process_xmm_to_s87(const struct fxsave *sxmm, struct save87 *s87)
670 {
671 	unsigned int tag, ab_tag;
672 	const struct fpaccfx *fx_reg;
673 	struct fpacc87 *s87_reg;
674 	int i;
675 
676 	/*
677 	 * For historic reasons core dumps and ptrace all use the old save87
678 	 * layout.  Convert the important parts.
679 	 * getucontext gets what we give it.
680 	 * setucontext should return something given by getucontext, but
681 	 * we are (at the moment) willing to change it.
682 	 *
683 	 * It really isn't worth setting the 'tag' bits to 01 (zero) or
684 	 * 10 (NaN etc) since the processor will set any internal bits
685 	 * correctly when the value is loaded (the 287 believed them).
686 	 *
687  * Additionally the s87_tw and fx_tw tag words are 'indexed' by the actual
688 	 * register numbers, whereas the registers themselves have ST(0)
689 	 * first. Pairing the values and tags can only be done with
690 	 * reference to the 'top of stack'.
691 	 *
692 	 * If any x87 registers are used, they will typically be from
693 	 * r7 downwards - so the high bits of the tag register indicate
694 	 * used registers. The conversions are not optimised for this.
695 	 *
696 	 * The ABI we use requires the FP stack to be empty on every
697 	 * function call. I think this means that the stack isn't expected
698 	 * to overflow - overflow doesn't drop a core in my testing.
699 	 *
700 	 * Note that this code writes to all of the 's87' structure that
701 	 * actually gets written to userspace.
702 	 */
703 
704 	/* FPU control/status */
705 	s87->s87_cw = sxmm->fx_cw;
706 	s87->s87_sw = sxmm->fx_sw;
707 	/* tag word handled below */
708 	s87->s87_ip = sxmm->fx_ip;
709 	s87->s87_opcode = sxmm->fx_opcode;
710 	s87->s87_dp = sxmm->fx_dp;
711 
712 	/* FP registers (in stack order) */
713 	fx_reg = sxmm->fx_87_ac;
714 	s87_reg = s87->s87_ac;
715 	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
716 		*s87_reg = fx_reg->r;
717 
718 	/* Tag word and registers. */
719 	ab_tag = sxmm->fx_tw & 0xff;	/* Bits set if valid */
720 	if (ab_tag == 0) {
721 		/* none used */
722 		s87->s87_tw = 0xffff;
723 		return;
724 	}
725 
726 	tag = 0;
727 	/* Separate bits of abridged tag word with zeros */
728 	for (i = 0x80; i != 0; tag <<= 1, i >>= 1)
729 		tag |= ab_tag & i;
730 	/* Replicate and invert so that 0 => 0b11 and 1 => 0b00 */
731 	s87->s87_tw = (tag | tag >> 1) ^ 0xffff;
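	/*
	 * For instance, an abridged tag of 0x80 (only physical register 7
	 * in use) spreads to tag = 0x8000, and the line above then yields
	 * s87_tw = 0x3fff: field 7 becomes 00 (valid), all others 11 (empty).
	 */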
732 }
733 
734 static void
735 process_s87_to_xmm(const struct save87 *s87, struct fxsave *sxmm)
736 {
737 	unsigned int tag, ab_tag;
738 	struct fpaccfx *fx_reg;
739 	const struct fpacc87 *s87_reg;
740 	int i;
741 
742 	/*
743 	 * ptrace gives us registers in the save87 format and
744 	 * we must convert them to the correct format.
745 	 *
746  * This code is normally used when overwriting the process's
747  * registers (in the pcb), so it mustn't change any other fields.
748  *
749  * There is a lot of padding in 'struct fxsave'; if the destination
750  * is written to userspace, it must be zeroed first.
751 	 */
752 
753 	/* FPU control/status */
754 	sxmm->fx_cw = s87->s87_cw;
755 	sxmm->fx_sw = s87->s87_sw;
756 	/* tag word handled below */
757 	sxmm->fx_ip = s87->s87_ip;
758 	sxmm->fx_opcode = s87->s87_opcode;
759 	sxmm->fx_dp = s87->s87_dp;
760 
761 	/* Tag word */
762 	tag = s87->s87_tw;	/* 0b11 => unused */
763 	if (tag == 0xffff) {
764 		/* All unused - values don't matter, zero for safety */
765 		sxmm->fx_tw = 0;
766 		memset(&sxmm->fx_87_ac, 0, sizeof sxmm->fx_87_ac);
767 		return;
768 	}
769 
770 	tag ^= 0xffff;		/* So 0b00 is unused */
771 	tag |= tag >> 1;	/* Look at even bits */
772 	ab_tag = 0;
773 	i = 1;
774 	do
775 		ab_tag |= tag & i;
776 	while ((tag >>= 1) >= (i <<= 1));
777 	sxmm->fx_tw = ab_tag;
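	/*
	 * For instance, an s87_tw of 0x3fff (only field 7 marked valid)
	 * compresses to an abridged tag of 0x80 here, the inverse of the
	 * expansion done in process_xmm_to_s87().
	 */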
778 
779 	/* FP registers (in stack order) */
780 	fx_reg = sxmm->fx_87_ac;
781 	s87_reg = s87->s87_ac;
782 	for (i = 0; i < 8; fx_reg++, s87_reg++, i++)
783 		fx_reg->r = *s87_reg;
784 }
785 
786 void
787 process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
788 {
789 	union savefpu *fpu_save = fpu_lwp_area(l);
790 
791 	if (i386_use_fxsave) {
792 		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
793 
794 		/*
795 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
796 		 */
797 		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
798 		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
799 
800 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
801 	} else {
802 		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
803 	}
804 }
805 
806 void
807 process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
808 {
809 	union savefpu *fpu_save = fpu_lwp_area(l);
810 
811 	if (i386_use_fxsave) {
812 		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
813 		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
814 	} else {
815 		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
816 	}
817 }
818 
819 void
820 process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
821 {
822 	union savefpu *fpu_save = fpu_lwp_area(l);
823 
824 	if (i386_use_fxsave) {
825 		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
826 	} else {
827 		memset(fpregs, 0, sizeof(*fpregs));
828 		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
829 	}
830 }
831 
832 void
833 process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
834 {
835 	union savefpu *fpu_save = fpu_lwp_area(l);
836 
837 	if (i386_use_fxsave) {
838 		memset(fpregs, 0, sizeof(*fpregs));
839 		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
840 	} else {
841 		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
842 	}
843 }
844 
845 int
846 process_read_xstate(struct lwp *l, struct xstate *xstate)
847 {
848 	union savefpu *fpu_save = fpu_lwp_area(l);
849 
850 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
851 		/* Convert from legacy FSAVE format. */
852 		memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
853 		process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);
854 
855 		/* We only got x87 data. */
856 		xstate->xs_rfbm = XCR0_X87;
857 		xstate->xs_xstate_bv = XCR0_X87;
858 		return 0;
859 	}
860 
861 	/* Copy the legacy area. */
862 	memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
863 	    sizeof(xstate->xs_fxsave));
864 
865 	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
866 		/* FXSAVE means we've got x87 + SSE data. */
867 		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
868 		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
869 		return 0;
870 	}
871 
872 	/* Copy the bitmap indicating which states are available. */
873 	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
874 	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
875 	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));
876 
877 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
878 	if (xstate->xs_xstate_bv & xcr0_val) {				\
879 		KASSERT(x86_xsave_offsets[xsave_val]			\
880 		    >= sizeof(struct xsave_header));			\
881 		KASSERT(x86_xsave_sizes[xsave_val]			\
882 		    >= sizeof(xstate->field));				\
883 		memcpy(&xstate->field,					\
884 		    (char*)fpu_save + x86_xsave_offsets[xsave_val],	\
885 		    sizeof(xstate->field));				\
886 	}
887 
888 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
889 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
890 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
891 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
892 
893 #undef COPY_COMPONENT
894 
895 	return 0;
896 }
897 
898 int
899 process_verify_xstate(const struct xstate *xstate)
900 {
901 	/* xstate_bv must be a subset of RFBM */
902 	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
903 		return EINVAL;
904 
905 	switch (x86_fpu_save) {
906 	case FPU_SAVE_FSAVE:
907 		if ((xstate->xs_rfbm & ~XCR0_X87))
908 			return EINVAL;
909 		break;
910 	case FPU_SAVE_FXSAVE:
911 		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
912 			return EINVAL;
913 		break;
914 	default:
915 		/* Verify that no unsupported features are enabled */
916 		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
917 			return EINVAL;
918 	}
919 
920 	return 0;
921 }
922 
923 int
924 process_write_xstate(struct lwp *l, const struct xstate *xstate)
925 {
926 	union savefpu *fpu_save = fpu_lwp_area(l);
927 
928 	/* Convert data into legacy FSAVE format. */
929 	if (x86_fpu_save == FPU_SAVE_FSAVE) {
930 		if (xstate->xs_xstate_bv & XCR0_X87)
931 			process_xmm_to_s87(&xstate->xs_fxsave, &fpu_save->sv_87);
932 		return 0;
933 	}
934 
935 	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
936 	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
937 		/*
938 		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
939 		 *           fpu_save->sv_xsave_hdr.xsh_xstate_bv"
940 		 */
941 		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
942 		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
943 		    xstate->xs_xstate_bv;
944 	}
945 
946 	if (xstate->xs_xstate_bv & XCR0_X87) {
947 		/*
948 		 * X87 state is split into two areas, interspersed with SSE
949 		 * data.
950 		 */
951 		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
952 		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
953 		    sizeof(xstate->xs_fxsave.fx_87_ac));
954 	}
955 
956 	/*
957 	 * Copy MXCSR if either SSE or AVX state is requested, to match the
958 	 * XSAVE behavior for those flags.
959 	 */
960 	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
961 		/*
962 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
963 		 */
964 		fpu_save->sv_xmm.fx_mxcsr_mask = xstate->xs_fxsave.fx_mxcsr_mask
965 		    & x86_fpu_mxcsr_mask;
966 		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
967 		    fpu_save->sv_xmm.fx_mxcsr_mask;
968 	}
969 
970 	if (xstate->xs_xstate_bv & XCR0_SSE) {
971 		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
972 		    xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
973 	}
974 
975 #define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
976 	if (xstate->xs_xstate_bv & xcr0_val) {				\
977 		KASSERT(x86_xsave_offsets[xsave_val]			\
978 		    >= sizeof(struct xsave_header));			\
979 		KASSERT(x86_xsave_sizes[xsave_val]			\
980 		    >= sizeof(xstate->field));				\
981 		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val],	\
982 		    &xstate->field, sizeof(xstate->field));		\
983 	}
984 
985 	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
986 	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
987 	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
988 	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);
989 
990 #undef COPY_COMPONENT
991 
992 	return 0;
993 }
994