xref: /netbsd-src/sys/arch/x86/x86/fpu.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /*	$NetBSD: fpu.c,v 1.28 2018/02/09 08:58:01 maxv Exp $	*/
2 
3 /*
4  * Copyright (c) 2008 The NetBSD Foundation, Inc.  All
5  * rights reserved.
6  *
7  * This code is derived from software developed for The NetBSD Foundation
8  * by Andrew Doran.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 /*
33  * Copyright (c) 1991 The Regents of the University of California.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  * 3. Neither the name of the University nor the names of its contributors
45  *    may be used to endorse or promote products derived from this software
46  *    without specific prior written permission.
47  *
48  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58  * SUCH DAMAGE.
59  *
60  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
61  */
62 
63 /*
64  * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
65  * Copyright (c) 1990 William Jolitz.
66  *
67  * Redistribution and use in source and binary forms, with or without
68  * modification, are permitted provided that the following conditions
69  * are met:
70  * 1. Redistributions of source code must retain the above copyright
71  *    notice, this list of conditions and the following disclaimer.
72  * 2. Redistributions in binary form must reproduce the above copyright
73  *    notice, this list of conditions and the following disclaimer in the
74  *    documentation and/or other materials provided with the distribution.
75  * 3. All advertising materials mentioning features or use of this software
76  *    must display the following acknowledgement:
77  *	This product includes software developed by the University of
78  *	California, Berkeley and its contributors.
79  * 4. Neither the name of the University nor the names of its contributors
80  *    may be used to endorse or promote products derived from this software
81  *    without specific prior written permission.
82  *
83  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
85  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
86  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
87  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
88  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
89  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
90  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
91  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
92  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
93  * SUCH DAMAGE.
94  *
95  *	@(#)npx.c	7.2 (Berkeley) 5/12/91
96  */
97 
98 #include <sys/cdefs.h>
99 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.28 2018/02/09 08:58:01 maxv Exp $");
100 
101 #include "opt_multiprocessor.h"
102 
103 #include <sys/param.h>
104 #include <sys/systm.h>
105 #include <sys/conf.h>
106 #include <sys/cpu.h>
107 #include <sys/file.h>
108 #include <sys/proc.h>
109 #include <sys/kernel.h>
110 
111 #include <machine/cpu.h>
112 #include <machine/intr.h>
113 #include <machine/cpufunc.h>
114 #include <machine/pcb.h>
115 #include <machine/trap.h>
116 #include <machine/specialreg.h>
117 #include <x86/cpu.h>
118 #include <x86/fpu.h>
119 
120 /* Check some duplicate definitions match */
121 #include <machine/fenv.h>
122 
#ifdef XEN
/*
 * Under Xen the kernel cannot touch CR0.TS directly; route the
 * clts()/stts() pair through the fpu_taskswitch hypercall instead.
 */
#define clts() HYPERVISOR_fpu_taskswitch(0)
#define stts() HYPERVISOR_fpu_taskswitch(1)
#endif
127 
128 static uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;
129 
130 static inline union savefpu *
131 process_fpframe(struct lwp *lwp)
132 {
133 	struct pcb *pcb = lwp_getpcb(lwp);
134 
135 	return &pcb->pcb_savefpu;
136 }
137 
138 /*
139  * The following table is used to ensure that the FPE_... value
140  * that is passed as a trapcode to the signal handler of the user
141  * process does not have more than one bit set.
142  *
143  * Multiple bits may be set if SSE simd instructions generate errors
144  * on more than one value or if the user process modifies the control
145  * word while a status word bit is already set (which this is a sign
146  * of bad coding).
 * We have no choice but to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
149  *
150  * The mechanism has a static table with 127 entries.  Each combination
151  * of the 7 FPU status word exception bits directly translates to a
152  * position in this table, where a single FPE_... value is stored.
153  * This FPE_... value stored there is considered the "most important"
154  * of the exception bits and will be sent as the signal code.  The
155  * precedence of the bits is based upon Intel Document "Numerical
156  * Applications", Chapter "Special Computational Situations".
157  *
158  * The code to choose one of these values does these steps:
159  * 1) Throw away status word bits that cannot be masked.
160  * 2) Throw away the bits currently masked in the control word,
161  *    assuming the user isn't interested in them anymore.
162  * 3) Reinsert status word bit 7 (stack fault) if it is set, which
 *    cannot be masked but must be preserved.
164  *    'Stack fault' is a sub-class of 'invalid operation'.
165  * 4) Use the remaining bits to point into the trapcode table.
166  *
167  * The 6 maskable bits in order of their preference, as stated in the
168  * above referenced Intel manual:
169  * 1  Invalid operation (FP_X_INV)
170  * 1a   Stack underflow
171  * 1b   Stack overflow
172  * 1c   Operand of unsupported format
173  * 1d   SNaN operand.
 * 2  QNaN operand (not an exception, irrelevant here)
175  * 3  Any other invalid-operation not mentioned above or zero divide
176  *      (FP_X_INV, FP_X_DZ)
177  * 4  Denormal operand (FP_X_DNML)
178  * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
179  * 6  Inexact result (FP_X_IMP)
180  *
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
182  * They are in the same order, but there is no EN_SW_STACK_FAULT in the mmx
183  * status.
184  *
185  * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
186  * are swapped).
187  *
188  * This table assumes that any stack fault is cleared - so that an INVOP
189  * fault will only be reported as FLTSUB once.
190  * This might not happen if the mask is being changed.
191  */
/*
 * FPE_xxx1(f) maps one 7-bit combination of status bits to its single
 * FPE_ code, following the precedence order described above.  The
 * FPE_xxxN() helpers expand it over N consecutive index values so the
 * whole table is generated at compile time.
 */
#define FPE_xxx1(f) (f & EN_SW_INVOP \
		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
	: f & EN_SW_DENORM ? FPE_FLTUND \
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
/* Indexed by the low 7 FPU status bits; consumed by fputrap(). */
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32
214 
215 /*
216  * Init the FPU.
217  *
218  * This might not be strictly necessary since it will be initialised
219  * for each process.  However it does no harm.
220  */
void
fpuinit(struct cpu_info *ci)
{
	/* "ci" is unused here; kept for the per-CPU init call signature. */

	/*
	 * Make the FPU accessible (clear CR0.TS), reset it to its
	 * power-on defaults, then set TS again so the next FPU use
	 * raises a DNA trap into fpudna().
	 */
	clts();
	fninit();
	stts();
}
229 
230 /*
231  * Get the value of MXCSR_MASK supported by the CPU.
232  */
233 void
234 fpuinit_mxcsr_mask(void)
235 {
236 #ifndef XEN
237 	union savefpu fpusave __aligned(16);
238 	u_long psl;
239 
240 	memset(&fpusave, 0, sizeof(fpusave));
241 
242 	/* Disable interrupts, and enable FPU */
243 	psl = x86_read_psl();
244 	x86_disable_intr();
245 	clts();
246 
247 	/* Fill in the FPU area */
248 	fxsave(&fpusave);
249 
250 	/* Restore previous state */
251 	stts();
252 	x86_write_psl(psl);
253 
254 	if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
255 		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
256 	} else {
257 		x86_fpu_mxcsr_mask = fpusave.sv_xmm.fx_mxcsr_mask;
258 	}
259 #else
260 	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
261 #endif
262 }
263 
264 /*
265  * This is a synchronous trap on either an x87 instruction (due to an
266  * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc
267  * instruction due to an error on the instruction itself.
268  *
269  * If trap actually generates a signal, then the fpu state is saved
270  * and then copied onto the process's user-stack, and then recovered
271  * from there when the signal returns (or from the jmp_buf if the
272  * signal handler exits with a longjmp()).
273  *
274  * All this code need to do is save the reason for the trap.
275  * For x87 interrupts the status word bits need clearing to stop the
276  * trap re-occurring.
277  *
278  * The mxcsr bits are 'sticky' and need clearing to not confuse a later trap.
279  *
280  * Since this is a synchronous trap, the fpu registers must still belong
281  * to the correct process (we trap through an interrupt gate so that
282  * interrupts are disabled on entry).
283  * Interrupts (these better include IPIs) are left disabled until we've
284  * finished looking at fpu registers.
285  *
286  * For amd64 the calling code (in amd64_trap.S) has already checked
287  * that we trapped from usermode.
288  */
289 
void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (!USERMODE(frame->tf_cs))
		panic("fpu trap from kernel, trapframe %p\n", frame);

	/*
	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
	 * should be set, and we should have gotten a DNA exception.
	 */
	KASSERT(curcpu()->ci_fpcurlwp == curlwp);

	if (frame->tf_trapno == T_XMM) {
		/* SSE fault: the cause is recorded in mxcsr. */
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/*
		 * Remove masked interrupts and non-status bits.  The mxcsr
		 * mask bits sit 7 bits above the matching status bits, hence
		 * the shift.
		 */
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this is an XMM status */
		statbits |= 0x10000;
	} else {
		/* x87 fault: read the control/status word pair. */
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove masked interrupts */
		statbits = sw & ~(cw & 0x3f);
	}

	/* Doesn't matter now if we get pre-empted */
	x86_enable_intr();

	/* Map the surviving status bits to a single FPE_ code and signal. */
	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}
339 
340 /*
341  * Implement device not available (DNA) exception
342  *
343  * If we were the last lwp to use the FPU, we can simply return.
344  * Otherwise, we save the previous state, if necessary, and restore
345  * our last saved state.
346  *
347  * Called directly from the trap 0x13 entry with interrupts still disabled.
348  */
void
fpudna(struct trapframe *frame)
{
	struct cpu_info *ci;
	struct lwp *l, *fl;
	struct pcb *pcb;
	int s;

	if (!USERMODE(frame->tf_cs))
		panic("fpudna from kernel, ip %p, trapframe %p\n",
		    (void *)X86_TF_RIP(frame), frame);

	ci = curcpu();

	/* Save soft spl level - interrupts are hard disabled */
	s = splhigh();

	/* Save state on current CPU. */
	l = ci->ci_curlwp;
	pcb = lwp_getpcb(l);
	fl = ci->ci_fpcurlwp;
	if (fl != NULL) {
		/*
		 * It seems we can get here on Xen even if we didn't
		 * switch lwp.  In this case do nothing
		 */
		if (fl == l) {
			KASSERT(pcb->pcb_fpcpu == ci);
			clts();
			splx(s);
			return;
		}
		/* Some other lwp's state lives here; flush it to its pcb. */
		fpusave_cpu(true);
	}

	/* Save our state if on a remote CPU. */
	if (pcb->pcb_fpcpu != NULL) {
		/* Explicitly disable preemption before dropping spl. */
		kpreempt_disable();
		splx(s);

		/* Actually enable interrupts */
		x86_enable_intr();

		/*
		 * fpusave_lwp() may send an IPI and spin waiting for the
		 * remote CPU, so interrupts must be enabled around it.
		 */
		fpusave_lwp(l, true);
		KASSERT(pcb->pcb_fpcpu == NULL);
		s = splhigh();
		kpreempt_enable();
	}

	/*
	 * Restore state on this CPU, or initialize.  Ensure that
	 * the entire update is atomic with respect to FPU-sync IPIs.
	 */
	clts();
	ci->ci_fpcurlwp = l;
	pcb->pcb_fpcpu = ci;

	switch (x86_fpu_save) {
		case FPU_SAVE_FSAVE:
			frstor(&pcb->pcb_savefpu);
			break;

		case FPU_SAVE_FXSAVE:
			/*
			 * AMD FPU's do not restore FIP, FDP, and FOP on
			 * fxrstor, leaking other process's execution history.
			 * Clear them manually by loading a zero.
			 *
			 * Clear the ES bit in the x87 status word if it is
			 * currently set, in order to avoid causing a fault
			 * in the upcoming load.
			 */
			if (fngetsw() & 0x80)
				fnclex();
			fldummy();
			fxrstor(&pcb->pcb_savefpu);
			break;

		case FPU_SAVE_XSAVE:
		case FPU_SAVE_XSAVEOPT:
			xrstor(&pcb->pcb_savefpu, x86_xsave_features);
			break;
	}

	KASSERT(ci == curcpu());
	splx(s);
}
437 
438 /*
439  * Save current CPU's FPU state.  Must be called at IPL_HIGH.
440  */
441 void
442 fpusave_cpu(bool save)
443 {
444 	struct cpu_info *ci;
445 	struct pcb *pcb;
446 	struct lwp *l;
447 
448 	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);
449 
450 	ci = curcpu();
451 	l = ci->ci_fpcurlwp;
452 	if (l == NULL) {
453 		return;
454 	}
455 	pcb = lwp_getpcb(l);
456 
457 	if (save) {
458 		clts();
459 
460 		switch (x86_fpu_save) {
461 			case FPU_SAVE_FSAVE:
462 				fnsave(&pcb->pcb_savefpu);
463 				break;
464 
465 			case FPU_SAVE_FXSAVE:
466 				fxsave(&pcb->pcb_savefpu);
467 				break;
468 
469 			case FPU_SAVE_XSAVE:
470 				xsave(&pcb->pcb_savefpu, x86_xsave_features);
471 				break;
472 
473 			case FPU_SAVE_XSAVEOPT:
474 				xsaveopt(&pcb->pcb_savefpu, x86_xsave_features);
475 				break;
476 		}
477 	}
478 
479 	stts();
480 	pcb->pcb_fpcpu = NULL;
481 	ci->ci_fpcurlwp = NULL;
482 }
483 
484 /*
485  * Save l's FPU state, which may be on this processor or another processor.
486  * It may take some time, so we avoid disabling preemption where possible.
487  * Caller must know that the target LWP is stopped, otherwise this routine
488  * may race against it.
489  */
void
fpusave_lwp(struct lwp *l, bool save)
{
	struct pcb *pcb = lwp_getpcb(l);
	struct cpu_info *oci;
	int s, spins, ticks;

	spins = 0;
	ticks = hardclock_ticks;
	for (;;) {
		s = splhigh();
		oci = pcb->pcb_fpcpu;
		if (oci == NULL) {
			/* Not resident on any CPU; nothing to flush. */
			splx(s);
			break;
		}
		if (oci == curcpu()) {
			/* Resident on this CPU; flush it directly. */
			KASSERT(oci->ci_fpcurlwp == l);
			fpusave_cpu(save);
			splx(s);
			break;
		}
		splx(s);
		/* Resident on a remote CPU: ask it to flush the state. */
#ifdef XEN
		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
			    cpu_name(oci));
		}
#else
		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
#endif
		/*
		 * Spin until the remote CPU drops ownership, breaking out
		 * once per clock tick to re-send the IPI.
		 *
		 * NOTE(review): "ticks" is never refreshed, so after the
		 * first tick this inner loop no longer spins at all and
		 * "spins" stops counting, making the panic threshold below
		 * unreachable from then on — verify whether that is the
		 * intended resend-every-iteration behaviour.
		 */
		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
			x86_pause();
			spins++;
		}
		if (spins > 100000000) {
			panic("fpusave_lwp: did not");
		}
	}
}
530 
531 void
532 fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
533 {
534 	union savefpu *fpu_save = process_fpframe(l);
535 	struct pcb *pcb = lwp_getpcb(l);
536 
537 	if (i386_use_fxsave) {
538 		fpu_save->sv_xmm.fx_cw = x87_cw;
539 
540 		/* Force a reload of CW */
541 		if ((x87_cw != __INITIAL_NPXCW__) &&
542 		    (x86_fpu_save == FPU_SAVE_XSAVE ||
543 		    x86_fpu_save == FPU_SAVE_XSAVEOPT)) {
544 			fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
545 			    XCR0_X87;
546 		}
547 	} else {
548 		fpu_save->sv_87.s87_cw = x87_cw;
549 	}
550 	pcb->pcb_fpu_dflt_cw = x87_cw;
551 }
552 
553 void
554 fpu_save_area_clear(struct lwp *l, unsigned int x87_cw)
555 {
556 	union savefpu *fpu_save;
557 	struct pcb *pcb;
558 
559 	fpusave_lwp(l, false);
560 	fpu_save = process_fpframe(l);
561 	pcb = lwp_getpcb(l);
562 
563 	if (i386_use_fxsave) {
564 		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
565 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
566 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
567 		fpu_save->sv_xmm.fx_cw = x87_cw;
568 
569 		/* Force a reload of CW */
570 		if ((x87_cw != __INITIAL_NPXCW__) &&
571 		    (x86_fpu_save == FPU_SAVE_XSAVE ||
572 		    x86_fpu_save == FPU_SAVE_XSAVEOPT)) {
573 			fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
574 			    XCR0_X87;
575 		}
576 	} else {
577 		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
578 		fpu_save->sv_87.s87_tw = 0xffff;
579 		fpu_save->sv_87.s87_cw = x87_cw;
580 	}
581 	pcb->pcb_fpu_dflt_cw = x87_cw;
582 }
583 
584 void
585 fpu_save_area_reset(struct lwp *l)
586 {
587 	union savefpu *fpu_save = process_fpframe(l);
588 	struct pcb *pcb = lwp_getpcb(l);
589 
590 	/*
591 	 * For signal handlers the register values don't matter. Just reset
592 	 * a few fields.
593 	 */
594 	if (i386_use_fxsave) {
595 		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
596 		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
597 		fpu_save->sv_xmm.fx_tw = 0;
598 		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
599 	} else {
600 		fpu_save->sv_87.s87_tw = 0xffff;
601 		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
602 	}
603 }
604 
605 void
606 fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1)
607 {
608 	ssize_t extra;
609 
610 	/*
611 	 * The pcb itself has been copied, but the xsave area
612 	 * extends further.
613 	 */
614 	extra = offsetof(struct pcb, pcb_savefpu) + x86_fpu_save_size -
615 	    sizeof (struct pcb);
616 
617 	if (extra > 0)
618 		memcpy(pcb2 + 1, pcb1 + 1, extra);
619 }
620 
621 void
622 process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
623 {
624 	union savefpu *fpu_save;
625 
626 	fpusave_lwp(l, false);
627 	fpu_save = process_fpframe(l);
628 
629 	if (i386_use_fxsave) {
630 		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
631 
632 		/*
633 		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
634 		 */
635 		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
636 		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;
637 
638 		/*
639 		 * Make sure the x87 and SSE bits are set in xstate_bv.
640 		 * Otherwise xrstor will not restore them.
641 		 */
642 		if (x86_fpu_save == FPU_SAVE_XSAVE ||
643 		    x86_fpu_save == FPU_SAVE_XSAVEOPT) {
644 			fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
645 			    (XCR0_X87 | XCR0_SSE);
646 		}
647 	} else {
648 		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
649 	}
650 }
651 
652 void
653 process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
654 {
655 	union savefpu *fpu_save;
656 
657 	if (i386_use_fxsave) {
658 		/* Save so we don't lose the xmm registers */
659 		fpusave_lwp(l, true);
660 		fpu_save = process_fpframe(l);
661 		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
662 
663 		/*
664 		 * Make sure the x87 and SSE bits are set in xstate_bv.
665 		 * Otherwise xrstor will not restore them.
666 		 */
667 		if (x86_fpu_save == FPU_SAVE_XSAVE ||
668 		    x86_fpu_save == FPU_SAVE_XSAVEOPT) {
669 			fpu_save->sv_xsave_hdr.xsh_xstate_bv |=
670 			    (XCR0_X87 | XCR0_SSE);
671 		}
672 	} else {
673 		fpusave_lwp(l, false);
674 		fpu_save = process_fpframe(l);
675 		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
676 	}
677 }
678 
679 void
680 process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
681 {
682 	union savefpu *fpu_save;
683 
684 	fpusave_lwp(l, true);
685 	fpu_save = process_fpframe(l);
686 
687 	if (i386_use_fxsave) {
688 		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
689 	} else {
690 		memset(fpregs, 0, sizeof(*fpregs));
691 		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
692 	}
693 }
694 
695 void
696 process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
697 {
698 	union savefpu *fpu_save;
699 
700 	fpusave_lwp(l, true);
701 	fpu_save = process_fpframe(l);
702 
703 	if (i386_use_fxsave) {
704 		memset(fpregs, 0, sizeof(*fpregs));
705 		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
706 	} else {
707 		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
708 	}
709 }
710