/*	$NetBSD: fpu.c,v 1.12 2016/09/29 17:01:43 maxv Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

/*-
 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1990 William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.12 2016/09/29 17:01:43 maxv Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/kernel.h>

#include <machine/cpu.h>
#include <machine/intr.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <x86/cpu.h>
#include <x86/fpu.h>

/* Check some duplicate definitions match */
#include <machine/fenv.h>

#ifdef XEN
#define clts() HYPERVISOR_fpu_taskswitch(0)
#define stts() HYPERVISOR_fpu_taskswitch(1)
#endif

static inline union savefpu *
process_fpframe(struct lwp *lwp)
{
	struct pcb *pcb = lwp_getpcb(lwp);

	return &pcb->pcb_savefpu;
}

/*
 * The following table is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if SSE simd instructions generate errors
 * on more than one value, or if the user process modifies the control
 * word while a status word bit is already set (which is a sign of
 * bad coding).
 * We have no choice but to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
 *
 * The mechanism has a static table with 128 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * The FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The code to choose one of these values does these steps:
 * 1) Throw away status word bits that cannot be masked.
 * 2) Throw away the bits currently masked in the control word,
 *    assuming the user isn't interested in them anymore.
 * 3) Reinsert status word bit 7 (stack fault) if it is set, which
 *    cannot be masked but must be preserved.
 *    'Stack fault' is a sub-class of 'invalid operation'.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 * 1  Invalid operation (FP_X_INV)
 * 1a   Stack underflow
 * 1b   Stack overflow
 * 1c   Operand of unsupported format
 * 1d   SNaN operand.
 * 2  QNaN operand (not an exception, irrelevant here)
 * 3  Any other invalid-operation not mentioned above or zero divide
 *      (FP_X_INV, FP_X_DZ)
 * 4  Denormal operand (FP_X_DNML)
 * 5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
 * 6  Inexact result (FP_X_IMP)
 *
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
 * They are in the same order, but there is no EN_SW_STACK_FAULT in the
 * mxcsr status.
 *
 * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
 * are swapped).
 *
 * This table assumes that any stack fault is cleared - so that an INVOP
 * fault will only be reported as FLTSUB once.
 * This might not happen if the mask is being changed.
 */
#define FPE_xxx1(f) (f & EN_SW_INVOP \
		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
	: f & EN_SW_DENORM ? FPE_FLTUND \
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32
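
/*
 * Worked example (illustrative; assumes the conventional x87 status bit
 * values EN_SW_ZERODIV == 0x04 and EN_SW_PRECLOSS == 0x20): an x87
 * divide by zero with a pending precision-loss flag gives sw = 0x24.
 * If the control word masks precision loss (cw & 0x20), fputrap()
 * below computes
 *
 *	statbits = sw & ~(cw & 0x3f) = 0x24 & ~0x20 = 0x04
 *
 * and fpetable[0x04] expands, via FPE_xxx1(4), to FPE_FLTDIV, which is
 * delivered to the process as the si_code of its SIGFPE.
 */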

/*
 * Init the FPU.
 *
 * This might not be strictly necessary since it will be initialised
 * for each process.  However it does no harm.
 */
void
fpuinit(struct cpu_info *ci)
{
	if (!i386_fpu_present)
		return;

	clts();
	fninit();
	stts();
}
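
/*
 * Note on the clts()/stts() pairing above: clts() clears CR0.TS so the
 * fninit itself cannot raise a DNA fault; stts() sets CR0.TS again, so
 * the first floating-point instruction a process executes traps into
 * fpudna() below, which lazily attaches that process's FPU state.
 * Under XEN the same effect is obtained with HYPERVISOR_fpu_taskswitch()
 * via the macros near the top of this file.
 */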

static void
send_sigill(void *rip)
{
	/* No fpu (486SX) - send SIGILL */
	ksiginfo_t ksi;

	x86_enable_intr();
	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGILL;
	ksi.ksi_addr = rip;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}

/*
 * This is a synchronous trap on either an x87 instruction (due to an
 * unmasked error on the previous x87 instruction) or on an SSE/SSE2 etc
 * instruction due to an error on the instruction itself.
 *
 * If the trap actually generates a signal, the fpu state is saved
 * and copied onto the process's user stack, then recovered from there
 * when the signal returns (or from the jmp_buf if the signal handler
 * exits with a longjmp()).
 *
 * All this code needs to do is save the reason for the trap.
 * For x87 traps the status word bits need clearing to stop the trap
 * from re-occurring.
 *
 * The mxcsr bits are 'sticky' and need clearing to avoid confusing a
 * later trap.
 *
 * Since this is a synchronous trap, the fpu registers must still belong
 * to the correct process (we trap through an interrupt gate so that
 * interrupts are disabled on entry).
 * Interrupts (which had better include IPIs) are left disabled until
 * we've finished looking at the fpu registers.
 *
 * For amd64 the calling code (in amd64_trap.S) has already checked
 * that we trapped from usermode.
 */

void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpu trap from kernel, trapframe %p\n", frame);

	if (i386_fpu_present == 0) {
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	/*
	 * At this point, fpcurlwp should be curlwp.  If it wasn't, the TS bit
	 * should be set, and we should have gotten a DNA exception.
	 */
	KASSERT(curcpu()->ci_fpcurlwp == curlwp);

	if (frame->tf_trapno == T_XMM) {
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/*
		 * Remove masked exceptions and non-status bits: the mxcsr
		 * mask bits (bits 7..12) sit 7 above their status bits
		 * (bits 0..5), so the shift aligns mask with status.
		 */
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this as an XMM status */
		statbits |= 0x10000;
	} else {
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove masked interrupts */
		statbits = sw & ~(cw & 0x3f);
	}

	/* Doesn't matter now if we get pre-empted */
	x86_enable_intr();

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}
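
/*
 * Viewed from userland, the path above is what turns an unmasked FP
 * exception into a SIGFPE with a precise si_code.  A minimal test
 * program (a sketch for illustration only, not part of the kernel)
 * might look like:
 *
 *	#include <fenv.h>
 *	#include <signal.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void
 *	handler(int sig, siginfo_t *si, void *ctx)
 *	{
 *		_exit(si->si_code == FPE_FLTDIV ? 0 : 1);
 *	}
 *
 *	int
 *	main(void)
 *	{
 *		struct sigaction sa;
 *		volatile double zero = 0.0;
 *
 *		memset(&sa, 0, sizeof(sa));
 *		sa.sa_sigaction = handler;
 *		sa.sa_flags = SA_SIGINFO;
 *		sigaction(SIGFPE, &sa, NULL);
 *		feenableexcept(FE_DIVBYZERO);
 *		return 1.0 / zero > 0.0;
 *	}
 *
 * The divide traps, fputrap() maps the zero-divide status bit through
 * fpetable[] to FPE_FLTDIV, and the handler observes it in si_code.
 */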

/*
 * Implement the device-not-available (DNA) exception.
 *
 * If we were the last lwp to use the FPU, we can simply return.
 * Otherwise, we save the previous state, if necessary, and restore
 * our last saved state.
 *
 * Called directly from the trap 0x13 entry with interrupts still disabled.
 */
void
fpudna(struct trapframe *frame)
{
	struct cpu_info *ci;
	struct lwp *l, *fl;
	struct pcb *pcb;
	int s;

	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("fpudna from kernel, ip %p, trapframe %p\n",
		    (void *)X86_TF_RIP(frame), frame);

	if (i386_fpu_present == 0) {
		send_sigill((void *)X86_TF_RIP(frame));
		return;
	}

	ci = curcpu();

	/* Save soft spl level - interrupts are hard disabled */
	s = splhigh();

	/* Save state on current CPU. */
	l = ci->ci_curlwp;
	pcb = lwp_getpcb(l);
	fl = ci->ci_fpcurlwp;
	if (fl != NULL) {
		/*
		 * It seems we can get here on Xen even if we didn't
		 * switch lwp.  In that case, do nothing.
		 */
		if (fl == l) {
			KASSERT(pcb->pcb_fpcpu == ci);
			clts();
			splx(s);
			return;
		}
		fpusave_cpu(true);
	}

	/* Save our state if on a remote CPU. */
	if (pcb->pcb_fpcpu != NULL) {
		/* Explicitly disable preemption before dropping spl. */
		kpreempt_disable();
		splx(s);

		/* Actually enable interrupts */
		x86_enable_intr();

		fpusave_lwp(l, true);
		KASSERT(pcb->pcb_fpcpu == NULL);
		s = splhigh();
		kpreempt_enable();
	}

	/*
	 * Restore state on this CPU, or initialize.  Ensure that
	 * the entire update is atomic with respect to FPU-sync IPIs.
	 */
	clts();
	ci->ci_fpcurlwp = l;
	pcb->pcb_fpcpu = ci;

	if (i386_use_fxsave) {
		if (x86_xsave_features != 0) {
			xrstor(&pcb->pcb_savefpu, x86_xsave_features);
		} else {
			/*
			 * AMD FPUs do not restore FIP, FDP, and FOP on
			 * fxrstor, leaking the previous process's execution
			 * history.  Clear them manually by loading a zero.
			 *
			 * Clear the ES bit in the x87 status word if it is
			 * currently set, in order to avoid causing a fault
			 * in the upcoming load.
			 */
			if (fngetsw() & 0x80)
				fnclex();
			fldummy();

			fxrstor(&pcb->pcb_savefpu);
		}
	} else {
		frstor(&pcb->pcb_savefpu);
	}

	KASSERT(ci == curcpu());
	splx(s);
}
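
/*
 * The invariant maintained by fpudna() and the save functions below is
 * that ci->ci_fpcurlwp and pcb->pcb_fpcpu either are both clear or
 * point at each other:
 *
 *	ci->ci_fpcurlwp == l  if and only if  pcb->pcb_fpcpu == ci
 *
 * fpudna() establishes the pairing, fpusave_cpu() dissolves it on the
 * local CPU, and fpusave_lwp() chases an LWP's state to whichever CPU
 * currently holds it.
 */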

/*
 * Save current CPU's FPU state.  Must be called at IPL_HIGH.
 */
void
fpusave_cpu(bool save)
{
	struct cpu_info *ci;
	struct pcb *pcb;
	struct lwp *l;

	KASSERT(curcpu()->ci_ilevel == IPL_HIGH);

	ci = curcpu();
	l = ci->ci_fpcurlwp;
	if (l == NULL) {
		return;
	}
	pcb = lwp_getpcb(l);

	if (save) {
		clts();
		if (i386_use_fxsave) {
			if (x86_xsave_features != 0)
				xsave(&pcb->pcb_savefpu, x86_xsave_features);
			else
				fxsave(&pcb->pcb_savefpu);
		} else {
			fnsave(&pcb->pcb_savefpu);
		}
	}

	stts();
	pcb->pcb_fpcpu = NULL;
	ci->ci_fpcurlwp = NULL;
}

/*
 * Save l's FPU state, which may be on this processor or another processor.
 * It may take some time, so we avoid disabling preemption where possible.
 * Caller must know that the target LWP is stopped, otherwise this routine
 * may race against it.
 */
void
fpusave_lwp(struct lwp *l, bool save)
{
	struct pcb *pcb = lwp_getpcb(l);
	struct cpu_info *oci;
	int s, spins, ticks;

	spins = 0;
	ticks = hardclock_ticks;
	for (;;) {
		s = splhigh();
		oci = pcb->pcb_fpcpu;
		if (oci == NULL) {
			splx(s);
			break;
		}
		if (oci == curcpu()) {
			KASSERT(oci->ci_fpcurlwp == l);
			fpusave_cpu(save);
			splx(s);
			break;
		}
		splx(s);
#ifdef XEN
		if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) {
			panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.",
			    cpu_name(oci));
		}
#else /* XEN */
		x86_send_ipi(oci, X86_IPI_SYNCH_FPU);
#endif
		while (pcb->pcb_fpcpu == oci && ticks == hardclock_ticks) {
			x86_pause();
			spins++;
		}
		if (spins > 100000000) {
			panic("fpusave_lwp: FP state did not save");
		}
	}
}

void
fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save = process_fpframe(l);

	if (i386_use_fxsave)
		fpu_save->sv_xmm.fx_cw = x87_cw;
	else
		fpu_save->sv_87.s87_cw = x87_cw;
	fpu_save->sv_os.fxo_dflt_cw = x87_cw;
}

/*
 * Exec needs to clear the fpu save area to avoid leaking info from the
 * old process to userspace.
 */
void
fpu_save_area_clear(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save;

	fpusave_lwp(l, false);
	fpu_save = process_fpframe(l);

	if (i386_use_fxsave) {
		memset(&fpu_save->sv_xmm, 0, sizeof(fpu_save->sv_xmm));
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
		fpu_save->sv_xmm.fx_cw = x87_cw;
	} else {
		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = x87_cw;
	}
	fpu_save->sv_os.fxo_dflt_cw = x87_cw;
}

/* For signal handlers the register values don't matter */
void
fpu_save_area_reset(struct lwp *l)
{
	union savefpu *fpu_save = process_fpframe(l);

	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = __INITIAL_MXCSR_MASK__;
		fpu_save->sv_xmm.fx_tw = 0;
		fpu_save->sv_xmm.fx_cw = fpu_save->sv_os.fxo_dflt_cw;
	} else {
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = fpu_save->sv_os.fxo_dflt_cw;
	}
}

/* During fork the xsave data needs to be copied */
void
fpu_save_area_fork(struct pcb *pcb2, const struct pcb *pcb1)
{
	ssize_t extra;

	/*
	 * The pcb itself has been copied, but the xsave area
	 * extends further.
	 */
	extra = offsetof(struct pcb, pcb_savefpu) + x86_fpu_save_size -
	    sizeof(struct pcb);

	if (extra > 0)
		memcpy(pcb2 + 1, pcb1 + 1, extra);
}
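
/*
 * For illustration (the figures are an example, not a guarantee): with
 * XSAVE managing x87, SSE and AVX state, x86_fpu_save_size could be 832
 * bytes (512-byte legacy area + 64-byte header + 256 bytes of AVX
 * state), while struct pcb only embeds the start of pcb_savefpu.  The
 * 'extra' computed above is the tail lying past sizeof(struct pcb) in
 * the same allocation, which is why it is copied from pcb1 + 1 to
 * pcb2 + 1.
 */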

/*
 * Write the FP registers.
 * The buffer usually comes from userspace, so it should not be trusted.
 */
void
process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
{
	union savefpu *fpu_save;

	fpusave_lwp(l, false);
	fpu_save = process_fpframe(l);

	if (i386_use_fxsave) {
		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));
		/* Invalid bits in the mxcsr_mask will cause faults */
		fpu_save->sv_xmm.fx_mxcsr_mask &= __INITIAL_MXCSR_MASK__;
	} else {
		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
	}
}

/* We need to use the x87 format for 32-bit ptrace */
void
process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
{
	union savefpu *fpu_save;

	if (i386_use_fxsave) {
		/* Save so we don't lose the xmm registers */
		fpusave_lwp(l, true);
		fpu_save = process_fpframe(l);
		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
	} else {
		fpusave_lwp(l, false);
		fpu_save = process_fpframe(l);
		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
	}
}

/*
 * Read the fpu registers; the buffer is usually copied out to userspace.
 * Ensure we write to the entire structure.
 */
void
process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
{
	union savefpu *fpu_save;

	fpusave_lwp(l, true);
	fpu_save = process_fpframe(l);

	if (i386_use_fxsave) {
		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
	} else {
		/* This usually gets copied to userspace */
		memset(fpregs, 0, sizeof(*fpregs));
		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
	}
}

void
process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
{
	union savefpu *fpu_save;

	fpusave_lwp(l, true);
	fpu_save = process_fpframe(l);

	if (i386_use_fxsave) {
		memset(fpregs, 0, 12);
		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
	} else {
		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
	}
}
653