/*	$NetBSD: fpu.c,v 1.86 2023/03/03 14:40:16 riastradh Exp $	*/

/*
 * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
 * rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran and Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

/*
 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1990 William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.86 2023/03/03 14:40:16 riastradh Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <machine/cpu.h>
#include <machine/cpuvar.h>
#include <machine/cputypes.h>
#include <machine/intr.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <x86/cpu.h>
#include <x86/fpu.h>

#ifdef XENPV
#define clts() HYPERVISOR_fpu_taskswitch(0)
#define stts() HYPERVISOR_fpu_taskswitch(1)
#endif

void fpu_handle_deferred(void);
void fpu_switch(struct lwp *, struct lwp *);

uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;

static inline union savefpu *
fpu_lwp_area(struct lwp *l)
{
	struct pcb *pcb = lwp_getpcb(l);
	union savefpu *area = &pcb->pcb_savefpu;

	KASSERT((l->l_flag & LW_SYSTEM) == 0);
	if (l == curlwp) {
		fpu_save();
	}
	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));

	return area;
}

static inline void
fpu_save_lwp(struct lwp *l)
{
	struct pcb *pcb = lwp_getpcb(l);
	union savefpu *area = &pcb->pcb_savefpu;
	int s;

	s = splvm();
	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
		KASSERT((l->l_flag & LW_SYSTEM) == 0);
		fpu_area_save(area, x86_xsave_features,
		    !(l->l_proc->p_flag & PK_32));
		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	}
	splx(s);
}

/*
 * Bring curlwp's FPU state into memory. It will get installed back in the
 * CPU when returning to userland.
 */
void
fpu_save(void)
{
	fpu_save_lwp(curlwp);
}

void
fpuinit(struct cpu_info *ci)
{
	/*
	 * This might not be strictly necessary since it will be initialized
	 * for each process. However it does no harm.
	 */
	clts();
	fninit();
	stts();
}

void
fpuinit_mxcsr_mask(void)
{
#ifndef XENPV
	union savefpu fpusave __aligned(64);
	u_long psl;

	memset(&fpusave, 0, sizeof(fpusave));

	/* Disable interrupts, and enable FPU */
	psl = x86_read_psl();
	x86_disable_intr();
	clts();

	/* Fill in the FPU area */
	fxsave(&fpusave);

	/* Restore previous state */
	stts();
	x86_write_psl(psl);

	if (fpusave.sv_xmm.fx_mxcsr_mask == 0) {
		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
	} else {
		x86_fpu_mxcsr_mask = fpusave.sv_xmm.fx_mxcsr_mask;
	}
#else
	/*
	 * XXX XXX XXX: On Xen the FXSAVE above faults. That's because
	 * &fpusave is not 16-byte aligned. Stack alignment problem
	 * somewhere, it seems.
	 */
	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
#endif
}

static inline void
fpu_errata_amd(void)
{
	uint16_t sw;

	/*
	 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
	 * when FSW.ES=0, leaking other threads' execution history.
	 *
	 * Clear them manually by loading a zero (fldummy). We do this
	 * unconditionally, regardless of FSW.ES.
	 *
	 * Before that, clear the ES bit in the x87 status word if it is
	 * currently set, in order to avoid causing a fault in the
	 * upcoming load.
	 *
	 * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
	 * which indicates that FIP/FDP/FOP are restored (same behavior
	 * as Intel). We're not using it though.
	 */
	fnstsw(&sw);
	if (sw & 0x80)
		fnclex();
	fldummy();
}

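/*
 * XS64() picks the 64-bit variant of an FXSAVE/XSAVE-family instruction
 * (fxsave64, xrstor64, ...) when operating on a 64-bit process, so that
 * the full 64-bit FIP/FDP are saved and restored; 32-bit processes use
 * the legacy forms.
 */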
#ifdef __x86_64__
#define XS64(x) (is_64bit ? x##64 : x)
#else
#define XS64(x) x
#endif

void
fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
{
	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		fnsave(area);
		break;
	case FPU_SAVE_FXSAVE:
		XS64(fxsave)(area);
		break;
	case FPU_SAVE_XSAVE:
		XS64(xsave)(area, xsave_features);
		break;
	case FPU_SAVE_XSAVEOPT:
		XS64(xsaveopt)(area, xsave_features);
		break;
	}

	stts();
}

void
fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
{
	clts();

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		frstor(area);
		break;
	case FPU_SAVE_FXSAVE:
		if (cpu_vendor == CPUVENDOR_AMD)
			fpu_errata_amd();
		XS64(fxrstor)(area);
		break;
	case FPU_SAVE_XSAVE:
	case FPU_SAVE_XSAVEOPT:
		if (cpu_vendor == CPUVENDOR_AMD)
			fpu_errata_amd();
		XS64(xrstor)(area, xsave_features);
		break;
	}
}

void
fpu_handle_deferred(void)
{
	struct pcb *pcb = lwp_getpcb(curlwp);
	fpu_area_restore(&pcb->pcb_savefpu, x86_xsave_features,
	    !(curlwp->l_proc->p_flag & PK_32));
}

void
fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
{
	struct cpu_info *ci __diagused = curcpu();
	struct pcb *pcb;

	KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
	    cpu_index(ci), ci->ci_ilevel);

	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
		pcb = lwp_getpcb(oldlwp);
		fpu_area_save(&pcb->pcb_savefpu, x86_xsave_features,
		    !(oldlwp->l_proc->p_flag & PK_32));
		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	}
	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
}

void
fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
{
	struct pcb *pcb2 = lwp_getpcb(l2);
	union savefpu *fpu_save;

	/* Kernel threads have no FPU. */
	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
		return;
	}
	/* For init(8). */
	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
		memset(&pcb2->pcb_savefpu, 0, x86_fpu_save_size);
		return;
	}

	fpu_save = fpu_lwp_area(l1);
	memcpy(&pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
	l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
}

void
fpu_lwp_abandon(struct lwp *l)
{
	int s;

	KASSERT(l == curlwp);
	s = splvm();
	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	stts();
	splx(s);
}

/* -------------------------------------------------------------------------- */

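/*
 * Usage sketch (illustrative): kernel code that needs the FPU brackets the
 * instructions touching FPU registers with
 *
 *	fpu_kern_enter();
 *	...SSE/AVX/AES-NI computation...
 *	fpu_kern_leave();
 *
 * and keeps the bracketed region short, since it runs non-preemptible at
 * splvm.
 */
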
/*
 * fpu_kern_enter()
 *
 *	Begin using the FPU.  Raises to splvm, disabling most
 *	interrupts and rendering the thread non-preemptible; caller
 *	should not use this for long periods of time, and must call
 *	fpu_kern_leave() afterward.  Non-recursive -- you cannot call
 *	fpu_kern_enter() again without calling fpu_kern_leave() first.
 *
 *	Must be used only at IPL_VM or below -- never in IPL_SCHED or
 *	IPL_HIGH interrupt handlers.
 */
void
fpu_kern_enter(void)
{
	static const union savefpu safe_fpu __aligned(64) = {
		.sv_xmm = {
			.fx_mxcsr = __SAFE_MXCSR__,
		},
	};
	struct lwp *l = curlwp;
	struct cpu_info *ci;
	int s;

	s = splvm();

	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d",
	    ci->ci_ilevel);
#endif
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/*
	 * If we are in a softint and have a pinned lwp, the fpu state is
	 * that of the pinned lwp, so save it there.
	 */
	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
		l = l->l_switchto;
	fpu_save_lwp(l);

	/*
	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
	 * otherwise the CPU will trap if we try to use the FPU under
	 * the false impression that there has been a task switch since
	 * the last FPU usage requiring that we save the FPU state.
	 */
	clts();

	/*
	 * Zero the FPU registers and install safe control words.
	 */
	fpu_area_restore(&safe_fpu, x86_xsave_features, /*is_64bit*/false);
}

/*
 * fpu_kern_leave()
 *
 *	End using the FPU after fpu_kern_enter().
 */
void
fpu_kern_leave(void)
{
	static const union savefpu zero_fpu __aligned(64);
	struct cpu_info *ci = curcpu();
	int s;

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_ilevel == IPL_VM || cold);
#endif
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	fpu_area_restore(&zero_fpu, x86_xsave_features, /*is_64bit*/false);

	/*
	 * Set CR0_TS again so that the kernel can't accidentally use
	 * the FPU.
	 */
	stts();

	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}

/* -------------------------------------------------------------------------- */

/*
 * The following table is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if SSE simd instructions generate errors
 * on more than one value or if the user process modifies the control
 * word while a status word bit is already set (which is a sign of
 * bad coding).
 * We have no choice but to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
 *
 * The mechanism has a static table with 127 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The code to choose one of these values does these steps:
 * 1) Throw away status word bits that cannot be masked.
 * 2) Throw away the bits currently masked in the control word,
 *    assuming the user isn't interested in them anymore.
 * 3) Reinsert status word bit 7 (stack fault) if it is set, which
 *    cannot be masked but must be preserved.
 *    'Stack fault' is a sub-class of 'invalid operation'.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 *   1  Invalid operation (FP_X_INV)
 *     1a  Stack underflow
 *     1b  Stack overflow
 *     1c  Operand of unsupported format
 *     1d  SNaN operand.
 *   2  QNaN operand (not an exception, irrelevant here)
 *   3  Any other invalid-operation not mentioned above or zero divide
 *        (FP_X_INV, FP_X_DZ)
 *   4  Denormal operand (FP_X_DNML)
 *   5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
 *   6  Inexact result (FP_X_IMP)
 *
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
 * They are in the same order, but there is no EN_SW_STACK_FAULT in the
 * mxcsr status.
 *
 * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
 * are swapped).
 *
 * This table assumes that any stack fault is cleared - so that an INVOP
 * fault will only be reported as FLTSUB once.
 * This might not happen if the mask is being changed.
 */
#define FPE_xxx1(f) (f & EN_SW_INVOP					\
		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV)	\
	: f & EN_SW_ZERODIV ? FPE_FLTDIV				\
	: f & EN_SW_DENORM ? FPE_FLTUND					\
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF				\
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND				\
	: f & EN_SW_PRECLOSS ? FPE_FLTRES				\
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32

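/*
 * For example (illustrative): if, after the masking steps above, both
 * EN_SW_ZERODIV and EN_SW_PRECLOSS remain set, the table entry resolves
 * to FPE_FLTDIV, because zero-divide takes precedence over an inexact
 * result in FPE_xxx1() above.
 */
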
/*
 * This is a synchronous trap on either an x87 instruction (due to an unmasked
 * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
 * to an error on the instruction itself.
 *
 * If the trap actually generates a signal, then the fpu state is saved and
 * then copied onto the lwp's user-stack, and then recovered from there when
 * the signal returns.
 *
 * All this code needs to do is save the reason for the trap. For x87 traps
 * the status word bits need clearing to stop the trap from re-occurring. For
 * SSE traps the mxcsr bits are 'sticky' and need clearing so as not to
 * confuse a later trap.
 *
 * We come here with interrupts disabled.
 */
void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (__predict_false(!USERMODE(frame->tf_cs))) {
		panic("fpu trap from kernel, trapframe %p\n", frame);
	}

	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);

	if (frame->tf_trapno == T_XMM) {
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/* Remove masked interrupts and non-status bits */
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this as an XMM status */
		statbits |= 0x10000;
	} else {
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove masked interrupts */
		statbits = sw & ~(cw & 0x3f);
	}

	/* Doesn't matter now if we get pre-empted */
	x86_enable_intr();

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}

void
fpudna(struct trapframe *frame)
{
	panic("fpudna from %s, ip %p, trapframe %p",
	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
	    (void *)X86_TF_RIP(frame), frame);
}

/* -------------------------------------------------------------------------- */

static inline void
fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
{
	/*
	 * Force a reload of the given xstate during the next XRSTOR.
	 */
	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
	}
}

void
fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save = fpu_lwp_area(l);
	struct pcb *pcb = lwp_getpcb(l);

	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_cw = x87_cw;
		if (x87_cw != __INITIAL_NPXCW__) {
			fpu_xstate_reload(fpu_save, XCR0_X87);
		}
	} else {
		fpu_save->sv_87.s87_cw = x87_cw;
	}
	pcb->pcb_fpu_dflt_cw = x87_cw;
}

void
fpu_clear(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save;
	struct pcb *pcb;

	KASSERT(l == curlwp);
	fpu_save = fpu_lwp_area(l);

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = x87_cw;
		break;
	case FPU_SAVE_FXSAVE:
		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_cw = x87_cw;
		break;
	case FPU_SAVE_XSAVE:
	case FPU_SAVE_XSAVEOPT:
		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_cw = x87_cw;
		if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
			fpu_xstate_reload(fpu_save, XCR0_X87);
		}
		break;
	}

	pcb = lwp_getpcb(l);
	pcb->pcb_fpu_dflt_cw = x87_cw;
}

void
fpu_sigreset(struct lwp *l)
{
	union savefpu *fpu_save = fpu_lwp_area(l);
	struct pcb *pcb = lwp_getpcb(l);

	/*
	 * For signal handlers the register values don't matter. Just reset
	 * a few fields.
	 */
	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_tw = 0;
		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
	} else {
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
	}
}

void
process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));

		/*
		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
		 */
		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;

		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
	} else {
		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
	}
}

void
process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
	} else {
		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
	}
}

void
process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
	} else {
		memset(fpregs, 0, sizeof(*fpregs));
		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
	}
}

void
process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memset(fpregs, 0, sizeof(*fpregs));
		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
	} else {
		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
	}
}

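/*
 * The process_*_xstate() functions below export and import an lwp's
 * extended FPU state (x87, SSE, and any XSAVE components such as AVX) in
 * struct xstate format, typically on behalf of debuggers via ptrace(2).
 */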
int
process_read_xstate(struct lwp *l, struct xstate *xstate)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (x86_fpu_save == FPU_SAVE_FSAVE) {
		/* Convert from legacy FSAVE format. */
		memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
		process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);

		/* We only got x87 data. */
		xstate->xs_rfbm = XCR0_X87;
		xstate->xs_xstate_bv = XCR0_X87;
		return 0;
	}

	/* Copy the legacy area. */
	memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
	    sizeof(xstate->xs_fxsave));

	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
		/* FXSAVE means we've got x87 + SSE data. */
		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
		return 0;
	}

	/* Copy the bitmap indicating which states are available. */
	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));

#define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
	if (xstate->xs_xstate_bv & xcr0_val) {				\
		KASSERT(x86_xsave_offsets[xsave_val]			\
		    >= sizeof(struct xsave_header));			\
		KASSERT(x86_xsave_sizes[xsave_val]			\
		    >= sizeof(xstate->field));				\
		memcpy(&xstate->field,					\
		    (char*)fpu_save + x86_xsave_offsets[xsave_val],	\
		    sizeof(xstate->field));				\
	}

	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);

#undef COPY_COMPONENT

	return 0;
}

int
process_verify_xstate(const struct xstate *xstate)
{
	/* xstate_bv must be a subset of RFBM */
	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
		return EINVAL;

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		if ((xstate->xs_rfbm & ~XCR0_X87))
			return EINVAL;
		break;
	case FPU_SAVE_FXSAVE:
		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
			return EINVAL;
		break;
	default:
		/* Verify that no unsupported features are enabled */
		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
			return EINVAL;
	}

	return 0;
}

int
process_write_xstate(struct lwp *l, const struct xstate *xstate)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	/* Convert data into legacy FSAVE format. */
	if (x86_fpu_save == FPU_SAVE_FSAVE) {
		if (xstate->xs_xstate_bv & XCR0_X87)
			process_xmm_to_s87(&xstate->xs_fxsave,
			    &fpu_save->sv_87);
		return 0;
	}

	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
		/*
		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
		 * fpu_save->sv_xsave_hdr.xsh_xstate_bv"
		 */
		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
		    xstate->xs_xstate_bv;
	}

	if (xstate->xs_xstate_bv & XCR0_X87) {
		/*
		 * X87 state is split into two areas, interspersed with SSE
		 * data.
		 */
		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
		    sizeof(xstate->xs_fxsave.fx_87_ac));
	}

	/*
	 * Copy MXCSR if either SSE or AVX state is requested, to match the
	 * XSAVE behavior for those flags.
	 */
	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
		/*
		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
		 */
		fpu_save->sv_xmm.fx_mxcsr_mask =
		    xstate->xs_fxsave.fx_mxcsr_mask & x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
		    fpu_save->sv_xmm.fx_mxcsr_mask;
	}

	if (xstate->xs_xstate_bv & XCR0_SSE) {
		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
		    xstate->xs_fxsave.fx_xmm, sizeof(xstate->xs_fxsave.fx_xmm));
	}

#define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
	if (xstate->xs_xstate_bv & xcr0_val) {				\
		KASSERT(x86_xsave_offsets[xsave_val]			\
		    >= sizeof(struct xsave_header));			\
		KASSERT(x86_xsave_sizes[xsave_val]			\
		    >= sizeof(xstate->field));				\
		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val],	\
		    &xstate->field, sizeof(xstate->field));		\
	}

	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);

#undef COPY_COMPONENT

	return 0;
}