1 /* $OpenBSD: npx.c,v 1.61 2015/02/11 05:54:48 dlg Exp $ */ 2 /* $NetBSD: npx.c,v 1.57 1996/05/12 23:12:24 mycroft Exp $ */ 3 4 #if 0 5 #define IPRINTF(x) printf x 6 #else 7 #define IPRINTF(x) 8 #endif 9 10 /*- 11 * Copyright (c) 1994, 1995 Charles M. Hannum. All rights reserved. 12 * Copyright (c) 1990 William Jolitz. 13 * Copyright (c) 1991 The Regents of the University of California. 14 * All rights reserved. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 1. Redistributions of source code must retain the above copyright 20 * notice, this list of conditions and the following disclaimer. 21 * 2. Redistributions in binary form must reproduce the above copyright 22 * notice, this list of conditions and the following disclaimer in the 23 * documentation and/or other materials provided with the distribution. 24 * 3. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 
39 * 40 * @(#)npx.c 7.2 (Berkeley) 5/12/91 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/conf.h> 46 #include <sys/file.h> 47 #include <sys/proc.h> 48 #include <sys/signalvar.h> 49 #include <sys/user.h> 50 #include <sys/ioctl.h> 51 #include <sys/device.h> 52 53 #include <uvm/uvm_extern.h> 54 55 #include <machine/cpu.h> 56 #include <machine/intr.h> 57 #include <machine/npx.h> 58 #include <machine/pio.h> 59 #include <machine/cpufunc.h> 60 #include <machine/pcb.h> 61 #include <machine/trap.h> 62 #include <machine/specialreg.h> 63 #include <machine/i8259.h> 64 #include <machine/lock.h> 65 66 #include <dev/isa/isareg.h> 67 #include <dev/isa/isavar.h> 68 69 /* 70 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 71 * 72 * We do lazy initialization and switching using the TS bit in cr0 and the 73 * MDP_USEDFPU bit in mdproc. 74 * 75 * DNA exceptions are handled like this: 76 * 77 * 1) If there is no NPX, return and go to the emulator. 78 * 2) If someone else has used the NPX, save its state into that process's PCB. 79 * 3a) If MDP_USEDFPU is not set, set it and initialize the NPX. 80 * 3b) Otherwise, reload the process's previous NPX state. 81 * 82 * When a process is created or exec()s, its saved cr0 image has the TS bit 83 * set and the MDP_USEDFPU bit clear. The MDP_USEDFPU bit is set when the 84 * process first gets a DNA and the NPX is initialized. The TS bit is turned 85 * off when the NPX is used, and turned on again later when the process's NPX 86 * state is saved. 
 */

/*
 * Inline wrappers around individual x87 instructions.  Each operates on
 * the live FPU of the current CPU; the "fn" (no-wait) forms do not
 * raise pending FP exceptions.
 */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*addr))
#define	fnclex()		__asm("fnclex")
#define	fninit()		__asm("fninit")
#define	fnsave(addr)		__asm("fnsave %0" : "=m" (*addr))
#define	fnstcw(addr)		__asm("fnstcw %0" : "=m" (*addr))
#define	fnstsw(addr)		__asm("fnstsw %0" : "=m" (*addr))
#define	fp_divide_by_0()	__asm("fldz; fld1; fdiv %st,%st(1); fwait")
#define	frstor(addr)		__asm("frstor %0" : : "m" (*addr))
#define	fwait()			__asm("fwait")
#define	clts()			__asm("clts")
/* Set CR0_TS so the next FPU instruction raises a DNA (#NM) exception. */
#define	stts()			lcr0(rcr0() | CR0_TS)

/*
 * The mxcsr_mask for this host, taken from fxsave() on the primary CPU
 * (see npxinit()); 0 until the primary CPU has been initialized.
 */
uint32_t	fpu_mxcsr_mask;

int npxintr(void *);
static int npxprobe1(struct isa_attach_args *);
static int x86fpflags_to_siginfo(u_int32_t);


/* Per-device state. */
struct npx_softc {
	struct device sc_dev;
	void *sc_ih;		/* IRQ13 handler cookie (NPX_INTERRUPT only) */
};

int npxprobe(struct device *, void *, void *);
void npxattach(struct device *, struct device *, void *);

struct cfattach npx_ca = {
	sizeof(struct npx_softc), npxprobe, npxattach
};

struct cfdriver npx_cd = {
	NULL, "npx", DV_DULL
};

/* How (and whether) the FPU reports errors, as determined by the probe. */
enum npx_type {
	NPX_NONE = 0,		/* no usable FPU found */
	NPX_INTERRUPT,		/* errors reported via IRQ13 */
	NPX_EXCEPTION,		/* errors reported via exception 16 */
	NPX_BROKEN,		/* FPU present but error reporting broken */
	NPX_CPUID,		/* presence reported by CPUID; uses exception 16 */
};

static enum npx_type npx_type;
/* Bumped from the probe-time handlers below; tell IRQ13 and trap 16 apart. */
static volatile u_int npx_intrs_while_probing;
static volatile u_int npx_traps_while_probing;

extern int i386_fpu_present;
extern int i386_fpu_exception;
extern int i386_fpu_fdivbug;

#define	fxsave(addr)		__asm("fxsave %0" : "=m" (*addr))
#define	fxrstor(addr)		__asm("fxrstor %0" : : "m" (*addr))
#define	ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*addr))

/*
 * Save the current FPU state into *addr, leaving the FPU initialized.
 * FNSAVE reinitializes the FPU as a side effect; FXSAVE does not, so an
 * explicit FNINIT keeps the two paths equivalent for callers.
 */
static __inline void
fpu_save(union savefpu *addr)
{

	if (i386_use_fxsave) {
		fxsave(&addr->sv_xmm);
		/* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */
		fninit();
	} else
		fnsave(&addr->sv_87);
}

/*
 * Placeholder DNA vector; npxattach() replaces it with npxdna_xmm() or
 * npxdna_s87().  Reaching it means a DNA trap arrived before attach.
 */
static int
npxdna_notset(struct cpu_info *ci)
{
	panic("npxdna vector not initialized");
}

int (*npxdna_func)(struct cpu_info *) = npxdna_notset;
int npxdna_s87(struct cpu_info *);
int npxdna_xmm(struct cpu_info *);
void npxexit(void);

/*
 * Special interrupt handlers.  Someday intr0-intr15 will be used to count
 * interrupts.  We'll still need a special exception 16 handler.  The busy
 * latch stuff in probintr() can be moved to npxprobe().
 */
void probeintr(void);
asm (".text\n\t"
	"probeintr:\n\t"
	"ss\n\t"
	"incl npx_intrs_while_probing\n\t"
	"pushl %eax\n\t"
	"movb $0x20,%al	# EOI (asm in strings loses cpp features)\n\t"
	"outb %al,$0xa0	# IO_ICU2\n\t"
	"outb %al,$0x20	# IO_ICU1\n\t"
	"movb $0,%al\n\t"
	"outb %al,$0xf0	# clear BUSY# latch\n\t"
	"popl %eax\n\t"
	"iret\n\t");

void probetrap(void);
asm (".text\n\t"
	"probetrap:\n\t"
	"ss\n\t"
	"incl npx_traps_while_probing\n\t"
	"fnclex\n\t"
	"iret\n\t");

/*
 * Poke the FPU to determine whether it exists and how it reports errors
 * (exception 16 vs. IRQ13); sets npx_type accordingly and returns 1 if a
 * working FPU was found, 0 otherwise.  Caller (npxprobe()) has installed
 * the probe handlers above and unmasked only the NPX interrupt.
 */
static inline int
npxprobe1(struct isa_attach_args *ia)
{
	int control;
	int status;

	ia->ia_iosize = 16;
	ia->ia_msize = 0;

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but probeintr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();
	delay(1000);		/* wait for any IRQ13 (fwait might hang) */

	/*
	 * Check for a status of mostly zero.
	 */
	status = 0x5a5a;	/* sentinel: detect fnstsw writing nothing */
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;	/* sentinel again */
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(&control);
			npx_traps_while_probing = npx_intrs_while_probing = 0;
			fp_divide_by_0();
			delay(1);
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				npx_type = NPX_EXCEPTION;
				ia->ia_irq = IRQUNK;	/* zap the interrupt */
				i386_fpu_exception = 1;
			} else if (npx_intrs_while_probing != 0) {
				/*
				 * Bad, we are stuck with IRQ13.
				 */
				npx_type = NPX_INTERRUPT;
			} else {
				/*
				 * Worse, even IRQ13 is broken.
				 */
				npx_type = NPX_BROKEN;
				ia->ia_irq = IRQUNK;
			}
			return 1;
		}
	}

	/*
	 * Probe failed.  There is no usable FPU.
	 */
	npx_type = NPX_NONE;
	return 0;
}

/*
 * Probe routine.  Initialize cr0 to give correct behaviour for [f]wait
 * whether the device exists or not (XXX should be elsewhere).  Set flags
 * to tell npxattach() what to do.  Modify device struct if npx doesn't
 * need to use interrupts.  Return 1 if device exists.
 */
int
npxprobe(struct device *parent, void *match, void *aux)
{
	struct isa_attach_args *ia = aux;
	int irq;
	int result;
	u_long save_eflags;
	unsigned save_imen;
	struct gate_descriptor save_idt_npxintr;
	struct gate_descriptor save_idt_npxtrap;

	/* Modern CPUs: trust CPUID, skip the hardware poke entirely. */
	if (cpu_feature & CPUID_FPU) {
		npx_type = NPX_CPUID;
		i386_fpu_exception = 1;
		ia->ia_irq = IRQUNK;	/* Don't want the interrupt vector */
		ia->ia_iosize = 16;
		ia->ia_msize = 0;
		return 1;
	}

	/*
	 * This routine is now just a wrapper for npxprobe1(), to install
	 * special npx interrupt and trap handlers, to enable npx interrupts
	 * and to disable other interrupts.  Someday isa_configure() will
	 * install suitable handlers and run with interrupts enabled so we
	 * won't need to do so much here.
	 */
	irq = NRSVIDT + ia->ia_irq;
	save_eflags = read_eflags();
	disable_intr();
	/* Save and replace the IDT gates for IRQ13 and exception 16. */
	save_idt_npxintr = idt[irq];
	save_idt_npxtrap = idt[16];
	setgate(&idt[irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL, GICODE_SEL);
	setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL, GCODE_SEL);
	/* Mask every interrupt except the cascade and the NPX IRQ. */
	save_imen = imen;
	imen = ~((1 << IRQ_SLAVE) | (1 << ia->ia_irq));
	SET_ICUS();

	/*
	 * Partially reset the coprocessor, if any.  Some BIOS's don't reset
	 * it after a warm boot.
	 */
	outb(0xf1, 0);		/* full reset on some systems, NOP on others */
	delay(1000);
	outb(0xf0, 0);		/* clear BUSY# latch */

	/*
	 * We set CR0 in locore to trap all ESC and WAIT instructions.
	 * We have to turn off the CR0_EM bit temporarily while probing.
	 */
	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
	enable_intr();
	result = npxprobe1(ia);
	disable_intr();
	lcr0(rcr0() | (CR0_EM|CR0_TS));

	/* Restore the interrupt mask, IDT gates and eflags we clobbered. */
	imen = save_imen;
	SET_ICUS();
	idt[irq] = save_idt_npxintr;
	idt[16] = save_idt_npxtrap;
	write_eflags(save_eflags);
	return (result);
}

/*
 * Returns nonzero iff x - (x/y)*y is nonzero for the given operands,
 * i.e. iff the FPU divides incorrectly (Pentium FDIV bug detector).
 */
int npx586bug1(int, int);
asm (".text\n\t"
	"npx586bug1:\n\t"
	"fildl 4(%esp)		# x\n\t"
	"fildl 8(%esp)		# y\n\t"
	"fld %st(1)\n\t"
	"fdiv %st(1),%st	# x/y\n\t"
	"fmulp %st,%st(1)	# (x/y)*y\n\t"
	"fsubrp %st,%st(1)	# x-(x/y)*y\n\t"
	"pushl $0\n\t"
	"fistpl (%esp)\n\t"
	"popl %eax\n\t"
	"ret\n\t");

/*
 * Per-CPU FPU initialization: check for the FDIV bug and, on the first
 * call with FXSAVE support, capture the host's MXCSR mask.  Leaves TS
 * set so the first FPU use faults into the DNA handler.
 */
void
npxinit(struct cpu_info *ci)
{
	lcr0(rcr0() & ~(CR0_EM|CR0_TS));
	fninit();
	/* 4195835/3145727 is the canonical FDIV-bug witness pair. */
	if (npx586bug1(4195835, 3145727) != 0) {
		i386_fpu_fdivbug = 1;
		printf("%s: WARNING: Pentium FDIV bug detected!\n",
		    ci->ci_dev.dv_xname);
	}
	if (fpu_mxcsr_mask == 0 && i386_use_fxsave) {
		/* FXSAVE requires a 16-byte-aligned save area. */
		struct savexmm xm __attribute__((aligned(16)));

		bzero(&xm, sizeof(xm));
		fxsave(&xm);
		/* A zero mask in the image means "use the default mask". */
		if (xm.sv_env.en_mxcsr_mask)
			fpu_mxcsr_mask = xm.sv_env.en_mxcsr_mask;
		else
			fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
	}
	lcr0(rcr0() | (CR0_TS));
}

/*
 * Attach routine - announce which it is, and wire into system
 */
void
npxattach(struct device *parent, struct device *self, void *aux)
{
	struct npx_softc *sc = (void *)self;
	struct isa_attach_args *ia = aux;

	switch (npx_type) {
	case NPX_INTERRUPT:
		printf("\n");
		/* Clear NE so FP errors come in via IRQ13, not exception 16. */
		lcr0(rcr0() & ~CR0_NE);
		sc->sc_ih = isa_intr_establish(ia->ia_ic, ia->ia_irq,
		    IST_EDGE, IPL_NONE, npxintr, 0, sc->sc_dev.dv_xname);
		break;
	case NPX_EXCEPTION:
		printf(": using exception 16\n");
		break;
	case NPX_CPUID:
		printf(": reported by CPUID; using exception 16\n");
		npx_type = NPX_EXCEPTION;
		break;
	case NPX_BROKEN:
		printf(": error reporting broken; not using\n");
		npx_type = NPX_NONE;
		return;
	case NPX_NONE:
		return;
	}

	npxinit(&cpu_info_primary);
	i386_fpu_present = 1;

	/* Select the DNA handler matching the save format in use. */
	if (i386_use_fxsave)
		npxdna_func = npxdna_xmm;
	else
		npxdna_func = npxdna_s87;
}

/*
 * Record the FPU state and reinitialize it all except for the control word.
 * Then generate a SIGFPE.
 *
 * Reinitializing the state allows naive SIGFPE handlers to longjmp without
 * doing any fixups.
 *
 * XXX there is currently no way to pass the full error state to signal
 * handlers, and if this is a nested interrupt there is no way to pass even
 * a status code!  So there is no way to have a non-naive SIGFPE handler.  At
 * best a handler could do an fninit followed by an fldcw of a static value.
 * fnclex would be of little use because it would leave junk on the FPU stack.
 * Returning from the handler would be even less safe than usual because
 * IRQ13 exception handling makes exceptions even less precise than usual.
 */
int
npxintr(void *arg)
{
	struct cpu_info *ci = curcpu();
	struct proc *p = ci->ci_fpcurproc;	/* owner of the live FPU state */
	union savefpu *addr;
	struct intrframe *frame = arg;
	int code;
	union sigval sv;

	uvmexp.traps++;
	IPRINTF(("%s: fp intr\n", ci->ci_dev.dv_xname));

	/* An IRQ13 with no FPU owner (or no FPU at all) is unexplainable. */
	if (p == NULL || npx_type == NPX_NONE) {
		/* XXX no %p in stand/printf.c.  Cast to quiet gcc -Wall. */
		printf("npxintr: p = %lx, curproc = %lx, npx_type = %d\n",
		    (u_long) p, (u_long) curproc, npx_type);
		panic("npxintr from nowhere");
	}
	/*
	 * Clear the interrupt latch.
	 */
	outb(0xf0, 0);
	/*
	 * If we're saving, ignore the interrupt.  The FPU will happily
	 * generate another one when we restore the state later.
	 */
	if (ci->ci_fpsaving)
		return (1);

#ifdef DIAGNOSTIC
	/*
	 * At this point, fpcurproc should be curproc.  If it wasn't, the TS
	 * bit should be set, and we should have gotten a DNA exception.
	 */
	if (p != curproc)
		panic("npxintr: wrong process");
#endif

	/*
	 * Find the address of fpcurproc's saved FPU state.  (Given the
	 * invariant above, this is always the one in curpcb.)
	 */
	addr = &p->p_addr->u_pcb.pcb_savefpu;
	/*
	 * Save state.  This does an implied fninit.  It had better not halt
	 * the cpu or we'll hang.
	 */
	fpu_save(addr);
	fwait();
	/*
	 * Restore control word (was clobbered by fpu_save).
	 */
	if (i386_use_fxsave) {
		fldcw(&addr->sv_xmm.sv_env.en_cw);
		/*
		 * FNINIT doesn't affect MXCSR or the XMM registers;
		 * no need to re-load MXCSR here.
		 */
	} else
		fldcw(&addr->sv_87.sv_env.en_cw);
	fwait();
	/*
	 * Remember the exception status word and tag word.  The current
	 * (almost fninit'ed) fpu state is in the fpu and the exception
	 * state just saved will soon be junk.  However, the implied fninit
	 * doesn't change the error pointers or register contents, and we
	 * preserved the control word and will copy the status and tag
	 * words, so the complete exception state can be recovered.
	 */
	if (i386_use_fxsave) {
		addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
		addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
	} else {
		addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw;
		addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw;
	}

	/*
	 * Pass exception to process.  If it's the current process, try to do
	 * it immediately.
	 */
	if (p == curproc && USERMODE(frame->if_cs, frame->if_eflags)) {
		/*
		 * Interrupt is essentially a trap, so we can afford to call
		 * the SIGFPE handler (if any) as soon as the interrupt
		 * returns.
		 *
		 * XXX little or nothing is gained from this, and plenty is
		 * lost - the interrupt frame has to contain the trap frame
		 * (this is otherwise only necessary for the rescheduling trap
		 * in doreti, and the frame for that could easily be set up
		 * just before it is used).
		 */
		p->p_md.md_regs = (struct trapframe *)&frame->if_fs;

		/*
		 * Encode the appropriate code for detailed information on
		 * this exception.
		 */
		if (i386_use_fxsave)
			code = x86fpflags_to_siginfo(addr->sv_xmm.sv_ex_sw);
		else
			code = x86fpflags_to_siginfo(addr->sv_87.sv_ex_sw);
		sv.sival_int = frame->if_eip;
		trapsignal(p, SIGFPE, T_ARITHTRAP, code, sv);
	} else {
		/*
		 * Nested interrupt.  These losers occur when:
		 *	o an IRQ13 is bogusly generated at a bogus time, e.g.:
		 *		o immediately after an fnsave or frstor of an
		 *		  error state.
		 *		o a couple of 386 instructions after
		 *		  "fstpl _memvar" causes a stack overflow.
		 *	  These are especially nasty when combined with a
		 *	  trace trap.
		 *	o an IRQ13 occurs at the same time as another higher-
		 *	  priority interrupt.
		 *
		 * Treat them like a true async interrupt.
		 */
		psignal(p, SIGFPE);
	}

	return (1);
}

/*
 * Handler for SSE/SIMD floating point exceptions (exception 19, reached
 * via trap()): snapshot the state with FXSAVE, clear the MXCSR exception
 * flags so the process can make progress, and deliver a SIGFPE whose
 * si_code is derived from the raised flags.
 */
void
npxtrap(struct trapframe *frame)
{
	struct proc *p = curcpu()->ci_fpcurproc;
	union savefpu *addr = &p->p_addr->u_pcb.pcb_savefpu;
	u_int32_t mxcsr, statbits;
	int code;
	union sigval sv;

#ifdef DIAGNOSTIC
	/*
	 * At this point, fpcurproc should be curproc.  If it wasn't, the TS
	 * bit should be set, and we should have gotten a DNA exception.
	 */
	if (p != curproc)
		panic("npxtrap: wrong process");
#endif

	fxsave(&addr->sv_xmm);
	mxcsr = addr->sv_xmm.sv_env.en_mxcsr;
	statbits = mxcsr;		/* keep raised flags for siginfo */
	mxcsr &= ~0x3f;			/* clear the six exception flag bits */
	ldmxcsr(&mxcsr);
	addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw;
	addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw;
	code = x86fpflags_to_siginfo (statbits);
	sv.sival_int = frame->tf_eip;
	KERNEL_LOCK();
	trapsignal(p, SIGFPE, frame->tf_err, code, sv);
	KERNEL_UNLOCK();
}

/*
 * Map the lowest set bit of an x87/MXCSR exception flag word to the
 * corresponding SIGFPE si_code value; FPE_FLTINV if no flag is set.
 * (Bit layout is shared by the x87 status word and MXCSR low bits.)
 */
static int
x86fpflags_to_siginfo(u_int32_t flags)
{
	int i;
	static int x86fp_siginfo_table[] = {
		FPE_FLTINV,	/* bit 0 - invalid operation */
		FPE_FLTRES,	/* bit 1 - denormal operand */
		FPE_FLTDIV,	/* bit 2 - divide by zero */
		FPE_FLTOVF,	/* bit 3 - fp overflow */
		FPE_FLTUND,	/* bit 4 - fp underflow */
		FPE_FLTRES,	/* bit 5 - fp precision */
		FPE_FLTINV,	/* bit 6 - stack fault */
	};

	for (i=0;i < sizeof(x86fp_siginfo_table)/sizeof(int); i++) {
		if (flags & (1 << i))
			return (x86fp_siginfo_table[i]);
	}
	/* punt if flags not set */
	return (FPE_FLTINV);
}

/*
 * Implement device not available (DNA) exception
 *
 * If we were the last process to use the FPU, we can simply return.
 * Otherwise, we save the previous state, if necessary, and restore our last
 * saved state.
 */
int
npxdna_xmm(struct cpu_info *ci)
{
	union savefpu *sfp;
	struct proc *p;
	int s;

	/* A DNA while saving means something is badly wrong; let trap() cope. */
	if (ci->ci_fpsaving) {
		printf("recursive npx trap; cr0=%x\n", rcr0());
		return (0);
	}

	s = splipi();		/* lock out IPI's while we clean house.. */

#ifdef MULTIPROCESSOR
	p = ci->ci_curproc;
#else
	p = curproc;
#endif

	IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
	    (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));

	/*
	 * XXX should have a fast-path here when no save/restore is necessary
	 */
	/*
	 * Initialize the FPU state to clear any exceptions.  If someone else
	 * was using the FPU, save their state (which does an implicit
	 * initialization).
	 */
	if (ci->ci_fpcurproc != NULL) {
		IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
		    (u_long)ci->ci_fpcurproc));
		npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
	} else {
		clts();
		IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
		fninit();
		fwait();
		stts();
	}
	splx(s);

	IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
	KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
	/* Our state may still live on another CPU; pull it back first. */
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		npxsave_proc(p, 1);
#endif
	/* Claim the FPU: clear TS for this process and mark ownership. */
	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
	clts();
	s = splipi();
	ci->ci_fpcurproc = p;
	p->p_addr->u_pcb.pcb_fpcpu = ci;
	splx(s);
	uvmexp.fpswtch++;

	sfp = &p->p_addr->u_pcb.pcb_savefpu;

	if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
		/* First FPU use: hand the process a clean initial state. */
		bzero(&sfp->sv_xmm, sizeof(sfp->sv_xmm));
		sfp->sv_xmm.sv_env.en_cw = __INITIAL_NPXCW__;
		sfp->sv_xmm.sv_env.en_mxcsr = __INITIAL_MXCSR__;
		fxrstor(&sfp->sv_xmm);
		p->p_md.md_flags |= MDP_USEDFPU;
	} else {
		static double zero = 0.0;

		/*
		 * amd fpu does not restore fip, fdp, fop on fxrstor
		 * thus leaking other process's execution history.
		 * Force the pending-exception state clean and load a
		 * dummy value so fxrstor overwrites those fields.
		 */
		fnclex();
		__asm volatile("ffree %%st(7)\n\tfldl %0" : : "m" (zero));
		fxrstor(&sfp->sv_xmm);
	}

	return (1);
}

/*
 * DNA handler for CPUs without FXSAVE: same protocol as npxdna_xmm()
 * but using the legacy FNSAVE/FRSTOR save format.
 */
int
npxdna_s87(struct cpu_info *ci)
{
	union savefpu *sfp;
	struct proc *p;
	int s;

	KDASSERT(i386_use_fxsave == 0);

	if (ci->ci_fpsaving) {
		printf("recursive npx trap; cr0=%x\n", rcr0());
		return (0);
	}

	s = splipi();		/* lock out IPI's while we clean house.. */
#ifdef MULTIPROCESSOR
	p = ci->ci_curproc;
#else
	p = curproc;
#endif

	IPRINTF(("%s: dna for %lx%s\n", ci->ci_dev.dv_xname, (u_long)p,
	    (p->p_md.md_flags & MDP_USEDFPU) ? " (used fpu)" : ""));

	/*
	 * If someone else was using our FPU, save their state (which does an
	 * implicit initialization); otherwise, initialize the FPU state to
	 * clear any exceptions.
	 */
	if (ci->ci_fpcurproc != NULL) {
		IPRINTF(("%s: fp save %lx\n", ci->ci_dev.dv_xname,
		    (u_long)ci->ci_fpcurproc));
		npxsave_cpu(ci, ci->ci_fpcurproc != &proc0);
	} else {
		clts();
		IPRINTF(("%s: fp init\n", ci->ci_dev.dv_xname));
		fninit();
		fwait();
		stts();
	}
	splx(s);

	IPRINTF(("%s: done saving\n", ci->ci_dev.dv_xname));
	KDASSERT(ci->ci_fpcurproc == NULL);
#ifndef MULTIPROCESSOR
	KDASSERT(p->p_addr->u_pcb.pcb_fpcpu == NULL);
#else
	/* Our state may still live on another CPU; pull it back first. */
	if (p->p_addr->u_pcb.pcb_fpcpu != NULL)
		npxsave_proc(p, 1);
#endif
	/* Claim the FPU: clear TS for this process and mark ownership. */
	p->p_addr->u_pcb.pcb_cr0 &= ~CR0_TS;
	clts();
	s = splipi();
	ci->ci_fpcurproc = p;
	p->p_addr->u_pcb.pcb_fpcpu = ci;
	splx(s);
	uvmexp.fpswtch++;

	sfp = &p->p_addr->u_pcb.pcb_savefpu;

	if ((p->p_md.md_flags & MDP_USEDFPU) == 0) {
		/* First FPU use: clean initial state, all registers empty. */
		bzero(&sfp->sv_87, sizeof(sfp->sv_87));
		sfp->sv_87.sv_env.en_cw = __INITIAL_NPXCW__;
		sfp->sv_87.sv_env.en_tw = 0xffff;	/* all tags: empty */
		frstor(&sfp->sv_87);
		p->p_md.md_flags |= MDP_USEDFPU;
	} else {
		/*
		 * The following frstor may cause an IRQ13 when the state being
		 * restored has a pending error.  The error will appear to have
		 * been triggered by the current (npx) user instruction even
		 * when that instruction is a no-wait instruction that should
		 * not trigger an error (e.g., fnclex).  On at least one 486
		 * system all of the no-wait instructions are broken the same
		 * as frstor, so our treatment does not amplify the breakage.
		 * On at least one 386/Cyrix 387 system, fnclex works correctly
		 * while frstor and fnsave are broken, so our treatment breaks
		 * fnclex if it is the first FPU instruction after a context
		 * switch.
		 */
		frstor(&sfp->sv_87);
	}

	return (1);
}

/*
 * The FNSAVE instruction clears the FPU state.  Rather than reloading the FPU
 * immediately, we clear fpcurproc and turn on CR0_TS to force a DNA and a
 * reload of the FPU state the next time we try to use it.  This routine
 * is only called when forking, core dumping, or debugging, or swapping,
 * so the lazy reload at worst forces us to trap once per fork(), and at best
 * saves us a reload once per fork().
 */
void
npxsave_cpu(struct cpu_info *ci, int save)
{
	struct proc *p;
	int s;

	KDASSERT(ci == curcpu());

	p = ci->ci_fpcurproc;
	if (p == NULL)
		return;

	IPRINTF(("%s: fp cpu %s %lx\n", ci->ci_dev.dv_xname,
	    save ? "save" : "flush", (u_long)p));

	if (save) {
#ifdef DIAGNOSTIC
		if (ci->ci_fpsaving != 0)
			panic("npxsave_cpu: recursive save!");
#endif
		/*
		 * Set ci->ci_fpsaving, so that any pending exception will be
		 * thrown away.  (It will be caught again if/when the FPU
		 * state is restored.)
		 *
		 * XXX on i386 and earlier, this routine should always be
		 * called at spl0; if it might called with the NPX interrupt
		 * masked, it would be necessary to forcibly unmask the NPX
		 * interrupt so that it could succeed.
		 * XXX this is irrelevant on 486 and above (systems
		 * which report FP failures via traps rather than irq13).
		 * XXX punting for now..
		 */
		clts();
		ci->ci_fpsaving = 1;
		fpu_save(&p->p_addr->u_pcb.pcb_savefpu);
		ci->ci_fpsaving = 0;
		/* It is unclear if this is needed. */
		fwait();
	}

	/*
	 * We set the TS bit in the saved CR0 for this process, so that it
	 * will get a DNA exception on any FPU instruction and force a reload.
	 */
	stts();
	p->p_addr->u_pcb.pcb_cr0 |= CR0_TS;

	/* Drop ownership atomically with respect to FPU IPIs. */
	s = splipi();
	p->p_addr->u_pcb.pcb_fpcpu = NULL;
	ci->ci_fpcurproc = NULL;
	splx(s);
}

/*
 * Save p's FPU state, which may be on this processor or another processor.
 */
void
npxsave_proc(struct proc *p, int save)
{
	struct cpu_info *ci = curcpu();
	struct cpu_info *oci;

	KDASSERT(p->p_addr != NULL);

	oci = p->p_addr->u_pcb.pcb_fpcpu;
	if (oci == NULL)
		return;		/* no live FPU state anywhere */

	IPRINTF(("%s: fp proc %s %lx\n", ci->ci_dev.dv_xname,
	    save ? "save" : "flush", (u_long)p));

#if defined(MULTIPROCESSOR)
	if (oci == ci) {
		/* State is on this CPU; save it directly. */
		int s = splipi();
		npxsave_cpu(ci, save);
		splx(s);
	} else {
		/*
		 * State lives on another CPU: ask it (via IPI) to save or
		 * flush, then spin until it clears pcb_fpcpu.
		 */
		IPRINTF(("%s: fp ipi to %s %s %lx\n", ci->ci_dev.dv_xname,
		    oci->ci_dev.dv_xname, save ? "save" : "flush", (u_long)p));

		oci->ci_fpsaveproc = p;
		i386_send_ipi(oci,
		    save ? I386_IPI_SYNCH_FPU : I386_IPI_FLUSH_FPU);
		while (p->p_addr->u_pcb.pcb_fpcpu != NULL)
			SPINLOCK_SPIN_HOOK;
	}
#else
	KASSERT(ci->ci_fpcurproc == p);
	npxsave_cpu(ci, save);
#endif
}

/*
 * Claim the FPU for in-kernel use (owner recorded as proc0), saving any
 * user state first, and leave it freshly initialized.  Must be paired
 * with fpu_kernel_exit().
 */
void
fpu_kernel_enter(void)
{
	struct cpu_info *ci = curcpu();
	uint32_t cw;
	int s;

	/*
	 * Fast path.  If the kernel was using the FPU before, there
	 * is no work to do besides clearing TS.
	 */
	if (ci->ci_fpcurproc == &proc0) {
		clts();
		return;
	}

	s = splipi();

	if (ci->ci_fpcurproc != NULL) {
		npxsave_cpu(ci, 1);
		uvmexp.fpswtch++;
	}

	/* Claim the FPU */
	ci->ci_fpcurproc = &proc0;

	splx(s);

	/* Disable DNA exceptions */
	clts();

	/* Initialize the FPU */
	fninit();
	cw = __INITIAL_NPXCW__;
	fldcw(&cw);
	if (i386_has_sse || i386_has_sse2) {
		cw = __INITIAL_MXCSR__;
		ldmxcsr(&cw);
	}
}

/*
 * Release the FPU after in-kernel use: re-arm the DNA trap so the next
 * user of the FPU faults in and reloads its own state.
 */
void
fpu_kernel_exit(void)
{
	/* Enable DNA exceptions */
	stts();
}