/*-
 * Copyright (c) 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (C) 1994, David Greenman
 * Copyright (c) 2008 The DragonFly Project.
 * Copyright (c) 2008 Jordan Gordeev.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: @(#)trap.c	7.4 (Berkeley) 5/13/91
 * $FreeBSD: src/sys/i386/i386/trap.c,v 1.147.2.11 2003/02/27 19:09:59 luoqi Exp $
 */
/*
 * x86_64 Trap and System call handling
 */

#include "use_isa.h"

#include "opt_ddb.h"
#include "opt_ktrace.h"

#include <machine/frame.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kerneldump.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/types.h>
#include <sys/signal2.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <sys/ktr.h>
#include <sys/sysmsg.h>
#include <sys/sysproto.h>
#include <sys/sysunion.h>

#include <vm/pmap.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_param.h>
#include <machine/cpu.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/thread.h>
#include <machine/clock.h>
#include <machine/vmparam.h>
#include <machine/md_var.h>
#include <machine_base/isa/isa_intr.h>
#include <machine_base/apic/lapic.h>

#include <ddb/ddb.h>

#include <sys/thread2.h>
#include <sys/mplock2.h>

#ifdef SMP

#define MAKEMPSAFE(have_mplock)			\
	if (have_mplock == 0) {			\
		get_mplock();			\
		have_mplock = 1;		\
	}

#else

#define MAKEMPSAFE(have_mplock)

#endif

extern void trap(struct trapframe *frame);

static int trap_pfault(struct trapframe *, int);
static void trap_fatal(struct trapframe *, vm_offset_t);
void dblfault_handler(struct trapframe *frame);

#define MAX_TRAP_MSG		30
static char *trap_msg[] = {
	"",					/*  0 unused */
	"privileged instruction fault",		/*  1 T_PRIVINFLT */
	"",					/*  2 unused */
	"breakpoint instruction fault",		/*  3 T_BPTFLT */
	"",					/*  4 unused */
	"",					/*  5 unused */
	"arithmetic trap",			/*  6 T_ARITHTRAP */
	"system forced exception",		/*  7 T_ASTFLT */
	"",					/*  8 unused */
	"general protection fault",		/*  9 T_PROTFLT */
	"trace trap",				/* 10 T_TRCTRAP */
	"",					/* 11 unused */
	"page fault",				/* 12 T_PAGEFLT */
	"",					/* 13 unused */
	"alignment fault",			/* 14 T_ALIGNFLT */
	"",					/* 15 unused */
	"",					/* 16 unused */
	"",					/* 17 unused */
	"integer divide fault",			/* 18 T_DIVIDE */
	"non-maskable interrupt trap",		/* 19 T_NMI */
	"overflow trap",			/* 20 T_OFLOW */
	"FPU bounds check fault",		/* 21 T_BOUND */
	"FPU device not available",		/* 22 T_DNA */
	"double fault",				/* 23 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 24 T_FPOPFLT */
	"invalid TSS fault",			/* 25 T_TSSFLT */
	"segment not present fault",		/* 26 T_SEGNPFLT */
	"stack fault",				/* 27 T_STKFLT */
	"machine check trap",			/* 28 T_MCHK */
	"SIMD floating-point exception",	/* 29 T_XMMFLT */
	"reserved (unknown) fault",		/* 30 T_RESERVED */
};

#ifdef DDB
static int ddb_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, ddb_on_nmi, CTLFLAG_RW,
	&ddb_on_nmi, 0, "Go to DDB on NMI");
static int ddb_on_seg_fault = 0;
SYSCTL_INT(_machdep, OID_AUTO, ddb_on_seg_fault, CTLFLAG_RW,
	&ddb_on_seg_fault, 0, "Go to DDB on user seg-fault");
static int freeze_on_seg_fault = 0;
SYSCTL_INT(_machdep, OID_AUTO, freeze_on_seg_fault, CTLFLAG_RW,
	&freeze_on_seg_fault, 0, "Freeze the process on user seg-fault");
#endif
static int panic_on_nmi = 1;
SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
	&panic_on_nmi, 0, "Panic on NMI");
static int fast_release;
SYSCTL_INT(_machdep, OID_AUTO, fast_release, CTLFLAG_RW,
	&fast_release, 0, "Passive Release was optimal");
static int slow_release;
SYSCTL_INT(_machdep, OID_AUTO, slow_release, CTLFLAG_RW,
	&slow_release, 0, "Passive Release was nonoptimal");
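
/*
 * The knobs above are ordinary runtime sysctl tunables; illustrative
 * usage from a shell (names taken from the declarations above):
 *
 *	sysctl machdep.panic_on_nmi=0	# do not panic on unclaimed NMIs
 *	sysctl machdep.ddb_on_nmi=1	# enter DDB when an NMI arrives
 */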
/*
 * System call debugging records the worst-case system call
 * overhead (inclusive of blocking), but may be inaccurate.
 */
/*#define SYSCALL_DEBUG*/
#ifdef SYSCALL_DEBUG
uint64_t SysCallsWorstCase[SYS_MAXSYSCALL];
#endif

/*
 * Passively intercepts the thread switch function to increase
 * the thread priority from a user priority to a kernel priority, reducing
 * syscall and trap overhead for the case where no switch occurs.
 *
 * Synchronizes td_ucred with p_ucred.  This is used by system calls,
 * signal handling, faults, AST traps, and anything else that enters the
 * kernel from userland and provides the kernel with a stable read-only
 * copy of the process ucred.
 */
static __inline void
userenter(struct thread *curtd, struct proc *curp)
{
	struct ucred *ocred;
	struct ucred *ncred;

	curtd->td_release = lwkt_passive_release;

	if (curtd->td_ucred != curp->p_ucred) {
		ncred = crhold(curp->p_ucred);
		ocred = curtd->td_ucred;
		curtd->td_ucred = ncred;
		if (ocred)
			crfree(ocred);
	}
}
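
/*
 * Note the ordering above: the new cred is crhold()'d and installed in
 * td_ucred before the old one is crfree()'d, so code running in this
 * thread never observes a stale or NULL cached cred pointer.
 */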
/*
 * Handle signals, upcalls, profiling, and other AST's and/or tasks that
 * must be completed before we can return to or try to return to userland.
 *
 * Note that td_sticks is a 64 bit quantity, but there's no point doing
 * 64 bit arithmetic on the delta calculation so the absolute tick values
 * are truncated to an integer.
 */
static void
userret(struct lwp *lp, struct trapframe *frame, int sticks)
{
	struct proc *p = lp->lwp_proc;
	int sig;

	/*
	 * Charge system time if profiling.  Note: times are in microseconds.
	 * This may do a copyout and block, so do it first even though it
	 * means some system time will be charged as user time.
	 */
	if (p->p_flag & P_PROFIL) {
		addupc_task(p, frame->tf_rip,
			(u_int)((int)lp->lwp_thread->td_sticks - sticks));
	}

recheck:
	/*
	 * If the jungle wants us dead, so be it.
	 */
	if (lp->lwp_flag & LWP_WEXIT) {
		lwkt_gettoken(&p->p_token);
		lwp_exit(0);
		lwkt_reltoken(&p->p_token);	/* NOT REACHED */
	}

	/*
	 * Block here if we are in a stopped state.
	 */
	if (p->p_stat == SSTOP || dump_stop_usertds) {
		get_mplock();
		tstop();
		rel_mplock();
		goto recheck;
	}

	/*
	 * Post any pending upcalls.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the upcall.
	 */
	if (p->p_flag & (P_SIGVTALRM | P_SIGPROF | P_UPCALLPEND)) {
		lwkt_gettoken(&p->p_token);
		if (p->p_flag & P_SIGVTALRM) {
			p->p_flag &= ~P_SIGVTALRM;
			ksignal(p, SIGVTALRM);
		}
		if (p->p_flag & P_SIGPROF) {
			p->p_flag &= ~P_SIGPROF;
			ksignal(p, SIGPROF);
		}
		if (p->p_flag & P_UPCALLPEND) {
			p->p_flag &= ~P_UPCALLPEND;
			postupcall(lp);
		}
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Post any pending signals.  If running a virtual kernel be sure
	 * to restore the virtual kernel's vmspace before posting the signal.
	 *
	 * WARNING!  postsig() can exit and not return.
	 */
	if ((sig = CURSIG_TRACE(lp)) != 0) {
		lwkt_gettoken(&p->p_token);
		postsig(sig);
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Block here if we are swapped out, but still process signals
	 * (such as SIGKILL).  proc0 (the swapin scheduler) is already
	 * aware of our situation, we do not have to wake it up.
	 */
	if (p->p_flag & P_SWAPPEDOUT) {
		lwkt_gettoken(&p->p_token);
		get_mplock();
		p->p_flag |= P_SWAPWAIT;
		swapin_request();
		if (p->p_flag & P_SWAPWAIT)
			tsleep(p, PCATCH, "SWOUT", 0);
		p->p_flag &= ~P_SWAPWAIT;
		rel_mplock();
		lwkt_reltoken(&p->p_token);
		goto recheck;
	}

	/*
	 * Make sure postsig() handled the request to restore the old
	 * signal mask after running the signal handler.
	 */
	KKASSERT((lp->lwp_flag & LWP_OLDMASK) == 0);
}

/*
 * Cleanup from userenter and any passive release that might have occurred.
 * We must reclaim the current-process designation before we can return
 * to usermode.  We also handle both LWKT and USER reschedule requests.
 */
static __inline void
userexit(struct lwp *lp)
{
	struct thread *td = lp->lwp_thread;
	/* globaldata_t gd = td->td_gd; */

	/*
	 * Handle stop requests at kernel priority.  Any requests queued
	 * after this loop will generate another AST.
	 */
	while (lp->lwp_proc->p_stat == SSTOP) {
		get_mplock();
		tstop();
		rel_mplock();
	}

	/*
	 * Reduce our priority in preparation for a return to userland.  If
	 * our passive release function was still in place, our priority was
	 * never raised and does not need to be reduced.
	 */
	lwkt_passive_recover(td);

	/*
	 * Become the current user scheduled process if we aren't already,
	 * and deal with reschedule requests and other factors.
	 */
	lp->lwp_proc->p_usched->acquire_curproc(lp);
	/* WARNING: we may have migrated cpu's */
	/* gd = td->td_gd; */
}
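
/*
 * Taken together: userret() runs the work that may block or loop
 * (exits, stops, upcalls, signals, swap-in), while userexit() drops
 * the kernel priority and reacquires the current-process designation.
 * Callers such as trap() and syscall2() invoke them in that order on
 * every return to userland.
 */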
#if !defined(KTR_KERNENTRY)
#define KTR_KERNENTRY	KTR_ALL
#endif
KTR_INFO_MASTER(kernentry);
KTR_INFO(KTR_KERNENTRY, kernentry, trap, 0, "STR",
	sizeof(long) + sizeof(long) + sizeof(long) + sizeof(vm_offset_t));
KTR_INFO(KTR_KERNENTRY, kernentry, trap_ret, 0, "STR",
	sizeof(long) + sizeof(long));
KTR_INFO(KTR_KERNENTRY, kernentry, syscall, 0, "STR",
	sizeof(long) + sizeof(long) + sizeof(long));
KTR_INFO(KTR_KERNENTRY, kernentry, syscall_ret, 0, "STR",
	sizeof(long) + sizeof(long) + sizeof(long));
KTR_INFO(KTR_KERNENTRY, kernentry, fork_ret, 0, "STR",
	sizeof(long) + sizeof(long));

/*
 * Exception, fault, and trap interface to the kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * This function is also called from doreti in an interlock to handle ASTs.
 * For example:  hardwareint->INTROUTINE->(set ast)->doreti->trap
 *
 * NOTE!  We have to retrieve the fault address prior to obtaining the
 * MP lock because get_mplock() may switch out.  YYY cr2 really ought
 * to be retrieved by the assembly code, not here.
 *
 * XXX gd_trap_nesting_level currently prevents lwkt_switch() from
 * panicking if an attempt is made to switch from a fast interrupt or IPI.
 * This is necessary to properly take fatal kernel traps on SMP machines
 * if get_mplock() has to block.
 */
void
trap(struct trapframe *frame)
{
	struct globaldata *gd = mycpu;
	struct thread *td = gd->gd_curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;
	int sticks = 0;
	int i = 0, ucode = 0, type, code;
#ifdef SMP
	int have_mplock = 0;
#endif
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
	lwkt_tokref_t curstop = td->td_toks_stop;
#endif
	vm_offset_t eva;

	p = td->td_proc;
	clear_quickret();

#ifdef DDB
	/*
	 * We need to allow T_DNA faults when the debugger is active since
	 * some of the dumping paths do large bcopy() calls which use the
	 * floating point registers for faster copying.
	 */
	if (db_active && frame->tf_trapno != T_DNA) {
		eva = (frame->tf_trapno == T_PAGEFLT ? frame->tf_addr : 0);
		++gd->gd_trap_nesting_level;
		MAKEMPSAFE(have_mplock);
		trap_fatal(frame, eva);
		--gd->gd_trap_nesting_level;
		goto out2;
	}
#endif

	eva = 0;

	if ((frame->tf_rflags & PSL_I) == 0) {
		/*
		 * Buggy application or kernel code has disabled interrupts
		 * and then trapped.  Enabling interrupts now is wrong, but
		 * it is better than running with interrupts disabled until
		 * they are accidentally enabled later.
		 */
		type = frame->tf_trapno;
		if (ISPL(frame->tf_cs) == SEL_UPL) {
			MAKEMPSAFE(have_mplock);
			/* JG curproc can be NULL */
			kprintf(
			    "pid %ld (%s): trap %d with interrupts disabled\n",
			    (long)curproc->p_pid, curproc->p_comm, type);
		} else if (type != T_NMI && type != T_BPTFLT &&
		    type != T_TRCTRAP) {
			/*
			 * XXX not quite right, since this may be for a
			 * multiple fault in user mode.
			 */
			MAKEMPSAFE(have_mplock);
			kprintf("kernel trap %d with interrupts disabled\n",
			    type);
		}
		cpu_enable_intr();
	}

	type = frame->tf_trapno;
	code = frame->tf_err;
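
	/*
	 * The privilege level saved in %cs selects the handling path
	 * below: SEL_UPL means the trap came from userland and is
	 * converted to a signal where possible, anything else is a
	 * kernel-mode fault that is either recovered in-line or fatal.
	 */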
	if (ISPL(frame->tf_cs) == SEL_UPL) {
		/* user trap */

		KTR_LOG(kernentry_trap, p->p_pid, lp->lwp_tid,
			frame->tf_trapno, eva);

		userenter(td, p);

		sticks = (int)td->td_sticks;
		KASSERT(lp->lwp_md.md_regs == frame,
			("Frame mismatch %p %p", lp->lwp_md.md_regs, frame));

		switch (type) {
		case T_PRIVINFLT:	/* privileged instruction fault */
			ucode = ILL_PRVOPC;
			i = SIGILL;
			break;

		case T_BPTFLT:		/* bpt instruction fault */
		case T_TRCTRAP:		/* trace trap */
			frame->tf_rflags &= ~PSL_T;
			ucode = TRAP_TRACE;
			i = SIGTRAP;
			break;

		case T_ARITHTRAP:	/* arithmetic trap */
			ucode = code;
			i = SIGFPE;
#if 0
#if JG
			ucode = fputrap();
#else
			ucode = code;
#endif
			i = SIGFPE;
#endif
			break;

		case T_ASTFLT:		/* Allow process switch */
			mycpu->gd_cnt.v_soft++;
			if (mycpu->gd_reqflags & RQF_AST_OWEUPC) {
				atomic_clear_int(&mycpu->gd_reqflags,
					RQF_AST_OWEUPC);
				addupc_task(p, p->p_prof.pr_addr,
					p->p_prof.pr_ticks);
			}
			goto out;

		case T_PROTFLT:		/* general protection fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		case T_SEGNPFLT:	/* segment not present fault */
			i = SIGBUS;
			ucode = BUS_ADRERR;
			break;
		case T_TSSFLT:		/* invalid TSS fault */
		case T_DOUBLEFLT:	/* double fault */
			i = SIGBUS;
			ucode = BUS_OBJERR;
			break;
		default:
#if 0
			ucode = code + BUS_SEGM_FAULT ; /* XXX: ???*/
#endif
			ucode = BUS_OBJERR;
			i = SIGBUS;
			break;

		case T_PAGEFLT:		/* page fault */
			i = trap_pfault(frame, TRUE);
			if (frame->tf_rip == 0) {
				kprintf("T_PAGEFLT: Warning %%rip == 0!\n");
#ifdef DDB
				while (freeze_on_seg_fault) {
					tsleep(p, 0, "freeze", hz * 20);
				}
#endif
			}
			if (i == -1)
				goto out;
			if (i == 0)
				goto out;

#if 0
			ucode = T_PAGEFLT;
#endif
			if (i == SIGSEGV)
				ucode = SEGV_MAPERR;
			else
				ucode = BUS_ADRERR;
			break;

		case T_DIVIDE:		/* integer divide fault */
			ucode = FPE_INTDIV;
			i = SIGFPE;
			break;

#if NISA > 0
		case T_NMI:
			MAKEMPSAFE(have_mplock);
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi)
				panic("NMI indicates hardware failure");
			break;
#endif /* NISA > 0 */

		case T_OFLOW:		/* integer overflow fault */
			ucode = FPE_INTOVF;
			i = SIGFPE;
			break;

		case T_BOUND:		/* bounds check fault */
			ucode = FPE_FLTSUB;
			i = SIGFPE;
			break;

		case T_DNA:
			/*
			 * Virtual kernel intercept - pass the DNA exception
			 * to the virtual kernel if it asked to handle it.
			 * This occurs when the virtual kernel is holding
			 * onto the FP context for a different emulated
			 * process than the one currently running.
			 *
			 * We must still call npxdna() since we may have
			 * saved FP state that the virtual kernel needs
			 * to hand over to a different emulated process.
			 */
			if (lp->lwp_vkernel && lp->lwp_vkernel->ve &&
			    (td->td_pcb->pcb_flags & FP_VIRTFP)
			) {
				npxdna();
				break;
			}

			/*
			 * The kernel may have switched out the FP unit's
			 * state, causing the user process to take a fault
			 * when it tries to use the FP unit.  Restore the
			 * state here.
			 */
			if (npxdna())
				goto out;
			i = SIGFPE;
			ucode = FPE_FPU_NP_TRAP;
			break;

		case T_FPOPFLT:		/* FPU operand fetch fault */
			ucode = ILL_COPROC;
			i = SIGILL;
			break;

		case T_XMMFLT:		/* SIMD floating-point exception */
			ucode = 0;	/* XXX */
			i = SIGFPE;
			break;
		}
	} else {
		/* kernel trap */

		switch (type) {
		case T_PAGEFLT:		/* page fault */
			trap_pfault(frame, FALSE);
			goto out2;

		case T_DNA:
			/*
			 * The kernel is apparently using the FPU for
			 * copying.  XXX this should be fatal unless the
			 * kernel has registered such use.
			 */
			if (npxdna())
				goto out2;
			break;

		case T_STKFLT:		/* stack fault */
			break;

		case T_PROTFLT:		/* general protection fault */
		case T_SEGNPFLT:	/* segment not present fault */
			/*
			 * Invalid segment selectors and out of bounds
			 * %rip's and %rsp's can be set up in user mode.
			 * This causes a fault in kernel mode when the
			 * kernel tries to return to user mode.  We want
			 * to get this fault so that we can fix the
			 * problem here and not have to check all the
			 * selectors and pointers when the user changes
			 * them.
			 */
			if (mycpu->gd_intr_nesting_level == 0) {
				if (td->td_pcb->pcb_onfault) {
					frame->tf_rip = (register_t)
						td->td_pcb->pcb_onfault;
					goto out2;
				}
				if (frame->tf_rip == (long)doreti_iret) {
					frame->tf_rip =
					    (long)doreti_iret_fault;
					goto out2;
				}
			}
			break;
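
		/*
		 * Illustrative sketch of the pcb_onfault convention
		 * relied on above (assumed layout, copyin/copyout style):
		 *
		 *	curthread->td_pcb->pcb_onfault = (void *)handler;
		 *	... touch user or otherwise doubtful memory ...
		 *	curthread->td_pcb->pcb_onfault = NULL;
		 *
		 * While it is set, a kernel-mode fault resumes execution
		 * at 'handler' instead of being treated as fatal.
		 */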
		case T_TSSFLT:
			/*
			 * PSL_NT can be set in user mode and isn't cleared
			 * automatically when the kernel is entered.  This
			 * causes a TSS fault when the kernel attempts to
			 * `iret' because the TSS link is uninitialized.  We
			 * want to get this fault so that we can fix the
			 * problem here and not every time the kernel is
			 * entered.
			 */
			if (frame->tf_rflags & PSL_NT) {
				frame->tf_rflags &= ~PSL_NT;
				goto out2;
			}
			break;

		case T_TRCTRAP:		/* trace trap */
#if 0
			if (frame->tf_rip == (int)IDTVEC(syscall)) {
				/*
				 * We've just entered system mode via the
				 * syscall lcall.  Continue single stepping
				 * silently until the syscall handler has
				 * saved the flags.
				 */
				goto out2;
			}
			if (frame->tf_rip == (int)IDTVEC(syscall) + 1) {
				/*
				 * The syscall handler has now saved the
				 * flags.  Stop single stepping it.
				 */
				frame->tf_rflags &= ~PSL_T;
				goto out2;
			}
#endif

			/*
			 * Ignore debug register trace traps due to
			 * accesses in the user's address space, which
			 * can happen under several conditions such as
			 * if a user sets a watchpoint on a buffer and
			 * then passes that buffer to a system call.
			 * We still want to get TRCTRAPS for addresses
			 * in kernel space because that is useful when
			 * debugging the kernel.
			 */
#if JG
			if (user_dbreg_trap()) {
				/*
				 * Reset breakpoint bits because the
				 * processor doesn't.
				 */
				/* XXX check upper bits here */
				load_dr6(rdr6() & 0xfffffff0);
				goto out2;
			}
#endif
			/*
			 * FALLTHROUGH (TRCTRAP kernel mode, kernel address)
			 */
		case T_BPTFLT:
			/*
			 * If DDB is enabled, let it handle the debugger trap.
			 * Otherwise, debugger traps "can't happen".
			 */
			ucode = TRAP_BRKPT;
#ifdef DDB
			MAKEMPSAFE(have_mplock);
			if (kdb_trap(type, 0, frame))
				goto out2;
#endif
			break;

#if NISA > 0
		case T_NMI:
			MAKEMPSAFE(have_mplock);
			/* machine/parity/power fail/"kitchen sink" faults */
			if (isa_nmi(code) == 0) {
#ifdef DDB
				/*
				 * NMI can be hooked up to a pushbutton
				 * for debugging.
				 */
				if (ddb_on_nmi) {
					kprintf("NMI ... going to debugger\n");
					kdb_trap(type, 0, frame);
				}
#endif /* DDB */
				goto out2;
			} else if (panic_on_nmi == 0)
				goto out2;
			/* FALL THROUGH */
#endif /* NISA > 0 */
		}
		MAKEMPSAFE(have_mplock);
		trap_fatal(frame, 0);
		goto out2;
	}

	/*
	 * Virtual kernel intercept - if the fault is directly related to a
	 * VM context managed by a virtual kernel then let the virtual kernel
	 * handle it.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		goto out;
	}

	/*
	 * Translate fault for emulators (e.g. Linux)
	 */
	if (*p->p_sysent->sv_transtrap)
		i = (*p->p_sysent->sv_transtrap)(i, type);

	MAKEMPSAFE(have_mplock);
	trapsignal(lp, i, ucode);

#ifdef DEBUG
	if (type <= MAX_TRAP_MSG) {
		uprintf("fatal process exception: %s",
			trap_msg[type]);
		if ((type == T_PAGEFLT) || (type == T_PROTFLT))
			uprintf(", fault VA = 0x%lx", frame->tf_addr);
		uprintf("\n");
	}
#endif

out:
	userret(lp, frame, sticks);
	userexit(lp);
out2:	;
#ifdef SMP
	if (have_mplock)
		rel_mplock();
#endif
	if (p != NULL && lp != NULL)
		KTR_LOG(kernentry_trap_ret, p->p_pid, lp->lwp_tid);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("trap: critical section count mismatch! %d/%d",
		crit_count, td->td_critcount));
	KASSERT(curstop == td->td_toks_stop,
		("trap: extra tokens held after trap! %ld/%ld",
		curstop - &td->td_toks_base,
		td->td_toks_stop - &td->td_toks_base));
#endif
}
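
/*
 * Note on the return convention of trap_pfault() below: 0 means the
 * fault was resolved (or recovered via pcb_onfault), -1 means a fatal
 * kernel fault was already reported via trap_fatal(), and anything
 * else is a signal number (SIGSEGV or SIGBUS) for the caller to post.
 */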
%d/%d", 804 crit_count, td->td_pri)); 805 KASSERT(curstop == td->td_toks_stop, 806 ("trap: extra tokens held after trap! %ld/%ld", 807 curstop - &td->td_toks_base, 808 td->td_toks_stop - &td->td_toks_base)); 809 #endif 810 } 811 812 static int 813 trap_pfault(struct trapframe *frame, int usermode) 814 { 815 vm_offset_t va; 816 struct vmspace *vm = NULL; 817 vm_map_t map; 818 int rv = 0; 819 int fault_flags; 820 vm_prot_t ftype; 821 thread_t td = curthread; 822 struct lwp *lp = td->td_lwp; 823 struct proc *p; 824 825 va = trunc_page(frame->tf_addr); 826 if (va >= VM_MIN_KERNEL_ADDRESS) { 827 /* 828 * Don't allow user-mode faults in kernel address space. 829 */ 830 if (usermode) { 831 fault_flags = -1; 832 ftype = -1; 833 goto nogo; 834 } 835 836 map = &kernel_map; 837 } else { 838 /* 839 * This is a fault on non-kernel virtual memory. 840 * vm is initialized above to NULL. If curproc is NULL 841 * or curproc->p_vmspace is NULL the fault is fatal. 842 */ 843 if (lp != NULL) 844 vm = lp->lwp_vmspace; 845 846 if (vm == NULL) { 847 fault_flags = -1; 848 ftype = -1; 849 goto nogo; 850 } 851 852 map = &vm->vm_map; 853 } 854 855 /* 856 * PGEX_I is defined only if the execute disable bit capability is 857 * supported and enabled. 858 */ 859 if (frame->tf_err & PGEX_W) 860 ftype = VM_PROT_WRITE; 861 #if JG 862 else if ((frame->tf_err & PGEX_I) && pg_nx != 0) 863 ftype = VM_PROT_EXECUTE; 864 #endif 865 else 866 ftype = VM_PROT_READ; 867 868 if (map != &kernel_map) { 869 /* 870 * Keep swapout from messing with us during this 871 * critical time. 872 */ 873 PHOLD(lp->lwp_proc); 874 875 /* 876 * Issue fault 877 */ 878 fault_flags = 0; 879 if (usermode) 880 fault_flags |= VM_FAULT_BURST; 881 if (ftype & VM_PROT_WRITE) 882 fault_flags |= VM_FAULT_DIRTY; 883 else 884 fault_flags |= VM_FAULT_NORMAL; 885 rv = vm_fault(map, va, ftype, fault_flags); 886 887 PRELE(lp->lwp_proc); 888 } else { 889 /* 890 * Don't have to worry about process locking or stacks 891 * in the kernel. 892 */ 893 fault_flags = VM_FAULT_NORMAL; 894 rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); 895 } 896 897 if (rv == KERN_SUCCESS) 898 return (0); 899 nogo: 900 if (!usermode) { 901 if (td->td_gd->gd_intr_nesting_level == 0 && 902 td->td_pcb->pcb_onfault) { 903 frame->tf_rip = (register_t)td->td_pcb->pcb_onfault; 904 return (0); 905 } 906 trap_fatal(frame, frame->tf_addr); 907 return (-1); 908 } 909 910 /* 911 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no 912 * kludge is needed to pass the fault address to signal handlers. 913 */ 914 p = td->td_proc; 915 if (td->td_lwp->lwp_vkernel == NULL) { 916 if (bootverbose || freeze_on_seg_fault || ddb_on_seg_fault) { 917 kprintf("seg-fault ft=%04x ff=%04x addr=%p rip=%p " 918 "pid=%d p_comm=%s\n", 919 ftype, fault_flags, 920 (void *)frame->tf_addr, 921 (void *)frame->tf_rip, 922 p->p_pid, p->p_comm); 923 } 924 #ifdef DDB 925 while (freeze_on_seg_fault) { 926 tsleep(p, 0, "freeze", hz * 20); 927 } 928 if (ddb_on_seg_fault) 929 Debugger("ddb_on_seg_fault"); 930 #endif 931 } 932 933 return((rv == KERN_PROTECTION_FAILURE) ? 
static void
trap_fatal(struct trapframe *frame, vm_offset_t eva)
{
	int code, ss;
	u_int type;
	long rsp;
	struct soft_segment_descriptor softseg;
	char *msg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);

	if (type <= MAX_TRAP_MSG)
		msg = trap_msg[type];
	else
		msg = "UNKNOWN";
	kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
	    ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d; ", mycpu->gd_cpuid);
	kprintf("lapic->id = %08x\n", lapic->id);
#endif
	if (type == T_PAGEFLT) {
		kprintf("fault virtual address	= 0x%lx\n", eva);
		kprintf("fault code		= %s %s %s, %s\n",
			code & PGEX_U ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_I ? "instruction" : "data",
			code & PGEX_P ? "protection violation" :
					"page not present");
	}
	kprintf("instruction pointer	= 0x%lx:0x%lx\n",
		frame->tf_cs & 0xffff, frame->tf_rip);
	if (ISPL(frame->tf_cs) == SEL_UPL) {
		ss = frame->tf_ss & 0xffff;
		rsp = frame->tf_rsp;
	} else {
		ss = GSEL(GDATA_SEL, SEL_KPL);
		rsp = (long)&frame->tf_rsp;
	}
	kprintf("stack pointer		= 0x%x:0x%lx\n", ss, rsp);
	kprintf("frame pointer		= 0x%x:0x%lx\n", ss, frame->tf_rbp);
	kprintf("code segment		= base 0x%lx, limit 0x%lx, type 0x%x\n",
		softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	kprintf("			= DPL %d, pres %d, long %d, def32 %d, gran %d\n",
		softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long,
		softseg.ssd_def32, softseg.ssd_gran);
	kprintf("processor eflags	= ");
	if (frame->tf_rflags & PSL_T)
		kprintf("trace trap, ");
	if (frame->tf_rflags & PSL_I)
		kprintf("interrupt enabled, ");
	if (frame->tf_rflags & PSL_NT)
		kprintf("nested task, ");
	if (frame->tf_rflags & PSL_RF)
		kprintf("resume, ");
	kprintf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
	kprintf("current process		= ");
	if (curproc) {
		kprintf("%lu\n", (u_long)curproc->p_pid);
	} else {
		kprintf("Idle\n");
	}
	kprintf("current thread          = pri %d ", curthread->td_pri);
	if (curthread->td_critcount)
		kprintf("(CRIT)");
	kprintf("\n");

#ifdef DDB
	if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame))
		return;
#endif
	kprintf("trap number		= %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic("%s", trap_msg[type]);
	else
		panic("unknown/reserved trap");
}
/*
 * Double fault handler.  Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack.  This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 */
static __inline
int
in_kstack_guard(register_t rptr)
{
	thread_t td = curthread;

	if ((char *)rptr >= td->td_kstack &&
	    (char *)rptr < td->td_kstack + PAGE_SIZE) {
		return 1;
	}
	return 0;
}

void
dblfault_handler(struct trapframe *frame)
{
	thread_t td = curthread;

	if (in_kstack_guard(frame->tf_rsp) || in_kstack_guard(frame->tf_rbp)) {
		kprintf("DOUBLE FAULT - KERNEL STACK GUARD HIT!\n");
		if (in_kstack_guard(frame->tf_rsp))
			frame->tf_rsp = (register_t)(td->td_kstack +
						     PAGE_SIZE);
		if (in_kstack_guard(frame->tf_rbp))
			frame->tf_rbp = (register_t)(td->td_kstack +
						     PAGE_SIZE);
	} else {
		kprintf("DOUBLE FAULT\n");
	}
	kprintf("\nFatal double fault\n");
	kprintf("rip = 0x%lx\n", frame->tf_rip);
	kprintf("rsp = 0x%lx\n", frame->tf_rsp);
	kprintf("rbp = 0x%lx\n", frame->tf_rbp);
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d; ", mycpu->gd_cpuid);
	kprintf("lapic->id = %08x\n", lapic->id);
#endif
	panic("double fault");
}
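
/*
 * Register conventions assumed by syscall2() below (the standard
 * x86_64 SYSCALL ABI as used here): the system call number arrives in
 * %rax and up to six arguments in %rdi, %rsi, %rdx, %r10, %r8 and %r9,
 * with %r10 standing in for %rcx because the SYSCALL instruction
 * clobbers %rcx with the return %rip.  The entry stub is assumed to
 * stage these into the leading trapframe members so the argument copy
 * in syscall2() can treat them as a contiguous array; any further
 * arguments sit on the user stack just above the return address.
 */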
/*
 * syscall2 -	MP aware system call request C handler
 *
 * A system call is essentially treated as a trap except that the
 * MP lock is not held on entry or return.  We are responsible for
 * obtaining the MP lock if necessary and for handling ASTs
 * (e.g. a task switch) prior to return.
 *
 * MPSAFE
 */
void
syscall2(struct trapframe *frame)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	caddr_t params;
	struct sysent *callp;
	register_t orig_tf_rflags;
	int sticks;
	int error;
	int narg;
#ifdef INVARIANTS
	int crit_count = td->td_critcount;
#endif
#ifdef SMP
	int have_mplock = 0;
#endif
	register_t *argp;
	u_int code;
	int reg, regcnt;
	union sysunion args;
	register_t *argsdst;

	mycpu->gd_cnt.v_syscall++;

#ifdef DIAGNOSTIC
	if (ISPL(frame->tf_cs) != SEL_UPL) {
		get_mplock();
		panic("syscall");
		/* NOT REACHED */
	}
#endif

	KTR_LOG(kernentry_syscall, p->p_pid, lp->lwp_tid,
		frame->tf_rax);

	userenter(td, p);	/* lazy raise our priority */

	reg = 0;
	regcnt = 6;

	/*
	 * Misc
	 */
	sticks = (int)td->td_sticks;
	orig_tf_rflags = frame->tf_rflags;

	/*
	 * Virtual kernel intercept - if a VM context managed by a virtual
	 * kernel issues a system call the virtual kernel handles it, not us.
	 * Restore the virtual kernel context and return from its system
	 * call.  The current frame is copied out to the virtual kernel.
	 */
	if (lp->lwp_vkernel && lp->lwp_vkernel->ve) {
		vkernel_trap(lp, frame);
		error = EJUSTRETURN;
		goto out;
	}

	/*
	 * Get the system call parameters and account for time
	 */
	KASSERT(lp->lwp_md.md_regs == frame,
		("Frame mismatch %p %p", lp->lwp_md.md_regs, frame));
	params = (caddr_t)frame->tf_rsp + sizeof(register_t);
	code = frame->tf_rax;

	if (p->p_sysent->sv_prepsyscall) {
		(*p->p_sysent->sv_prepsyscall)(
			frame, (int *)(&args.nosys.sysmsg + 1),
			&code, &params);
	} else {
		if (code == SYS_syscall || code == SYS___syscall) {
			code = frame->tf_rdi;
			reg++;
			regcnt--;
		}
	}

	if (p->p_sysent->sv_mask)
		code &= p->p_sysent->sv_mask;

	if (code >= p->p_sysent->sv_size)
		callp = &p->p_sysent->sv_table[0];
	else
		callp = &p->p_sysent->sv_table[code];

	narg = callp->sy_narg & SYF_ARGMASK;

	/*
	 * On x86_64 we get up to six arguments in registers.  The rest are
	 * on the stack.  The first six members of 'struct trapframe' happen
	 * to be the registers used to pass arguments, in exactly the right
	 * order.
	 */
	argp = &frame->tf_rdi;
	argp += reg;
	argsdst = (register_t *)(&args.nosys.sysmsg + 1);

	/*
	 * JG can we overflow the space pointed to by 'argsdst'
	 * either with 'bcopy' or with 'copyin'?
	 */
	bcopy(argp, argsdst, sizeof(register_t) * regcnt);

	/*
	 * copyin is MP aware, but the tracing code is not
	 */
	if (narg > regcnt) {
		KASSERT(params != NULL, ("copyin args with no params!"));
		error = copyin(params, &argsdst[regcnt],
			(narg - regcnt) * sizeof(register_t));
		if (error) {
#ifdef KTRACE
			if (KTRPOINT(td, KTR_SYSCALL)) {
				MAKEMPSAFE(have_mplock);

				ktrsyscall(lp, code, narg,
					(void *)(&args.nosys.sysmsg + 1));
			}
#endif
			goto bad;
		}
	}

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSCALL)) {
		MAKEMPSAFE(have_mplock);
		ktrsyscall(lp, code, narg, (void *)(&args.nosys.sysmsg + 1));
	}
#endif

	/*
	 * Default return value is 0 (will be copied to %rax).  Double-value
	 * returns use %rax and %rdx.  %rdx is left unchanged for system
	 * calls which return only one result.
	 */
	args.sysmsg_fds[0] = 0;
	args.sysmsg_fds[1] = frame->tf_rdx;

	/*
	 * The syscall might manipulate the trap frame.  If it does it
	 * will probably return EJUSTRETURN.
	 */
	args.sysmsg_frame = frame;

	STOPEVENT(p, S_SCE, narg);	/* MP aware */

	/*
	 * NOTE: All system calls run MPSAFE now.  The system call itself
	 *	 is responsible for getting the MP lock.
	 */
#ifdef SYSCALL_DEBUG
	uint64_t tscval = rdtsc();
#endif
	error = (*callp->sy_call)(&args);
#ifdef SYSCALL_DEBUG
	tscval = rdtsc() - tscval;
	tscval = tscval * 1000000 / tsc_frequency;
	if (SysCallsWorstCase[code] < tscval)
		SysCallsWorstCase[code] = tscval;
#endif
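
	/*
	 * Sketch of the return protocol handled below: 0 stores the
	 * results from sysmsg_fds[0]/[1] into %rax/%rdx and clears the
	 * carry flag, ERESTART backs %rip up over the 2-byte SYSCALL
	 * instruction so it re-executes, EJUSTRETURN leaves the frame
	 * exactly as the syscall set it, and any other value is mapped
	 * through sv_errtbl[] (if present) and returned in %rax with
	 * the carry flag set to signal the error to userland.
	 */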
out:
	/*
	 * MP SAFE (we may or may not have the MP lock at this point)
	 */
	/* kprintf("SYSMSG %d ", error); */
	switch (error) {
	case 0:
		/*
		 * Reinitialize proc pointer `p' as it may be different
		 * if this is a child returning from fork syscall.
		 */
		p = curproc;
		lp = curthread->td_lwp;
		frame->tf_rax = args.sysmsg_fds[0];
		frame->tf_rdx = args.sysmsg_fds[1];
		frame->tf_rflags &= ~PSL_C;
		break;
	case ERESTART:
		/*
		 * Reconstruct pc, we know that 'syscall' is 2 bytes.
		 * We have to do a full context restore so that %r10
		 * (which was holding the value of %rcx) is restored
		 * for the next iteration.
		 */
		frame->tf_rip -= frame->tf_err;
		frame->tf_r10 = frame->tf_rcx;
		break;
	case EJUSTRETURN:
		break;
	case EASYNC:
		panic("Unexpected EASYNC return value (for now)");
	default:
bad:
		if (p->p_sysent->sv_errsize) {
			if (error >= p->p_sysent->sv_errsize)
				error = -1;	/* XXX */
			else
				error = p->p_sysent->sv_errtbl[error];
		}
		frame->tf_rax = error;
		frame->tf_rflags |= PSL_C;
		break;
	}

	/*
	 * Traced syscall.  trapsignal() is not MP aware.
	 */
	if (orig_tf_rflags & PSL_T) {
		MAKEMPSAFE(have_mplock);
		frame->tf_rflags &= ~PSL_T;
		trapsignal(lp, SIGTRAP, TRAP_TRACE);
	}

	/*
	 * Handle reschedule and other end-of-syscall issues
	 */
	userret(lp, frame, sticks);

#ifdef KTRACE
	if (KTRPOINT(td, KTR_SYSRET)) {
		MAKEMPSAFE(have_mplock);
		ktrsysret(lp, code, error, args.sysmsg_result);
	}
#endif

	/*
	 * This works because errno is findable through the
	 * register set.  If we ever support an emulation where this
	 * is not the case, this code will need to be revisited.
	 */
	STOPEVENT(p, S_SCX, code);

	userexit(lp);
#ifdef SMP
	/*
	 * Release the MP lock if we had to get it
	 */
	if (have_mplock)
		rel_mplock();
#endif
	KTR_LOG(kernentry_syscall_ret, p->p_pid, lp->lwp_tid, error);
#ifdef INVARIANTS
	KASSERT(crit_count == td->td_critcount,
		("syscall: critical section count mismatch! %d/%d",
		crit_count, td->td_critcount));
	KASSERT(&td->td_toks_base == td->td_toks_stop,
		("syscall: extra tokens held after trap! %ld",
		td->td_toks_stop - &td->td_toks_base));
#endif
}

/*
 * NOTE: mplock not held at any point
 */
void
fork_return(struct lwp *lp, struct trapframe *frame)
{
	frame->tf_rax = 0;		/* Child returns zero */
	frame->tf_rflags &= ~PSL_C;	/* success */
	frame->tf_rdx = 1;

	generic_lwp_return(lp, frame);
	KTR_LOG(kernentry_fork_ret, lp->lwp_proc->p_pid, lp->lwp_tid);
}
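
/*
 * fork_return() above follows the historical two-register fork
 * convention assumed here: the child gets %rax = 0 and %rdx = 1,
 * while the parent's normal syscall return path delivers the child
 * pid in %rax with %rdx = 0, letting the user-level fork stub tell
 * the two sides apart.
 */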
/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.
 *
 * This code will return back into the fork trampoline code which then
 * runs doreti.
 *
 * NOTE: The mplock is not held at any point.
 */
void
generic_lwp_return(struct lwp *lp, struct trapframe *frame)
{
	struct proc *p = lp->lwp_proc;

	/*
	 * Newly forked processes are given a kernel priority.  We have to
	 * adjust the priority to a normal user priority and fake entry
	 * into the kernel (call userenter()) to install a passive release
	 * function just in case userret() decides to stop the process.
	 * This can occur when ^Z races a fork.  If we do not install the
	 * passive release function the current process designation will
	 * not be released when the thread goes to sleep.
	 */
	lwkt_setpri_self(TDPRI_USER_NORM);
	userenter(lp->lwp_thread, p);
	userret(lp, frame, 0);
#ifdef KTRACE
	if (KTRPOINT(lp->lwp_thread, KTR_SYSRET))
		ktrsysret(lp, SYS_fork, 0, 0);
#endif
	lp->lwp_flag |= LWP_PASSIVE_ACQ;
	userexit(lp);
	lp->lwp_flag &= ~LWP_PASSIVE_ACQ;
}

/*
 * If PGEX_FPFAULT is set then set FP_VIRTFP in the PCB to force a T_DNA
 * fault (which is then passed back to the virtual kernel) if an attempt
 * is made to use the FP unit.
 *
 * XXX this is a fairly big hack.
 */
void
set_vkernel_fp(struct trapframe *frame)
{
	struct thread *td = curthread;

	if (frame->tf_xflags & PGEX_FPFAULT) {
		td->td_pcb->pcb_flags |= FP_VIRTFP;
		if (mdcpu->gd_npxthread == td)
			npxexit();
	} else {
		td->td_pcb->pcb_flags &= ~FP_VIRTFP;
	}
}

/*
 * Called from vkernel_trap() to fixup the vkernel's syscall
 * frame for vmspace_ctl() return.
 */
void
cpu_vkernel_trap(struct trapframe *frame, int error)
{
	frame->tf_rax = error;
	if (error)
		frame->tf_rflags |= PSL_C;
	else
		frame->tf_rflags &= ~PSL_C;
}