1 /* $OpenBSD: trap.c,v 1.106 2024/09/04 07:54:51 mglocker Exp $ */ 2 /* $NetBSD: trap.c,v 1.2 2003/05/04 23:51:56 fvdl Exp $ */ 3 4 /*- 5 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc. 6 * All rights reserved. 7 * 8 * This code is derived from software contributed to The NetBSD Foundation 9 * by Charles M. Hannum. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /*- 34 * Copyright (c) 1990 The Regents of the University of California. 35 * All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the University of Utah, and William Jolitz. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)trap.c 7.4 (Berkeley) 5/13/91 65 */ 66 67 /* 68 * amd64 Trap and System call handling 69 */ 70 #undef TRAP_SIGDEBUG 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/proc.h> 75 #include <sys/signalvar.h> 76 #include <sys/user.h> 77 #include <sys/signal.h> 78 #include <sys/syscall.h> 79 #include <sys/syscall_mi.h> 80 #include <sys/stdarg.h> 81 82 #include <uvm/uvm_extern.h> 83 84 #include <machine/cpu.h> 85 #include <machine/cpufunc.h> 86 #include <machine/fpu.h> 87 #include <machine/psl.h> 88 #include <machine/trap.h> 89 #ifdef DDB 90 #include <ddb/db_output.h> 91 #include <machine/db_machdep.h> 92 #endif 93 94 #include "isa.h" 95 96 int upageflttrap(struct trapframe *, uint64_t); 97 int kpageflttrap(struct trapframe *, uint64_t); 98 void kerntrap(struct trapframe *); 99 void usertrap(struct trapframe *); 100 void ast(struct trapframe *); 101 void syscall(struct trapframe *); 102 103 const char * const trap_type[] = { 104 "privileged instruction fault", /* 0 T_PRIVINFLT */ 105 "breakpoint trap", /* 1 T_BPTFLT */ 106 "arithmetic trap", /* 2 T_ARITHTRAP */ 107 "reserved trap", /* 3 T_RESERVED */ 108 "protection fault", /* 4 T_PROTFLT */ 109 "trace trap", /* 5 T_TRCTRAP */ 110 "page fault", /* 6 T_PAGEFLT */ 111 "alignment fault", /* 7 T_ALIGNFLT */ 112 "integer divide fault", /* 8 T_DIVIDE */ 113 "non-maskable interrupt", /* 9 T_NMI */ 114 "overflow trap", /* 10 T_OFLOW */ 115 "bounds check fault", /* 11 T_BOUND */ 116 "FPU not available fault", /* 12 T_DNA */ 117 "double fault", /* 13 T_DOUBLEFLT */ 118 "FPU operand fetch fault", /* 14 T_FPOPFLT */ 119 "invalid TSS fault", /* 15 T_TSSFLT */ 120 "segment not present fault", /* 16 T_SEGNPFLT */ 121 "stack fault", /* 17 T_STKFLT */ 122 "machine check", /* 18 T_MCA */ 123 "SSE FP exception", /* 19 T_XMM */ 124 "virtualization exception", /* 20 T_VE */ 125 "control protection exception", /* 21 T_CP */ 126 }; 127 const int trap_types = nitems(trap_type); 128 129 #ifdef DEBUG 130 int trapdebug = 0; 131 #endif 132 133 static void trap_print(struct trapframe *, int _type); 134 static inline void frame_dump(struct trapframe *_tf, struct proc *_p, 135 const char *_sig, uint64_t _cr2); 136 static inline void verify_smap(const char *_func); 137 static inline int verify_pkru(struct proc *); 138 static inline void debug_trap(struct trapframe *_frame, struct proc *_p, 139 long _type); 140 141 static inline void 142 fault(const char *fmt, ...) 143 { 144 struct cpu_info *ci = curcpu(); 145 va_list ap; 146 147 atomic_cas_ptr(&panicstr, NULL, ci->ci_panicbuf); 148 149 va_start(ap, fmt); 150 vsnprintf(ci->ci_panicbuf, sizeof(ci->ci_panicbuf), fmt, ap); 151 va_end(ap); 152 #ifdef DDB 153 db_printf("%s\n", ci->ci_panicbuf); 154 #else 155 printf("%s\n", ci->ci_panicbuf); 156 #endif 157 } 158 159 static inline int 160 pgex2access(int pgex) 161 { 162 if (pgex & PGEX_W) 163 return PROT_WRITE; 164 else if (pgex & PGEX_I) 165 return PROT_EXEC; 166 return PROT_READ; 167 } 168 169 /* 170 * upageflttrap(frame, usermode): page fault handler 171 * Returns non-zero if the fault was handled (possibly by generating 172 * a signal). Returns zero, possibly still holding the kernel lock, 173 * if something was so broken that we should panic. 174 */ 175 int 176 upageflttrap(struct trapframe *frame, uint64_t cr2) 177 { 178 struct proc *p = curproc; 179 vaddr_t va = trunc_page((vaddr_t)cr2); 180 vm_prot_t access_type = pgex2access(frame->tf_err); 181 union sigval sv; 182 int signal, sicode, error; 183 184 /* 185 * If NX is not enabled, we can't distinguish between PROT_READ 186 * and PROT_EXEC access, so try both. 187 */ 188 error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type); 189 if (pg_nx == 0 && error == EACCES && access_type == PROT_READ) 190 error = uvm_fault(&p->p_vmspace->vm_map, va, 0, PROT_EXEC); 191 if (error == 0) { 192 uvm_grow(p, va); 193 return 1; 194 } 195 196 signal = SIGSEGV; 197 sicode = SEGV_MAPERR; 198 if (error == ENOMEM) { 199 printf("UVM: pid %d (%s), uid %d killed:" 200 " out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm, 201 p->p_ucred ? (int)p->p_ucred->cr_uid : -1); 202 signal = SIGKILL; 203 } else { 204 if (error == EACCES) 205 sicode = SEGV_ACCERR; 206 else if (error == EIO) { 207 signal = SIGBUS; 208 sicode = BUS_OBJERR; 209 } 210 } 211 sv.sival_ptr = (void *)cr2; 212 trapsignal(p, signal, T_PAGEFLT, sicode, sv); 213 return 1; 214 } 215 216 217 /* 218 * kpageflttrap(frame, usermode): page fault handler 219 * Returns non-zero if the fault was handled (possibly by generating a signal). 220 * Returns zero if something was so broken that we should panic. 221 */ 222 int 223 kpageflttrap(struct trapframe *frame, uint64_t cr2) 224 { 225 struct proc *p = curproc; 226 struct pcb *pcb; 227 vaddr_t va = trunc_page((vaddr_t)cr2); 228 struct vm_map *map; 229 vm_prot_t access_type = pgex2access(frame->tf_err); 230 caddr_t onfault; 231 int error; 232 233 if (p == NULL || p->p_addr == NULL || p->p_vmspace == NULL) 234 return 0; 235 236 pcb = &p->p_addr->u_pcb; 237 if (pcb->pcb_onfault != NULL) { 238 extern caddr_t __nofault_start[], __nofault_end[]; 239 caddr_t *nf = __nofault_start; 240 while (*nf++ != pcb->pcb_onfault) { 241 if (nf >= __nofault_end) { 242 fault("invalid pcb_nofault=%lx", 243 (long)pcb->pcb_onfault); 244 return 0; 245 } 246 } 247 } 248 249 /* This will only trigger if SMEP is enabled */ 250 if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS && 251 frame->tf_err & PGEX_I) { 252 fault("attempt to execute user address %p " 253 "in supervisor mode", (void *)cr2); 254 return 0; 255 } 256 /* This will only trigger if SMAP is enabled */ 257 if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS && 258 frame->tf_err & PGEX_P) { 259 fault("attempt to access user address %p " 260 "in supervisor mode", (void *)cr2); 261 return 0; 262 } 263 264 /* 265 * It is only a kernel address space fault iff: 266 * 1. when running in ring 0 and 267 * 2. pcb_onfault not set or 268 * 3. pcb_onfault set but supervisor space fault 269 * The last can occur during an exec() copyin where the 270 * argument space is lazy-allocated. 271 */ 272 map = &p->p_vmspace->vm_map; 273 if (va >= VM_MIN_KERNEL_ADDRESS) 274 map = kernel_map; 275 276 if (curcpu()->ci_inatomic == 0 || map == kernel_map) { 277 onfault = pcb->pcb_onfault; 278 pcb->pcb_onfault = NULL; 279 error = uvm_fault(map, va, 0, access_type); 280 pcb->pcb_onfault = onfault; 281 282 if (error == 0 && map != kernel_map) 283 uvm_grow(p, va); 284 } else 285 error = EFAULT; 286 287 if (error) { 288 if (pcb->pcb_onfault == NULL) { 289 /* bad memory access in the kernel */ 290 fault("uvm_fault(%p, 0x%llx, 0, %d) -> %x", 291 map, cr2, access_type, error); 292 return 0; 293 } 294 frame->tf_rip = (u_int64_t)pcb->pcb_onfault; 295 } 296 297 return 1; 298 } 299 300 301 /* 302 * kerntrap(frame): 303 * Exception, fault, and trap interface to BSD kernel. This 304 * common code is called from assembly language IDT gate entry 305 * routines that prepare a suitable stack frame, and restore this 306 * frame after the exception has been processed. 307 */ 308 void 309 kerntrap(struct trapframe *frame) 310 { 311 int type = (int)frame->tf_trapno; 312 uint64_t cr2 = rcr2(); 313 314 verify_smap(__func__); 315 uvmexp.traps++; 316 debug_trap(frame, curproc, type); 317 318 switch (type) { 319 320 default: 321 we_re_toast: 322 #ifdef DDB 323 if (db_ktrap(type, frame->tf_err, frame)) 324 return; 325 #endif 326 trap_print(frame, type); 327 panic("trap type %d, code=%llx, pc=%llx", 328 type, frame->tf_err, frame->tf_rip); 329 /*NOTREACHED*/ 330 331 case T_PAGEFLT: /* allow page faults in kernel mode */ 332 if (kpageflttrap(frame, cr2)) 333 return; 334 goto we_re_toast; 335 336 #if NISA > 0 337 case T_NMI: 338 #ifdef DDB 339 /* NMI can be hooked up to a pushbutton for debugging */ 340 printf ("NMI ... going to debugger\n"); 341 if (db_ktrap(type, 0, frame)) 342 return; 343 #endif 344 /* machine/parity/power fail/"kitchen sink" faults */ 345 346 if (x86_nmi() != 0) 347 goto we_re_toast; 348 else 349 return; 350 #endif /* NISA > 0 */ 351 } 352 } 353 354 /* If we find out userland changed the pkru register, punish the process */ 355 static inline int 356 verify_pkru(struct proc *p) 357 { 358 if (pg_xo == 0 || rdpkru(0) == PGK_VALUE) 359 return 0; 360 KERNEL_LOCK(); 361 sigabort(p); 362 KERNEL_UNLOCK(); 363 return 1; 364 } 365 366 /* 367 * usertrap(frame): handler for exceptions, faults, and traps from userspace 368 * This is called from the assembly language IDT gate entries 369 * which prepare a suitable stack frame and restores the CPU state 370 * after the fault has been processed. 371 */ 372 void 373 usertrap(struct trapframe *frame) 374 { 375 struct proc *p = curproc; 376 int type = (int)frame->tf_trapno; 377 uint64_t cr2 = rcr2(); 378 union sigval sv; 379 int sig, code; 380 381 verify_smap(__func__); 382 uvmexp.traps++; 383 debug_trap(frame, p, type); 384 385 p->p_md.md_regs = frame; 386 refreshcreds(p); 387 388 if (verify_pkru(p)) 389 goto out; 390 391 switch (type) { 392 case T_TSSFLT: 393 sig = SIGBUS; 394 code = BUS_OBJERR; 395 break; 396 case T_PROTFLT: /* protection fault */ 397 case T_SEGNPFLT: 398 case T_STKFLT: 399 frame_dump(frame, p, "SEGV", 0); 400 sig = SIGSEGV; 401 code = SEGV_MAPERR; 402 break; 403 case T_ALIGNFLT: 404 sig = SIGBUS; 405 code = BUS_ADRALN; 406 break; 407 case T_PRIVINFLT: /* privileged instruction fault */ 408 sig = SIGILL; 409 code = ILL_PRVOPC; 410 break; 411 case T_DIVIDE: 412 sig = SIGFPE; 413 code = FPE_INTDIV; 414 break; 415 case T_ARITHTRAP: 416 case T_XMM: /* real arithmetic exceptions */ 417 sig = SIGFPE; 418 code = fputrap(type); 419 break; 420 case T_BPTFLT: /* bpt instruction fault */ 421 case T_TRCTRAP: /* trace trap */ 422 sig = SIGTRAP; 423 code = TRAP_BRKPT; 424 break; 425 case T_CP: 426 sig = SIGILL; 427 code = (frame->tf_err & 0x7fff) < 4 ? ILL_BTCFI 428 : ILL_BADSTK; 429 break; 430 431 case T_PAGEFLT: /* page fault */ 432 if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), 433 "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n", 434 uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial)) 435 goto out; 436 if (upageflttrap(frame, cr2)) 437 goto out; 438 /* FALLTHROUGH */ 439 440 default: 441 trap_print(frame, type); 442 panic("impossible trap"); 443 } 444 445 sv.sival_ptr = (void *)frame->tf_rip; 446 trapsignal(p, sig, type, code, sv); 447 448 out: 449 userret(p); 450 } 451 452 453 static void 454 trap_print(struct trapframe *frame, int type) 455 { 456 if (type < trap_types) 457 printf("fatal %s", trap_type[type]); 458 else 459 printf("unknown trap %d", type); 460 printf(" in %s mode\n", KERNELMODE(frame->tf_cs, frame->tf_rflags) ? 461 "supervisor" : "user"); 462 printf("trap type %d code %llx rip %llx cs %llx rflags %llx cr2 " 463 "%llx cpl %x rsp %llx\n", 464 type, frame->tf_err, frame->tf_rip, frame->tf_cs, 465 frame->tf_rflags, rcr2(), curcpu()->ci_ilevel, frame->tf_rsp); 466 printf("gsbase %p kgsbase %p\n", 467 (void *)rdmsr(MSR_GSBASE), (void *)rdmsr(MSR_KERNELGSBASE)); 468 } 469 470 471 static inline void 472 frame_dump(struct trapframe *tf, struct proc *p, const char *sig, uint64_t cr2) 473 { 474 #ifdef TRAP_SIGDEBUG 475 printf("pid %d (%s): %s at rip %llx addr %llx\n", 476 p->p_p->ps_pid, p->p_p->ps_comm, sig, tf->tf_rip, cr2); 477 printf("rip %p cs 0x%x rfl %p rsp %p ss 0x%x\n", 478 (void *)tf->tf_rip, (unsigned)tf->tf_cs & 0xffff, 479 (void *)tf->tf_rflags, 480 (void *)tf->tf_rsp, (unsigned)tf->tf_ss & 0xffff); 481 printf("err 0x%llx trapno 0x%llx\n", 482 tf->tf_err, tf->tf_trapno); 483 printf("rdi %p rsi %p rdx %p\n", 484 (void *)tf->tf_rdi, (void *)tf->tf_rsi, (void *)tf->tf_rdx); 485 printf("rcx %p r8 %p r9 %p\n", 486 (void *)tf->tf_rcx, (void *)tf->tf_r8, (void *)tf->tf_r9); 487 printf("r10 %p r11 %p r12 %p\n", 488 (void *)tf->tf_r10, (void *)tf->tf_r11, (void *)tf->tf_r12); 489 printf("r13 %p r14 %p r15 %p\n", 490 (void *)tf->tf_r13, (void *)tf->tf_r14, (void *)tf->tf_r15); 491 printf("rbp %p rbx %p rax %p\n", 492 (void *)tf->tf_rbp, (void *)tf->tf_rbx, (void *)tf->tf_rax); 493 #endif 494 } 495 496 static inline void 497 verify_smap(const char *func) 498 { 499 #ifdef DIAGNOSTIC 500 if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) { 501 u_long rf = read_rflags(); 502 if (rf & PSL_AC) { 503 write_rflags(rf & ~PSL_AC); 504 panic("%s: AC set on entry", func); 505 } 506 } 507 #endif 508 } 509 510 static inline void 511 debug_trap(struct trapframe *frame, struct proc *p, long type) 512 { 513 #ifdef DEBUG 514 if (trapdebug) { 515 printf("trap %ld code %llx rip %llx cs %llx rflags %llx " 516 "cr2 %llx cpl %x\n", 517 type, frame->tf_err, frame->tf_rip, frame->tf_cs, 518 frame->tf_rflags, rcr2(), curcpu()->ci_ilevel); 519 printf("curproc %p\n", (void *)p); 520 if (p != NULL) 521 printf("pid %d\n", p->p_p->ps_pid); 522 } 523 #endif 524 } 525 526 /* 527 * ast(frame): 528 * AST handler. This is called from assembly language stubs when 529 * returning to userspace after a syscall or interrupt. 530 */ 531 void 532 ast(struct trapframe *frame) 533 { 534 struct proc *p = curproc; 535 536 uvmexp.traps++; 537 KASSERT(!KERNELMODE(frame->tf_cs, frame->tf_rflags)); 538 p->p_md.md_regs = frame; 539 refreshcreds(p); 540 uvmexp.softs++; 541 mi_ast(p, curcpu()->ci_want_resched); 542 userret(p); 543 } 544 545 546 /* 547 * syscall(frame): 548 * System call request from POSIX system call gate interface to kernel. 549 */ 550 void 551 syscall(struct trapframe *frame) 552 { 553 const struct sysent *callp; 554 struct proc *p; 555 int error = ENOSYS; 556 register_t code, *args, rval[2]; 557 558 verify_smap(__func__); 559 uvmexp.syscalls++; 560 p = curproc; 561 562 if (verify_pkru(p)) { 563 userret(p); 564 return; 565 } 566 567 code = frame->tf_rax; 568 args = (register_t *)&frame->tf_rdi; 569 570 if (code <= 0 || code >= SYS_MAXSYSCALL) 571 goto bad; 572 callp = sysent + code; 573 574 rval[0] = 0; 575 rval[1] = 0; 576 577 error = mi_syscall(p, code, callp, args, rval); 578 579 switch (error) { 580 case 0: 581 frame->tf_rax = rval[0]; 582 frame->tf_rflags &= ~PSL_C; /* carry bit */ 583 break; 584 case ERESTART: 585 /* Back up over the syscall instruction (2 bytes) */ 586 frame->tf_rip -= 2; 587 break; 588 case EJUSTRETURN: 589 /* nothing to do */ 590 break; 591 default: 592 bad: 593 frame->tf_rax = error; 594 frame->tf_rflags |= PSL_C; /* carry bit */ 595 break; 596 } 597 598 mi_syscall_return(p, code, error, rval); 599 } 600 601 void 602 child_return(void *arg) 603 { 604 struct proc *p = arg; 605 struct trapframe *tf = p->p_md.md_regs; 606 607 tf->tf_rax = 0; 608 tf->tf_rflags &= ~PSL_C; 609 610 KERNEL_UNLOCK(); 611 612 mi_child_return(p); 613 } 614 615