1 /* $OpenBSD: trap.c,v 1.165 2024/09/04 07:54:51 mglocker Exp $ */ 2 /* $NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $ */ 3 4 /*- 5 * Copyright (c) 1995 Charles M. Hannum. All rights reserved. 6 * Copyright (c) 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * the University of Utah, and William Jolitz. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 *
 *	@(#)trap.c	7.4 (Berkeley) 5/13/91
 */

/*
 * 386 Trap and System call handling
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/user.h>
#include <sys/signal.h>
#include <sys/syscall.h>
#include <sys/syscall_mi.h>

#include <uvm/uvm_extern.h>

#include <machine/cpufunc.h>
#include <machine/psl.h>
#include <machine/trap.h>
#ifdef DDB
#include <machine/db_machdep.h>
#endif

#include "isa.h"

int upageflttrap(struct trapframe *, uint32_t);
int kpageflttrap(struct trapframe *, uint32_t);
void trap(struct trapframe *);
void ast(struct trapframe *);
void syscall(struct trapframe *);

/*
 * Printable names for the trap numbers.  trap() indexes this table with
 * frame->tf_trapno when it reports a fatal trap, so the order of the
 * entries must follow the T_* numbering noted beside each string.
 */
char	*trap_type[] = {
	"privileged instruction fault",		/*  0 T_PRIVINFLT */
	"breakpoint trap",			/*  1 T_BPTFLT */
	"arithmetic trap",			/*  2 T_ARITHTRAP */
	"reserved trap",			/*  3 T_RESERVED */
	"protection fault",			/*  4 T_PROTFLT */
	"trace trap",				/*  5 T_TRCTRAP */
	"page fault",				/*  6 T_PAGEFLT */
	"alignment fault",			/*  7 T_ALIGNFLT */
	"integer divide fault",			/*  8 T_DIVIDE */
	"non-maskable interrupt",		/*  9 T_NMI */
	"overflow trap",			/* 10 T_OFLOW */
	"bounds check fault",			/* 11 T_BOUND */
	"FPU not available fault",		/* 12 T_DNA */
	"double fault",				/* 13 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 14 T_FPOPFLT (![P]Pro) */
	"invalid TSS fault",			/* 15 T_TSSFLT */
	"segment not present fault",		/* 16 T_SEGNPFLT */
	"stack fault",				/* 17 T_STKFLT */
	"machine check",			/* 18 T_MACHK ([P]Pro) */
	"SIMD FP fault",			/* 19 T_XFTRAP */
};
/* Number of entries in trap_type[]; trap() uses it to bounds-check tf_trapno. */
int	trap_types = sizeof trap_type / sizeof trap_type[0];

#ifdef DEBUG
/* When non-zero, trap() prints the trap frame of every trap it handles. */
int	trapdebug = 0;
#endif

/*
 * pgex2access(pgex):
 *	Translate a page fault error code into the protection that the
 *	faulting access needs.  PGEX_W takes precedence over PGEX_I; when
 *	neither bit is set the access is treated as a read.
 */
static inline int
pgex2access(int pgex)
{
	if (pgex & PGEX_W)
		return PROT_WRITE;
	else if (pgex & PGEX_I)
		return PROT_EXEC;
	return PROT_READ;
}

/*
 * upageflttrap(frame, cr2): user-mode page fault handler
 *
 * frame is the trap frame of the faulting context (only tf_err is read
 * here) and cr2 is the %cr2 value that trap() passes in.
 *
 * Returns non-zero if the fault was handled (possibly by generating
 * a signal).  Returns zero, possibly still holding the kernel lock,
 * if something was so broken that we should panic.
 *
 * NOTE(review): every path in the body below returns 1; the zero case
 * described above is part of the contract trap() checks for but is not
 * produced by this function as written.
 */
int
upageflttrap(struct trapframe *frame, uint32_t cr2)
{
	struct proc *p = curproc;
	/* uvm_fault() is given the page-truncated address, not raw cr2. */
	vaddr_t va = trunc_page((vaddr_t)cr2);
	vm_prot_t access_type = pgex2access(frame->tf_err);
	union sigval sv;
	int signal, sicode, error;

	/*
	 * cpu_pae is true if system has PAE + NX.
	 * If NX is not enabled, we can't distinguish between PROT_READ
	 * and PROT_EXEC access, so try both.
	 */
	error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type);
	if (cpu_pae == 0 && error == EACCES && access_type == PROT_READ)
		error = uvm_fault(&p->p_vmspace->vm_map, va, 0, PROT_EXEC);

	/* Fault resolved: let uvm_grow() see the address, then we are done. */
	if (error == 0) {
		uvm_grow(p, va);
		return 1;
	}

	/*
	 * The fault could not be resolved; pick the signal to deliver.
	 * The default is SIGSEGV/SEGV_MAPERR, refined by the error code:
	 *	ENOMEM	-> SIGKILL, after logging the out-of-swap kill
	 *	EACCES	-> SIGSEGV with SEGV_ACCERR
	 *	EIO	-> SIGBUS with BUS_OBJERR
	 */
	signal = SIGSEGV;
	sicode = SEGV_MAPERR;
	if (error == ENOMEM) {
		printf("UVM: pid %d (%s), uid %d killed:"
		    " out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm,
		    p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
		signal = SIGKILL;
	} else {
		if (error == EACCES)
			sicode = SEGV_ACCERR;
		else if (error == EIO) {
			signal = SIGBUS;
			sicode = BUS_OBJERR;
		}
	}
	/* The signal value carries the untruncated cr2, not va. */
	sv.sival_ptr = (void *)cr2;
	trapsignal(p, signal, T_PAGEFLT, sicode, sv);
	return 1;
}

/*
 * kpageflttrap(frame, cr2): kernel-mode page fault handler
 * Returns non-zero if the fault was handled (either resolved by
 * uvm_fault() or redirected to the pcb_onfault recovery address).
 * Returns zero, possibly still holding the kernel lock,
 * if something was so broken that we should panic.
 *
 * frame is the trap frame of the faulting context and cr2 is the %cr2
 * value that trap() passes in; the page containing cr2 is the one that
 * gets faulted in.  trap() calls this only for T_PAGEFLT without T_USER.
 */
int
kpageflttrap(struct trapframe *frame, uint32_t cr2)
{
	struct proc *p = curproc;
	struct pcb *pcb;
	vaddr_t va = trunc_page((vaddr_t)cr2);
	struct vm_map *map;
	vm_prot_t access_type = pgex2access(frame->tf_err);
	caddr_t onfault;
	int error;

	/*
	 * Without a current process, its u-area and its vmspace there is
	 * no pcb or map to work with; returning 0 makes trap() panic.
	 */
	if (p == NULL || p->p_addr == NULL || p->p_vmspace == NULL)
		return 0;

	pcb = &p->p_addr->u_pcb;

	/* This will only trigger if SMEP is enabled */
	if (cr2 <= VM_MAXUSER_ADDRESS && frame->tf_err & PGEX_I)
		panic("attempt to execute user address %p "
		    "in supervisor mode", (void *)cr2);

	/*
	 * This will only trigger if SMAP is enabled.  A protection
	 * violation (PGEX_P) on a user address is fatal only when no
	 * pcb_onfault recovery address is registered.
	 */
	if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS &&
	    frame->tf_err & PGEX_P)
		panic("attempt to access user address %p "
		    "in supervisor mode", (void *)cr2);

	/*
	 * It is only a kernel address space fault iff:
	 *	1. (type & T_USER) == 0  and
	 *	2. pcb_onfault not set or
	 *	3. pcb_onfault set but supervisor space fault
	 * The last can occur during an exec() copyin where the
	 * argument space is lazy-allocated.
	 *
	 * ("type" is the trap type in trap(); it is not visible here.)
	 * In this function the map is chosen purely by address: the
	 * process map by default, kernel_map for addresses at or above
	 * VM_MIN_KERNEL_ADDRESS.
	 */
	map = &p->p_vmspace->vm_map;
	if (va >= VM_MIN_KERNEL_ADDRESS)
		map = kernel_map;

	/*
	 * While ci_inatomic is set, faults on the process map are not
	 * passed to uvm_fault(); they fail with EFAULT instead.  Faults on
	 * kernel_map are always serviced.
	 */
	if (curcpu()->ci_inatomic == 0 || map == kernel_map) {
		/*
		 * pcb_onfault is cleared for the duration of uvm_fault()
		 * and restored afterwards -- presumably so a fault taken
		 * inside uvm_fault() itself is not mistaken for a
		 * recoverable copyin/copyout fault (confirm).
		 */
		onfault = pcb->pcb_onfault;
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, 0, access_type);
		pcb->pcb_onfault = onfault;

		if (error == 0 && map != kernel_map)
			uvm_grow(p, va);
	} else
		error = EFAULT;

	if (error) {
		if (pcb->pcb_onfault == NULL) {
			/* bad memory access in the kernel */
			panic("uvm_fault(%p, 0x%lx, 0, %d) -> %x",
			    map, va, access_type, error);
			return 0;
		}
		/* Resume at the registered recovery address instead. */
		frame->tf_eip = (u_int32_t)pcb->pcb_onfault;
	}
	return 1;
}

/*
 * trap(frame):
 *	Exception, fault, and trap interface to BSD kernel.
 * This common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * The trap number comes from frame->tf_trapno; T_USER is or'ed in when
 * the frame was not taken in kernel mode, so each case label below is
 * either a kernel-mode trap (plain T_*) or a user-mode one (T_*|T_USER).
 */
void
trap(struct trapframe *frame)
{
	struct proc *p = curproc;
	int type = frame->tf_trapno;
	struct pcb *pcb = NULL;
	extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
	    resume_pop_fs[], resume_pop_gs[];
	struct trapframe *vframe;
	int resume;
	union sigval sv;
	vaddr_t gdt_cs = SEGDESC_LIMIT(curcpu()->ci_gdt[GUCODE_SEL].sd);
	/* Read %cr2 once, up front; the page fault cases below use this copy. */
	uint32_t cr2 = rcr2();

	uvmexp.traps++;

#ifdef DEBUG
	if (trapdebug) {
		printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
		    frame->tf_trapno, frame->tf_err, frame->tf_eip,
		    frame->tf_cs, frame->tf_eflags, cr2, lapic_tpr);
		printf("curproc %p\n", curproc);
	}
#endif
#ifdef DIAGNOSTIC
	/* With SMAP, PSL_AC must be clear on entry; clear it, then panic. */
	if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
		u_int ef = read_eflags();
		if (ef & PSL_AC) {
			write_eflags(ef & ~PSL_AC);
			panic("%s: AC set on entry", "trap");
		}
	}
#endif

	/* Trap from user mode: tag the type and record the user frame. */
	if (!KERNELMODE(frame->tf_cs, frame->tf_eflags)) {
		type |= T_USER;
		p->p_md.md_regs = frame;
		refreshcreds(p);
	}

	switch (type) {

	/* kernel-mode protection, segment-not-present and alignment faults */
	case T_PROTFLT:
	case T_SEGNPFLT:
	case T_ALIGNFLT:
		/* Check for copyin/copyout fault. */
		if (p && p->p_addr) {
			pcb = &p->p_addr->u_pcb;
			if (pcb->pcb_onfault != 0) {
				frame->tf_eip = (int)pcb->pcb_onfault;
				return;
			}
		}

		/*
		 * Check for failure during return to user mode.
		 *
		 * We do this by looking at the instruction we faulted on.  The
		 * specific instructions we recognize only happen when
		 * returning from a trap, syscall, or interrupt.
		 *
		 * XXX
		 * The heuristic used here will currently fail for the case of
		 * one of the 2 pop instructions faulting when returning from
		 * a fast interrupt.  This should not be possible.  It can be
		 * fixed by rearranging the trap frame so that the stack format
		 * at this point is the same as on exit from a `slow'
		 * interrupt.
		 *
		 * In each case vframe is positioned so that the named field
		 * (tf_eip, tf_ds, ...) aliases &frame->tf_esp, and resume is
		 * the matching resume_* label to continue at.
		 */
		switch (*(u_char *)frame->tf_eip) {
		case 0xcf:	/* iret */
			vframe = (void *)((int)&frame->tf_esp -
			    offsetof(struct trapframe, tf_eip));
			resume = (int)resume_iret;
			break;
		case 0x1f:	/* popl %ds */
			vframe = (void *)((int)&frame->tf_esp -
			    offsetof(struct trapframe, tf_ds));
			resume = (int)resume_pop_ds;
			break;
		case 0x07:	/* popl %es */
			vframe = (void *)((int)&frame->tf_esp -
			    offsetof(struct trapframe, tf_es));
			resume = (int)resume_pop_es;
			break;
		case 0x0f:	/* 0x0f prefix */
			switch (*(u_char *)(frame->tf_eip + 1)) {
			case 0xa1:	/* popl %fs */
				vframe = (void *)((int)&frame->tf_esp -
				    offsetof(struct trapframe, tf_fs));
				resume = (int)resume_pop_fs;
				break;
			case 0xa9:	/* popl %gs */
				vframe = (void *)((int)&frame->tf_esp -
				    offsetof(struct trapframe, tf_gs));
				resume = (int)resume_pop_gs;
				break;
			default:
				goto we_re_toast;
			}
			break;
		default:
			goto we_re_toast;
		}
		/* Only recover if the frame being restored was a user frame. */
		if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
			goto we_re_toast;

		frame->tf_eip = resume;
		return;

	/*
	 * User-mode faults: most cases report the faulting %eip in the
	 * signal value and post a signal, then leave through "out".
	 */
	case T_PROTFLT|T_USER:		/* protection fault */
		/* If pmap_exec_fixup does something, let's retry the trap. */
		if (cpu_pae == 0 &&
		    pmap_exec_fixup(&p->p_vmspace->vm_map, frame, gdt_cs,
		    &p->p_addr->u_pcb))
			goto out;

		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
		goto out;

	case T_TSSFLT|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGBUS, type &~ T_USER, BUS_OBJERR, sv);
		goto out;

	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
		goto out;

	case T_ALIGNFLT|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGBUS, type &~ T_USER, BUS_ADRALN, sv);
		goto out;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
		goto out;

	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
		goto out;

	case T_DNA|T_USER: {
		/*
		 * FPU-not-available from user mode is treated as fatal for
		 * the process.  NOTE(review): the signal is SIGKILL but the
		 * code passed is an FPE_* value -- confirm this is intended.
		 */
		printf("pid %d killed due to lack of floating point\n",
		    p->p_p->ps_pid);
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
		goto out;
	}

	case T_BOUND|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
		goto out;
	case T_OFLOW|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
		goto out;
	case T_DIVIDE|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
		goto out;

	case T_ARITHTRAP|T_USER:
		/*
		 * NOTE(review): unlike the cases above, the trap-type
		 * argument here is frame->tf_err rather than
		 * type &~ T_USER -- confirm against trapsignal()'s users.
		 */
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, frame->tf_err, FPE_INTOVF, sv);
		goto out;

	case T_XFTRAP|T_USER:
		npxtrap(frame);
		goto out;

	/* Kernel-mode page fault: fatal unless kpageflttrap() handles it. */
	case T_PAGEFLT:
		if (kpageflttrap(frame, cr2))
			return;
		goto we_re_toast;

	case T_PAGEFLT|T_USER: {	/* page fault */
		/*
		 * Stack pointer check first; if it fails, skip the fault
		 * handling and go straight to userret().  Presumably
		 * uvm_map_inentry() has already dealt with the process in
		 * that case (see the "not MAP_STACK" message) -- confirm.
		 */
		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
			goto out;
		if (upageflttrap(frame, cr2))
			goto out;
		goto we_re_toast;
	}

#if 0  /* Should this be left out?  */
#if !defined(DDB)
	/* XXX need to deal with this when DDB is present, too */
	case T_TRCTRAP:	/* kernel trace trap; someone single stepping lcall's */
			/* syscall has to turn off the trace bit itself */
		return;
#endif
#endif

	/*
	 * NOTE(review): the two SIGTRAP cases report cr2 as the signal
	 * value, where the other user traps report tf_eip -- confirm.
	 * They leave the switch with break and reach userret() via the
	 * T_USER check after the switch.
	 */
	case T_BPTFLT|T_USER:		/* bpt instruction fault */
		sv.sival_int = cr2;
		trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
		break;
	case T_TRCTRAP|T_USER:		/* trace trap */
		sv.sival_int = cr2;
		trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
		break;

#if NISA > 0
	case T_NMI:
	case T_NMI|T_USER:
#ifdef DDB
		/* NMI can be hooked up to a pushbutton for debugging */
		printf ("NMI ... going to debugger\n");
		if (db_ktrap(type, 0, frame))
			return;
#endif
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi() == 0)
			return;
		else
			goto we_re_toast;
#endif

	case T_TRCTRAP:
#ifndef DDB
		return;	/* Just return if no kernel debugger */
#endif
		/* FALLTHROUGH */
	default:
	we_re_toast:
		/*
		 * Unrecoverable trap: give the kernel debugger a chance
		 * (DDB kernels only), then describe the trap and panic.
		 */
#ifdef DDB
		if (db_ktrap(type, frame->tf_err, frame))
			return;
#endif
		if (frame->tf_trapno < trap_types)
			printf("fatal %s (%d)", trap_type[frame->tf_trapno],
			    frame->tf_trapno);
		else
			printf("unknown trap %d", frame->tf_trapno);
		printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
		printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
		    type, frame->tf_err, frame->tf_eip, frame->tf_cs,
		    frame->tf_eflags, cr2, lapic_tpr);

		panic("trap type %d, code=%x, pc=%x",
		    type, frame->tf_err, frame->tf_eip);
		/*NOTREACHED*/
	}

	/* Cases that "break" out of the switch only need userret() for user traps. */
	if ((type & T_USER) == 0)
		return;
out:
	userret(p);
}


/*
 * ast(frame):
 *	AST handler.  This is called from assembly language stubs when
 *	returning to userspace after a syscall, trap, or interrupt.
 *	The frame must be a user-mode frame (asserted below).
 */
void
ast(struct trapframe *frame)
{
	struct proc *p = curproc;

	uvmexp.traps++;
	KASSERT(!KERNELMODE(frame->tf_cs, frame->tf_eflags));
	p->p_md.md_regs = frame;
	refreshcreds(p);
	uvmexp.softs++;
	mi_ast(p, curcpu()->ci_want_resched);
	userret(p);
}


/*
 * syscall(frame):
 *	System call request from POSIX system call gate interface to kernel.
 *
 *	The syscall number is taken from tf_eax and the arguments are
 *	copied in from the user stack.  On return tf_eax/tf_edx hold the
 *	result with the carry flag clear, or tf_eax holds the error number
 *	with the carry flag set.
 */
void
syscall(struct trapframe *frame)
{
	caddr_t params;
	const struct sysent *callp = sysent;
	struct proc *p = curproc;
	int error;
	register_t code, args[8], rval[2];
#ifdef DIAGNOSTIC
	/* Remember lapic_tpr on entry so a change can be reported on exit. */
	int ocpl = lapic_tpr;
#endif
	short argsize;

	uvmexp.syscalls++;
#ifdef DIAGNOSTIC
	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
		panic("syscall");
#endif
#ifdef DIAGNOSTIC
	/* With SMAP, PSL_AC must be clear on entry; clear it, then panic. */
	if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
		u_int ef = read_eflags();
		if (ef & PSL_AC) {
			write_eflags(ef & ~PSL_AC);
			panic("%s: AC set on entry", "syscall");
		}
	}
#endif

	p->p_md.md_regs = frame;

	code = frame->tf_eax;
	// XXX out of range stays on syscall0, which we assume is enosys
	if (code > 0 && code < SYS_MAXSYSCALL)
		callp += code;

	/*
	 * Arguments start one int above the user stack pointer --
	 * presumably skipping the return address pushed by the caller of
	 * the syscall stub (confirm).  A copyin failure jumps straight to
	 * the error return, bypassing mi_syscall().
	 */
	argsize = callp->sy_argsize;
	params = (caddr_t)frame->tf_esp + sizeof(int);
	if (argsize && (error = copyin(params, args, argsize)))
		goto bad;

	/* Second return value defaults to the caller's %edx. */
	rval[0] = 0;
	rval[1] = frame->tf_edx;

	error = mi_syscall(p, code, callp, args, rval);

	switch (error) {
	case 0:
		frame->tf_eax = rval[0];
		frame->tf_edx = rval[1];
		frame->tf_eflags &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/* Back up over the int$80 (2 bytes) that made the syscall */
		frame->tf_eip -= 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		frame->tf_eax = error;
		frame->tf_eflags |= PSL_C;	/* carry bit */
		break;
	}

	mi_syscall_return(p, code, error, rval);

#ifdef DIAGNOSTIC
	/* Report, and undo, any change to lapic_tpr made during the syscall. */
	if (lapic_tpr != ocpl) {
		printf("WARNING: SPL (0x%x) NOT LOWERED ON "
		    "syscall(0x%lx, 0x%lx, 0x%lx, 0x%lx...) EXIT, PID %d\n",
		    lapic_tpr, code, args[0], args[1], args[2],
		    p->p_p->ps_pid);
		lapic_tpr = ocpl;
	}
#endif
}

/*
 * child_return(arg):
 *	arg is the struct proc whose saved user frame (p_md.md_regs) is
 *	patched to return 0 in %eax with the carry flag clear.  The kernel
 *	lock is then released and mi_child_return() is called.
 *	Presumably this is the first kernel code a newly forked process
 *	runs -- confirm against the fork path.
 */
void
child_return(void *arg)
{
	struct proc *p = (struct proc *)arg;
	struct trapframe *tf = p->p_md.md_regs;

	tf->tf_eax = 0;
	tf->tf_eflags &= ~PSL_C;

	KERNEL_UNLOCK();

	mi_child_return(p);
}