xref: /openbsd-src/sys/arch/i386/i386/trap.c (revision 9593dc34da13a12012033a17061c846c208061c2)
1 /*	$OpenBSD: trap.c,v 1.165 2024/09/04 07:54:51 mglocker Exp $	*/
2 /*	$NetBSD: trap.c,v 1.95 1996/05/05 06:50:02 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1995 Charles M. Hannum.  All rights reserved.
6  * Copyright (c) 1990 The Regents of the University of California.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to Berkeley by
10  * the University of Utah, and William Jolitz.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  *	@(#)trap.c	7.4 (Berkeley) 5/13/91
37  */
38 
39 /*
40  * 386 Trap and System call handling
41  */
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/proc.h>
46 #include <sys/signalvar.h>
47 #include <sys/user.h>
48 #include <sys/signal.h>
49 #include <sys/syscall.h>
50 #include <sys/syscall_mi.h>
51 
52 #include <uvm/uvm_extern.h>
53 
54 #include <machine/cpufunc.h>
55 #include <machine/psl.h>
56 #include <machine/trap.h>
57 #ifdef DDB
58 #include <machine/db_machdep.h>
59 #endif
60 
61 #include "isa.h"
62 
/*
 * Entry points defined in this file.  trap() dispatches user-mode page
 * faults to upageflttrap() and kernel-mode page faults to kpageflttrap().
 */
int upageflttrap(struct trapframe *, uint32_t);
int kpageflttrap(struct trapframe *, uint32_t);
void trap(struct trapframe *);
void ast(struct trapframe *);
void syscall(struct trapframe *);
68 
/*
 * Human-readable trap names, indexed by trap number (frame->tf_trapno).
 * trap() uses this table when reporting a fatal trap.  The order of the
 * entries must match the T_* numbering noted beside each string.
 */
char	*trap_type[] = {
	"privileged instruction fault",		/*  0 T_PRIVINFLT */
	"breakpoint trap",			/*  1 T_BPTFLT */
	"arithmetic trap",			/*  2 T_ARITHTRAP */
	"reserved trap",			/*  3 T_RESERVED */
	"protection fault",			/*  4 T_PROTFLT */
	"trace trap",				/*  5 T_TRCTRAP */
	"page fault",				/*  6 T_PAGEFLT */
	"alignment fault",			/*  7 T_ALIGNFLT */
	"integer divide fault",			/*  8 T_DIVIDE */
	"non-maskable interrupt",		/*  9 T_NMI */
	"overflow trap",			/* 10 T_OFLOW */
	"bounds check fault",			/* 11 T_BOUND */
	"FPU not available fault",		/* 12 T_DNA */
	"double fault",				/* 13 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 14 T_FPOPFLT (![P]Pro) */
	"invalid TSS fault",			/* 15 T_TSSFLT */
	"segment not present fault",		/* 16 T_SEGNPFLT */
	"stack fault",				/* 17 T_STKFLT */
	"machine check",			/* 18 T_MACHK ([P]Pro) */
	"SIMD FP fault",			/* 19 T_XFTRAP */
};
/* Number of entries in trap_type[]; used to bounds-check the index. */
int	trap_types = sizeof trap_type / sizeof trap_type[0];
92 
#ifdef DEBUG
/*
 * When non-zero, trap() prints the trap number, error code, eip, cs,
 * eflags, cr2, lapic_tpr and curproc on every entry.
 */
int	trapdebug = 0;
#endif
96 
97 static inline int
98 pgex2access(int pgex)
99 {
100 	if (pgex & PGEX_W)
101 		return PROT_WRITE;
102 	else if (pgex & PGEX_I)
103 		return PROT_EXEC;
104 	return PROT_READ;
105 }
106 
107 /*
108  * upageflttrap(frame, usermode): page fault handler
109  * Returns non-zero if the fault was handled (possibly by generating
110  * a signal).  Returns zero, possibly still holding the kernel lock,
111  * if something was so broken that we should panic.
112  */
113 int
114 upageflttrap(struct trapframe *frame, uint32_t cr2)
115 {
116 	struct proc *p = curproc;
117 	vaddr_t va = trunc_page((vaddr_t)cr2);
118 	vm_prot_t access_type = pgex2access(frame->tf_err);
119 	union sigval sv;
120 	int signal, sicode, error;
121 
122 	/*
123 	 * cpu_pae is true if system has PAE + NX.
124 	 * If NX is not enabled, we can't distinguish between PROT_READ
125 	 * and PROT_EXEC access, so try both.
126 	 */
127 	error = uvm_fault(&p->p_vmspace->vm_map, va, 0, access_type);
128 	if (cpu_pae == 0 && error == EACCES && access_type == PROT_READ)
129 		error = uvm_fault(&p->p_vmspace->vm_map, va, 0, PROT_EXEC);
130 
131 	if (error == 0) {
132 		uvm_grow(p, va);
133 		return 1;
134 	}
135 
136 	signal = SIGSEGV;
137 	sicode = SEGV_MAPERR;
138 	if (error == ENOMEM) {
139 		printf("UVM: pid %d (%s), uid %d killed:"
140 		    " out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm,
141 		    p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
142 		signal = SIGKILL;
143 	} else {
144 		if (error == EACCES)
145 			sicode = SEGV_ACCERR;
146 		else if (error == EIO) {
147 			signal = SIGBUS;
148 			sicode = BUS_OBJERR;
149 		}
150 	}
151 	sv.sival_ptr = (void *)cr2;
152 	trapsignal(p, signal, T_PAGEFLT, sicode, sv);
153 	return 1;
154 }
155 
/*
 * kpageflttrap(frame, cr2): page fault handler for kernel-mode faults.
 *
 * frame is the trap frame of the faulting context; cr2 is the faulting
 * address as passed in by trap().
 *
 * Returns non-zero if the fault was handled, either because uvm_fault()
 * resolved it or because execution was redirected to the pcb_onfault
 * recovery address.  Returns zero when there is no usable process
 * context (no curproc, no p_addr or no vmspace); trap() then treats the
 * fault as fatal.  A failed fault with no pcb_onfault handler panics
 * here.
 */
int
kpageflttrap(struct trapframe *frame, uint32_t cr2)
{
	struct proc *p = curproc;
	struct pcb *pcb;
	vaddr_t va = trunc_page((vaddr_t)cr2);
	struct vm_map *map;
	vm_prot_t access_type = pgex2access(frame->tf_err);
	caddr_t onfault;
	int error;

	/* Without a complete process context the fault cannot be serviced. */
	if (p == NULL || p->p_addr == NULL || p->p_vmspace == NULL)
		return 0;

	pcb = &p->p_addr->u_pcb;

	/* This will only trigger if SMEP is enabled */
	if (cr2 <= VM_MAXUSER_ADDRESS && frame->tf_err & PGEX_I)
		panic("attempt to execute user address %p "
		    "in supervisor mode", (void *)cr2);

	/*
	 * This will only trigger if SMAP is enabled.  A registered
	 * pcb_onfault handler exempts the access from this check.
	 */
	if (pcb->pcb_onfault == NULL && cr2 <= VM_MAXUSER_ADDRESS &&
	    frame->tf_err & PGEX_P)
		panic("attempt to access user address %p "
		    "in supervisor mode", (void *)cr2);

	/*
	 * It is only a kernel address space fault iff:
	 *	1. (type & T_USER) == 0  and
	 *	2. pcb_onfault not set or
	 *	3. pcb_onfault set but supervisor space fault
	 * The last can occur during an exec() copyin where the
	 * argument space is lazy-allocated.
	 */
	/* Pick the map by address: kernel_map at or above the kernel base. */
	map = &p->p_vmspace->vm_map;
	if (va >= VM_MIN_KERNEL_ADDRESS)
		map = kernel_map;

	/*
	 * Faults against the process map are not serviced while the CPU
	 * has ci_inatomic set; they fail with EFAULT instead.  Faults
	 * against kernel_map are always serviced.
	 */
	if (curcpu()->ci_inatomic == 0 || map == kernel_map) {
		/*
		 * pcb_onfault is cleared for the duration of uvm_fault()
		 * and restored afterwards -- presumably so that a fault
		 * taken inside uvm_fault() itself is not redirected to the
		 * recovery address (TODO confirm).
		 */
		onfault = pcb->pcb_onfault;
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, 0, access_type);
		pcb->pcb_onfault = onfault;

		if (error == 0 && map != kernel_map)
			uvm_grow(p, va);
	} else
		error = EFAULT;

	if (error) {
		if (pcb->pcb_onfault == NULL) {
			/* bad memory access in the kernel */
			panic("uvm_fault(%p, 0x%lx, 0, %d) -> %x",
			    map, va, access_type, error);
			/* Only reached if panic() were to return. */
			return 0;
		}
		/* Resume at the registered recovery address. */
		frame->tf_eip = (u_int32_t)pcb->pcb_onfault;
	}
	return 1;
}
223 
/*
 * trap(frame):
 *	Exception, fault, and trap interface to BSD kernel. This
 * common code is called from assembly language IDT gate entry
 * routines that prepare a suitable stack frame, and restore this
 * frame after the exception has been processed.
 *
 * The trap number is taken from frame->tf_trapno and OR'ed with T_USER
 * when the trap came from user mode.  User-mode traps are normally
 * turned into a signal and finish through userret(); kernel-mode traps
 * are either recovered (pcb_onfault, return-to-user fixups, page
 * faults) or end in the debugger / a panic at we_re_toast.
 */
void
trap(struct trapframe *frame)
{
	struct proc *p = curproc;
	int type = frame->tf_trapno;
	struct pcb *pcb = NULL;
	/* Recovery entry points, one per instruction recognized below. */
	extern char resume_iret[], resume_pop_ds[], resume_pop_es[],
	    resume_pop_fs[], resume_pop_gs[];
	struct trapframe *vframe;
	int resume;
	union sigval sv;
	/* Limit of the GUCODE_SEL descriptor; handed to pmap_exec_fixup(). */
	vaddr_t gdt_cs = SEGDESC_LIMIT(curcpu()->ci_gdt[GUCODE_SEL].sd);
	/* Value returned by rcr2(), captured once at entry. */
	uint32_t cr2 = rcr2();

	uvmexp.traps++;

#ifdef DEBUG
	if (trapdebug) {
		printf("trap %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
		    frame->tf_trapno, frame->tf_err, frame->tf_eip,
		    frame->tf_cs, frame->tf_eflags, cr2, lapic_tpr);
		printf("curproc %p\n", curproc);
	}
#endif
#ifdef DIAGNOSTIC
	/*
	 * On CPUs advertising SMAP, PSL_AC is expected to be clear on
	 * entry.  If it is set, clear it and then panic.
	 */
	if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
		u_int ef = read_eflags();
		if (ef & PSL_AC) {
			write_eflags(ef & ~PSL_AC);
			panic("%s: AC set on entry", "trap");
		}
	}
#endif

	/*
	 * Trap from user mode: tag the type with T_USER, remember the
	 * frame in the proc and refresh its credentials.
	 */
	if (!KERNELMODE(frame->tf_cs, frame->tf_eflags)) {
		type |= T_USER;
		p->p_md.md_regs = frame;
		refreshcreds(p);
	}

	switch (type) {

	/* kernel-mode protection, segment-not-present and alignment faults */
	case T_PROTFLT:
	case T_SEGNPFLT:
	case T_ALIGNFLT:
		/* Check for copyin/copyout fault. */
		if (p && p->p_addr) {
			pcb = &p->p_addr->u_pcb;
			if (pcb->pcb_onfault != 0) {
				frame->tf_eip = (int)pcb->pcb_onfault;
				return;
			}
		}

		/*
		 * Check for failure during return to user mode.
		 *
		 * We do this by looking at the instruction we faulted on.  The
		 * specific instructions we recognize only happen when
		 * returning from a trap, syscall, or interrupt.
		 *
		 * XXX
		 * The heuristic used here will currently fail for the case of
		 * one of the 2 pop instructions faulting when returning from
		 * a fast interrupt.  This should not be possible.  It can be
		 * fixed by rearranging the trap frame so that the stack format
		 * at this point is the same as on exit from a `slow'
		 * interrupt.
		 */
		/*
		 * For each recognized instruction, vframe is computed by
		 * backing up from &frame->tf_esp by the offset of the
		 * trapframe field that instruction was about to restore.
		 * NOTE(review): this treats &frame->tf_esp as the stack
		 * pointer at the time of the fault -- confirm against the
		 * assembly entry/exit code.
		 */
		switch (*(u_char *)frame->tf_eip) {
		case 0xcf:	/* iret */
			vframe = (void *)((int)&frame->tf_esp -
			    offsetof(struct trapframe, tf_eip));
			resume = (int)resume_iret;
			break;
		case 0x1f:	/* popl %ds */
			vframe = (void *)((int)&frame->tf_esp -
			    offsetof(struct trapframe, tf_ds));
			resume = (int)resume_pop_ds;
			break;
		case 0x07:	/* popl %es */
			vframe = (void *)((int)&frame->tf_esp -
			    offsetof(struct trapframe, tf_es));
			resume = (int)resume_pop_es;
			break;
		case 0x0f:	/* 0x0f prefix */
			switch (*(u_char *)(frame->tf_eip + 1)) {
			case 0xa1:		/* popl %fs */
				vframe = (void *)((int)&frame->tf_esp -
				    offsetof(struct trapframe, tf_fs));
				resume = (int)resume_pop_fs;
				break;
			case 0xa9:		/* popl %gs */
				vframe = (void *)((int)&frame->tf_esp -
				    offsetof(struct trapframe, tf_gs));
				resume = (int)resume_pop_gs;
				break;
			default:
				goto we_re_toast;
			}
			break;
		default:
			goto we_re_toast;
		}
		/* Only a frame that was returning to user mode is recoverable. */
		if (KERNELMODE(vframe->tf_cs, vframe->tf_eflags))
			goto we_re_toast;

		frame->tf_eip = resume;
		return;

	case T_PROTFLT|T_USER:		/* protection fault */
		/* If pmap_exec_fixup does something, let's retry the trap. */
		if (cpu_pae == 0 &&
		    pmap_exec_fixup(&p->p_vmspace->vm_map, frame, gdt_cs,
		    &p->p_addr->u_pcb))
			goto out;

		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
		goto out;

	case T_TSSFLT|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGBUS, type &~ T_USER, BUS_OBJERR, sv);
		goto out;

	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGSEGV, type &~ T_USER, SEGV_MAPERR, sv);
		goto out;

	case T_ALIGNFLT|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGBUS, type &~ T_USER, BUS_ADRALN, sv);
		goto out;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGILL, type &~ T_USER, ILL_PRVOPC, sv);
		goto out;

	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGILL, type &~ T_USER, ILL_COPROC, sv);
		goto out;

	/* FPU-not-available fault from user mode: the process is killed. */
	case T_DNA|T_USER: {
		printf("pid %d killed due to lack of floating point\n",
		    p->p_p->ps_pid);
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGKILL, type &~ T_USER, FPE_FLTINV, sv);
		goto out;
	}

	/* Bounds, overflow and divide faults all become SIGFPE. */
	case T_BOUND|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, type &~ T_USER, FPE_FLTSUB, sv);
		goto out;
	case T_OFLOW|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTOVF, sv);
		goto out;
	case T_DIVIDE|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, type &~ T_USER, FPE_INTDIV, sv);
		goto out;

	/*
	 * NOTE(review): unlike the other cases, the third trapsignal()
	 * argument here is frame->tf_err rather than the trap type.
	 */
	case T_ARITHTRAP|T_USER:
		sv.sival_int = frame->tf_eip;
		trapsignal(p, SIGFPE, frame->tf_err, FPE_INTOVF, sv);
		goto out;

	case T_XFTRAP|T_USER:
		npxtrap(frame);
		goto out;

	/* Kernel-mode page fault; fatal if kpageflttrap() returns zero. */
	case T_PAGEFLT:
		if (kpageflttrap(frame, cr2))
			return;
		goto we_re_toast;

	case T_PAGEFLT|T_USER: {	/* page fault */
		/*
		 * If the uvm_map_inentry() check on the stack pointer
		 * fails, skip the fault handling and go straight to
		 * userret().
		 */
		if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p),
		    "[%s]%d/%d sp=%lx inside %lx-%lx: not MAP_STACK\n",
		    uvm_map_inentry_sp, p->p_vmspace->vm_map.sserial))
			goto out;
		if (upageflttrap(frame, cr2))
			goto out;
		goto we_re_toast;
	}

#if 0  /* Should this be left out?  */
#if !defined(DDB)
	/* XXX need to deal with this when DDB is present, too */
	case T_TRCTRAP: /* kernel trace trap; someone single stepping lcall's */
			/* syscall has to turn off the trace bit itself */
		return;
#endif
#endif

	/*
	 * Breakpoint and trace traps from user mode.  These two cases
	 * leave the switch with `break' and reach userret() through the
	 * T_USER check below.  Note that sival_int is set from cr2 here,
	 * not from frame->tf_eip as in the cases above.
	 */
	case T_BPTFLT|T_USER:		/* bpt instruction fault */
		sv.sival_int = cr2;
		trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_BRKPT, sv);
		break;
	case T_TRCTRAP|T_USER:		/* trace trap */
		sv.sival_int = cr2;
		trapsignal(p, SIGTRAP, type &~ T_USER, TRAP_TRACE, sv);
		break;

#if NISA > 0
	case T_NMI:
	case T_NMI|T_USER:
#ifdef DDB
		/* NMI can be hooked up to a pushbutton for debugging */
		printf ("NMI ... going to debugger\n");
		if (db_ktrap(type, 0, frame))
			return;
#endif
		/* machine/parity/power fail/"kitchen sink" faults */
		if (isa_nmi() == 0)
			return;
		else
			goto we_re_toast;
#endif

	/*
	 * Kernel-mode trace trap: ignored without DDB; with DDB it falls
	 * through so the debugger gets the first chance to handle it.
	 */
	case T_TRCTRAP:
#ifndef DDB
		return;	/* Just return if no kernel debugger */
#endif
		/* FALLTHROUGH */
	default:
	we_re_toast:
		/* Fatal trap: offer it to the debugger, then report and panic. */
#ifdef DDB
		if (db_ktrap(type, frame->tf_err, frame))
			return;
#endif
		if (frame->tf_trapno < trap_types)
			printf("fatal %s (%d)", trap_type[frame->tf_trapno],
			    frame->tf_trapno);
		else
			printf("unknown trap %d", frame->tf_trapno);
		printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
		printf("trap type %d code %x eip %x cs %x eflags %x cr2 %x cpl %x\n",
		    type, frame->tf_err, frame->tf_eip, frame->tf_cs,
		    frame->tf_eflags, cr2, lapic_tpr);

		panic("trap type %d, code=%x, pc=%x",
		    type, frame->tf_err, frame->tf_eip);
		/*NOTREACHED*/
	}

	/* Cases that `break' out of the switch: only user traps do userret(). */
	if ((type & T_USER) == 0)
		return;
out:
	userret(p);
}
489 
490 
491 /*
492  * ast(frame):
493  *	AST handler.  This is called from assembly language stubs when
494  *	returning to userspace after a syscall, trap, or interrupt.
495  */
496 void
497 ast(struct trapframe *frame)
498 {
499 	struct proc *p = curproc;
500 
501 	uvmexp.traps++;
502 	KASSERT(!KERNELMODE(frame->tf_cs, frame->tf_eflags));
503 	p->p_md.md_regs = frame;
504 	refreshcreds(p);
505 	uvmexp.softs++;
506 	mi_ast(p, curcpu()->ci_want_resched);
507 	userret(p);
508 }
509 
510 
511 /*
512  * syscall(frame):
513  *	System call request from POSIX system call gate interface to kernel.
514  */
515 void
516 syscall(struct trapframe *frame)
517 {
518 	caddr_t params;
519 	const struct sysent *callp = sysent;
520 	struct proc *p = curproc;
521 	int error;
522 	register_t code, args[8], rval[2];
523 #ifdef DIAGNOSTIC
524 	int ocpl = lapic_tpr;
525 #endif
526 	short argsize;
527 
528 	uvmexp.syscalls++;
529 #ifdef DIAGNOSTIC
530 	if (!USERMODE(frame->tf_cs, frame->tf_eflags))
531 		panic("syscall");
532 #endif
533 #ifdef DIAGNOSTIC
534 	if (curcpu()->ci_feature_sefflags_ebx & SEFF0EBX_SMAP) {
535 		u_int ef = read_eflags();
536 		if (ef & PSL_AC) {
537 			write_eflags(ef & ~PSL_AC);
538 			panic("%s: AC set on entry", "syscall");
539 		}
540 	}
541 #endif
542 
543 	p->p_md.md_regs = frame;
544 
545 	code = frame->tf_eax;
546 	// XXX out of range stays on syscall0, which we assume is enosys
547 	if (code > 0 && code < SYS_MAXSYSCALL)
548 		callp += code;
549 
550 	argsize = callp->sy_argsize;
551 	params = (caddr_t)frame->tf_esp + sizeof(int);
552 	if (argsize && (error = copyin(params, args, argsize)))
553 		goto bad;
554 
555 	rval[0] = 0;
556 	rval[1] = frame->tf_edx;
557 
558 	error = mi_syscall(p, code, callp, args, rval);
559 
560 	switch (error) {
561 	case 0:
562 		frame->tf_eax = rval[0];
563 		frame->tf_edx = rval[1];
564 		frame->tf_eflags &= ~PSL_C;	/* carry bit */
565 		break;
566 	case ERESTART:
567 		/* Back up over the int$80 (2 bytes) that made the syscall */
568 		frame->tf_eip -= 2;
569 		break;
570 	case EJUSTRETURN:
571 		/* nothing to do */
572 		break;
573 	default:
574 	bad:
575 		frame->tf_eax = error;
576 		frame->tf_eflags |= PSL_C;	/* carry bit */
577 		break;
578 	}
579 
580 	mi_syscall_return(p, code, error, rval);
581 
582 #ifdef DIAGNOSTIC
583 	if (lapic_tpr != ocpl) {
584 		printf("WARNING: SPL (0x%x) NOT LOWERED ON "
585 		    "syscall(0x%lx, 0x%lx, 0x%lx, 0x%lx...) EXIT, PID %d\n",
586 		    lapic_tpr, code, args[0], args[1], args[2],
587 		    p->p_p->ps_pid);
588 		lapic_tpr = ocpl;
589 	}
590 #endif
591 }
592 
593 void
594 child_return(void *arg)
595 {
596 	struct proc *p = (struct proc *)arg;
597 	struct trapframe *tf = p->p_md.md_regs;
598 
599 	tf->tf_eax = 0;
600 	tf->tf_eflags &= ~PSL_C;
601 
602 	KERNEL_UNLOCK();
603 
604 	mi_child_return(p);
605 }
606