xref: /netbsd-src/sys/arch/i386/i386/trap.c (revision 68fa58437753598de948829082f591c269b48777)
1 
2 /*	$NetBSD: trap.c,v 1.309 2023/10/05 19:41:04 ad Exp $	*/
3 
4 /*-
5  * Copyright (c) 1998, 2000, 2005, 2006, 2007, 2008 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Charles M. Hannum.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*-
34  * Copyright (c) 1990 The Regents of the University of California.
35  * All rights reserved.
36  *
37  * This code is derived from software contributed to Berkeley by
38  * the University of Utah, and William Jolitz.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)trap.c	7.4 (Berkeley) 5/13/91
65  */
66 
67 /*
68  * 386 Trap and System call handling
69  */
70 
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: trap.c,v 1.309 2023/10/05 19:41:04 ad Exp $");
73 
74 #include "opt_ddb.h"
75 #include "opt_kgdb.h"
76 #include "opt_lockdebug.h"
77 #include "opt_multiprocessor.h"
78 #include "opt_xen.h"
79 #include "opt_dtrace.h"
80 #include "opt_compat_netbsd.h"
81 
82 #include <sys/param.h>
83 #include <sys/systm.h>
84 #include <sys/proc.h>
85 #include <sys/acct.h>
86 #include <sys/kauth.h>
87 #include <sys/kernel.h>
88 #include <sys/kmem.h>
89 #include <sys/ras.h>
90 #include <sys/signal.h>
91 #include <sys/syscall.h>
92 #include <sys/cpu.h>
93 #include <sys/ucontext.h>
94 
95 #include <uvm/uvm_extern.h>
96 
97 #include <machine/cpufunc.h>
98 #include <machine/psl.h>
99 #include <machine/reg.h>
100 #include <machine/trap.h>
101 #include <machine/userret.h>
102 #include <machine/db_machdep.h>
103 #include <machine/pmap_private.h>
104 
105 #include "mca.h"
106 #if NMCA > 0
107 #include <machine/mca_machdep.h>
108 #endif
109 
110 #include <x86/dbregs.h>
111 #include <x86/nmi.h>
112 
113 #include "isa.h"
114 
115 #include <sys/kgdb.h>
116 
117 #ifdef KDTRACE_HOOKS
118 #include <sys/dtrace_bsd.h>
119 
120 /*
121  * This is a hook which is initialized by the dtrace module
122  * to handle traps which might occur during DTrace probe
123  * execution.
124  */
125 dtrace_trap_func_t	dtrace_trap_func = NULL;
126 
127 dtrace_doubletrap_func_t	dtrace_doubletrap_func = NULL;
128 #endif
129 
130 void trap(struct trapframe *);
131 void trap_tss(struct i386tss *, int, int);
132 void trap_return_fault_return(struct trapframe *) __dead;
133 #ifndef XENPV
134 int ss_shadow(struct trapframe *tf);
135 #endif
136 
/*
 * Human-readable names for the machine-independent trap numbers.
 * The table is indexed by the T_* value noted next to each entry;
 * trap_types holds the number of entries so that users of the table
 * can range-check a trap number before indexing.
 */
const char * const trap_type[] = {
	"privileged instruction fault",		/*  0 T_PRIVINFLT */
	"breakpoint trap",			/*  1 T_BPTFLT */
	"arithmetic trap",			/*  2 T_ARITHTRAP */
	"asynchronous system trap",		/*  3 T_ASTFLT */
	"protection fault",			/*  4 T_PROTFLT */
	"trace trap",				/*  5 T_TRCTRAP */
	"page fault",				/*  6 T_PAGEFLT */
	"alignment fault",			/*  7 T_ALIGNFLT */
	"integer divide fault",			/*  8 T_DIVIDE */
	"non-maskable interrupt",		/*  9 T_NMI */
	"overflow trap",			/* 10 T_OFLOW */
	"bounds check fault",			/* 11 T_BOUND */
	"FPU not available fault",		/* 12 T_DNA */
	"double fault",				/* 13 T_DOUBLEFLT */
	"FPU operand fetch fault",		/* 14 T_FPOPFLT */
	"invalid TSS fault",			/* 15 T_TSSFLT */
	"segment not present fault",		/* 16 T_SEGNPFLT */
	"stack fault",				/* 17 T_STKFLT */
	"machine check fault",			/* 18 T_MCA */
	"SSE FP exception",			/* 19 T_XMM */
	"reserved trap",			/* 20 T_RESERVED */
};
int	trap_types = __arraycount(trap_type);
161 
162 #ifdef DEBUG
163 int	trapdebug = 0;
164 #endif
165 
166 #define	IDTVEC(name)	__CONCAT(X, name)
167 
168 #ifdef TRAP_SIGDEBUG
169 static void sigdebug(const struct trapframe *, const ksiginfo_t *, int);
170 #define SIGDEBUG(a, b, c) sigdebug(a, b, c)
171 #else
172 #define SIGDEBUG(a, b, c)
173 #endif
174 
175 void
trap_tss(struct i386tss * tss,int trapno,int code)176 trap_tss(struct i386tss *tss, int trapno, int code)
177 {
178 	struct trapframe tf;
179 
180 	tf.tf_gs = tss->tss_gs;
181 	tf.tf_fs = tss->tss_fs;
182 	tf.tf_es = tss->__tss_es;
183 	tf.tf_ds = tss->__tss_ds;
184 	tf.tf_edi = tss->__tss_edi;
185 	tf.tf_esi = tss->__tss_esi;
186 	tf.tf_ebp = tss->tss_ebp;
187 	tf.tf_ebx = tss->__tss_ebx;
188 	tf.tf_edx = tss->__tss_edx;
189 	tf.tf_ecx = tss->__tss_ecx;
190 	tf.tf_eax = tss->__tss_eax;
191 	tf.tf_trapno = trapno;
192 	tf.tf_err = code | TC_TSS;
193 	tf.tf_eip = tss->__tss_eip;
194 	tf.tf_cs = tss->__tss_cs;
195 	tf.tf_eflags = tss->__tss_eflags;
196 	tf.tf_esp = tss->tss_esp;
197 	tf.tf_ss = tss->__tss_ss;
198 	trap(&tf);
199 }
200 
201 static void *
onfault_handler(const struct pcb * pcb,const struct trapframe * tf)202 onfault_handler(const struct pcb *pcb, const struct trapframe *tf)
203 {
204 	struct onfault_table {
205 		uintptr_t start;
206 		uintptr_t end;
207 		void *handler;
208 	};
209 	extern const struct onfault_table onfault_table[];
210 	const struct onfault_table *p;
211 	uintptr_t pc;
212 
213 	if (pcb->pcb_onfault != NULL) {
214 		return pcb->pcb_onfault;
215 	}
216 
217 	pc = tf->tf_eip;
218 	for (p = onfault_table; p->start; p++) {
219 		if (p->start <= pc && pc < p->end) {
220 			return p->handler;
221 		}
222 	}
223 	return NULL;
224 }
225 
226 static void
trap_print(const struct trapframe * frame,const lwp_t * l)227 trap_print(const struct trapframe *frame, const lwp_t *l)
228 {
229 	const int type = frame->tf_trapno;
230 
231 	if (frame->tf_trapno < trap_types) {
232 		printf("fatal %s", trap_type[type]);
233 	} else {
234 		printf("unknown trap %d", type);
235 	}
236 	printf(" in %s mode\n", (type & T_USER) ? "user" : "supervisor");
237 
238 	printf("trap type %d code %#x eip %#x cs %#x eflags %#x cr2 %#lx "
239 	    "ilevel %#x esp %#x\n",
240 	    type, frame->tf_err, frame->tf_eip, frame->tf_cs, frame->tf_eflags,
241 	    (long)rcr2(), curcpu()->ci_ilevel, frame->tf_esp);
242 
243 	printf("curlwp %p pid %d lid %d lowest kstack %p\n",
244 	    l, l->l_proc->p_pid, l->l_lid, KSTACK_LOWEST_ADDR(l));
245 }
246 
247 #ifndef XENPV
248 int
ss_shadow(struct trapframe * tf)249 ss_shadow(struct trapframe *tf)
250 {
251 	struct gate_descriptor *gd;
252 	struct cpu_info *ci;
253 	struct idt_vec *iv;
254 	idt_descriptor_t *idt;
255 	uintptr_t eip, func;
256 	size_t i;
257 
258 	eip = tf->tf_eip;
259 	ci = curcpu();
260 	iv = idt_vec_ref(&ci->ci_idtvec);
261 	idt = iv->iv_idt;
262 
263 	for (i = 0; i < 256; i++) {
264 		gd = &idt[i];
265 		func = (gd->gd_hioffset << 16) | gd->gd_looffset;
266 		if (eip == func)
267 			return 1;
268 	}
269 
270 	return 0;
271 }
272 #endif
273 
/*
 * trap(frame): exception, fault, and trap interface to BSD kernel.
 *
 * This common code is called from assembly language IDT gate entry routines
 * that prepare a suitable stack frame, and restore this frame after the
 * exception has been processed. Note that the effect is as if the arguments
 * were passed call by reference.
 *
 * The trap number is taken from frame->tf_trapno; T_USER is OR'ed into the
 * local copy when the trap did not come from kernel mode (NMIs excepted),
 * and the switch below dispatches on the combined value.  Traps taken in
 * kernel mode return straight to the caller; traps taken in user mode
 * leave through userret().  Anything that cannot be handled ends up at
 * the we_re_toast label, which prints the frame, offers the trap to the
 * debuggers and finally panics.
 */
void
trap(struct trapframe *frame)
{
	struct lwp *l = curlwp;
	struct proc *p;
	struct pcb *pcb;
	extern char kcopy_fault[], return_address_fault[];
	struct trapframe *vframe;
	ksiginfo_t ksi;
	void *onfault;
	int type, error = 0;
	uint32_t cr2;
	bool pfail;

	if (__predict_true(l != NULL)) {
		pcb = lwp_getpcb(l);
		p = l->l_proc;
	} else {
		/*
		 * This can happen e.g. on breakpoints early in boot.
		 */
		pcb = NULL;
		p = NULL;
	}
	type = frame->tf_trapno;

#ifdef DEBUG
	if (trapdebug) {
		trap_print(frame, l);
	}
#endif
	/*
	 * Trap from user mode: tag the type, remember the user frame
	 * and clear the recorded fault address.
	 */
	if (type != T_NMI && !KERNELMODE(frame->tf_cs)) {
		type |= T_USER;
		l->l_md.md_regs = frame;
		pcb->pcb_cr2 = 0;
	}

#ifdef KDTRACE_HOOKS
	/*
	 * A trap can occur while DTrace executes a probe. Before
	 * executing the probe, DTrace blocks re-scheduling and sets
	 * a flag in its per-cpu flags to indicate that it doesn't
	 * want to fault. On returning from the probe, the no-fault
	 * flag is cleared and finally re-scheduling is enabled.
	 *
	 * If the DTrace kernel module has registered a trap handler,
	 * call it and if it returns non-zero, assume that it has
	 * handled the trap and modified the trap frame so that this
	 * function can return normally.
	 */
	if ((type == T_PROTFLT || type == T_PAGEFLT) &&
	    dtrace_trap_func != NULL) {
		if ((*dtrace_trap_func)(frame, type)) {
			return;
		}
	}
#endif

	switch (type) {

	default:
	we_re_toast:
		/*
		 * Fatal path: report the trap, give the debuggers a
		 * chance to take it, and panic if nobody does.
		 */
		trap_print(frame, l);

		if (kdb_trap(type, 0, frame))
			return;
		if (kgdb_trap(type, frame))
			return;
		/*
		 * If this is a breakpoint, don't panic if we're not connected.
		 */
		if (type == T_BPTFLT && kgdb_disconnected()) {
			printf("kgdb: ignored %s\n", trap_type[type]);
			return;
		}
		panic("trap");
		/*NOTREACHED*/

	case T_PROTFLT:
	case T_SEGNPFLT:
	case T_ALIGNFLT:
	case T_STKFLT:
	case T_TSSFLT:
		if (p == NULL)
			goto we_re_toast;
		/* Check for copyin/copyout fault. */
		onfault = onfault_handler(pcb, frame);
		if (onfault != NULL) {
			/*
			 * Resume at the recovery handler with the error
			 * in %eax.  copyefault forces EFAULT; copyfault
			 * (also entered from the page fault code) keeps
			 * whatever 'error' already holds.
			 */
copyefault:
			error = EFAULT;
copyfault:
			frame->tf_eip = (uintptr_t)onfault;
			frame->tf_eax = error;
			return;
		}

		/*
		 * Check for failure during return to user mode.
		 * This can happen loading invalid values into the segment
		 * registers, or during the 'iret' itself.
		 *
		 * We do this by looking at the instruction we faulted on.
		 * The specific instructions we recognize only happen when
		 * returning from a trap, syscall, or interrupt.
		 */

kernelfault:
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGSEGV;
		ksi.ksi_code = SEGV_ACCERR;
		ksi.ksi_trap = type;

		switch (*(u_char *)frame->tf_eip) {
		case 0xcf:	/* iret */
			/*
			 * The 'iret' instruction faulted, so we have the
			 * 'user' registers saved after the kernel %eip:%cs:%fl
			 * of the 'iret' and below that the user %eip:%cs:%fl
			 * the 'iret' was processing.
			 * We must delete the 3 words of kernel return address
			 * from the stack to generate a normal stack frame
			 * (eg for sending a SIGSEGV).
			 */
			vframe = (void *)((int *)frame + 3);
			if (KERNELMODE(vframe->tf_cs))
				goto we_re_toast;
			memmove(vframe, frame,
			    offsetof(struct trapframe, tf_eip));
			/* Set the faulting address to the user %eip */
			ksi.ksi_addr = (void *)vframe->tf_eip;
			break;
		case 0x8e:
			switch (*(uint32_t *)frame->tf_eip) {
			case 0x8e242c8e:	/* mov (%esp),%gs, then */
			case 0x0424648e:	/* mov 0x4(%esp),%fs */
			case 0x0824448e:	/* mov 0x8(%esp),%es */
			case 0x0c245c8e:	/* mov 0xc(%esp),%ds */
				break;
			default:
				goto we_re_toast;
			}
			/*
			 * We faulted loading one of the user segment registers.
			 * The stack frame containing the user registers is
			 * still valid and is just below the %eip:%cs:%fl of
			 * the kernel fault frame.
			 */
			vframe = (void *)(&frame->tf_eflags + 1);
			if (KERNELMODE(vframe->tf_cs))
				goto we_re_toast;
			/* There is no valid address for the fault */
			break;
		default:
			goto we_re_toast;
		}
		/*
		 * We might have faulted trying to execute the
		 * trampoline for a local (nested) signal handler.
		 * Only generate SIGSEGV if the user %cs isn't changed.
		 * (This is only strictly necessary in the 'iret' case.)
		 */
		if (!pmap_exec_fixup(&p->p_vmspace->vm_map, vframe, pcb)) {
			/* Save outer frame for any signal return */
			l->l_md.md_regs = vframe;
			SIGDEBUG(vframe, &ksi, error);
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		/* Return to user by reloading the user frame */
		trap_return_fault_return(vframe);
		/* NOTREACHED */

	case T_PROTFLT|T_USER:		/* protection fault */
#if defined(COMPAT_10) || defined(COMPAT_NOMID)
	{
#define LCALLSZ 7
		/* Check for the osyscall lcall instruction. */
		if (frame->tf_eip < VM_MAXUSER_ADDRESS - LCALLSZ &&
		    x86_cpu_is_lcall((const void *)frame->tf_eip) == 0) {

			/* Advance past the lcall. */
			frame->tf_eip += LCALLSZ;

			/* Do the syscall. */
			p->p_md.md_syscall(frame);
			goto out;
		}
	}
#endif
		/* FALLTHROUGH */
	case T_TSSFLT|T_USER:
	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:
	case T_ALIGNFLT|T_USER:
		KSI_INIT_TRAP(&ksi);

		ksi.ksi_addr = (void *)rcr2();
		/* Pick the signal and code that match the fault type. */
		switch (type) {
		case T_SEGNPFLT|T_USER:
		case T_STKFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case T_TSSFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_OBJERR;
			break;
		case T_ALIGNFLT|T_USER:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRALN;
			break;
		case T_PROTFLT|T_USER:
			/*
			 * If pmap_exec_fixup does something,
			 * let's retry the trap.
			 */
			if (pmap_exec_fixup(&p->p_vmspace->vm_map, frame, pcb)){
				goto out;
			}
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			break;
		default:
			KASSERT(0);
			break;
		}
		goto trapsignal;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGILL;
		ksi.ksi_addr = (void *) frame->tf_eip;
		switch (type) {
		case T_PRIVINFLT|T_USER:
			ksi.ksi_code = ILL_PRVOPC;
			break;
		case T_FPOPFLT|T_USER:
			ksi.ksi_code = ILL_COPROC;
			break;
		default:
			ksi.ksi_code = 0;
			break;
		}
		goto trapsignal;

	case T_ASTFLT|T_USER:
		/* Allow process switch. */
		//curcpu()->ci_data.cpu_nast++;
		if (l->l_pflag & LP_OWEUPC) {
			l->l_pflag &= ~LP_OWEUPC;
			ADDUPROF(l);
		}
		goto out;

	case T_BOUND|T_USER:
	case T_OFLOW|T_USER:
	case T_DIVIDE|T_USER:
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGFPE;
		ksi.ksi_addr = (void *)frame->tf_eip;
		switch (type) {
		case T_BOUND|T_USER:
			ksi.ksi_code = FPE_FLTSUB;
			break;
		case T_OFLOW|T_USER:
			ksi.ksi_code = FPE_INTOVF;
			break;
		case T_DIVIDE|T_USER:
			ksi.ksi_code = FPE_INTDIV;
			break;
		default:
			ksi.ksi_code = 0;
			break;
		}
		goto trapsignal;

	case T_PAGEFLT:
		/* Allow page faults in kernel mode. */
		if (__predict_false(l == NULL))
			goto we_re_toast;

		onfault = pcb->pcb_onfault;
		if (onfault == return_address_fault) {
			goto copyefault;
		}
		/* A page fault in interrupt context is always fatal. */
		if (cpu_intr_p() || (l->l_pflag & LP_INTR) != 0) {
			goto we_re_toast;
		}

		cr2 = rcr2();

		if (frame->tf_err & PGEX_I) {
			/* SMEP might have brought us here */
			if (cr2 > VM_MIN_ADDRESS && cr2 <= VM_MAXUSER_ADDRESS) {
				printf("prevented execution of %p (SMEP)\n",
				    (void *)cr2);
				goto we_re_toast;
			}
		}

		if ((frame->tf_err & PGEX_P) &&
		    cr2 < VM_MAXUSER_ADDRESS) {
			/* SMAP might have brought us here */
			if (onfault_handler(pcb, frame) == NULL) {
				printf("prevented access to %p (SMAP)\n",
				    (void *)cr2);
				goto we_re_toast;
			}
		}

		goto faultcommon;

	case T_PAGEFLT|T_USER: {	/* page fault */
		register vaddr_t va;
		register struct vmspace *vm;
		register struct vm_map *map;
		vm_prot_t ftype;
		extern struct vm_map *kernel_map;

		cr2 = rcr2();
		/*
		 * Common page fault handling; the kernel-mode case above
		 * jumps here with cr2 already read.
		 */
faultcommon:
		vm = p->p_vmspace;
		if (__predict_false(vm == NULL)) {
			goto we_re_toast;
		}
		pcb->pcb_cr2 = cr2;
		va = trunc_page((vaddr_t)cr2);
		/*
		 * It is only a kernel address space fault iff:
		 *	1. (type & T_USER) == 0  and
		 *	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */
		if (type == T_PAGEFLT && va >= VM_MIN_KERNEL_ADDRESS)
			map = kernel_map;
		else
			map = &vm->vm_map;
		/* Derive the access type from the hardware error code. */
		if (frame->tf_err & PGEX_W)
			ftype = VM_PROT_WRITE;
		else if (frame->tf_err & PGEX_I)
			ftype = VM_PROT_EXECUTE;
		else
			ftype = VM_PROT_READ;

#ifdef DIAGNOSTIC
		if (map == kernel_map && va == 0) {
			printf("trap: bad kernel access at %lx\n", va);
			goto we_re_toast;
		}
#endif
		/*
		 * Fault the original page in.  pcb_onfault is cleared
		 * around the call and restored afterwards.
		 */
		onfault = pcb->pcb_onfault;
		pcb->pcb_onfault = NULL;
		error = uvm_fault(map, va, ftype);
		pcb->pcb_onfault = onfault;
		if (error == 0) {
			if (map != kernel_map && (void *)va >= vm->vm_maxsaddr)
				uvm_grow(p, va);

			pfail = false;
			while (type == T_PAGEFLT) {
				/*
				 * We need to switch pmap now if we're in
				 * the middle of copyin/out.
				 *
				 * But we don't need to do so for kcopy as
				 * it never touches userspace.
				 */
				kpreempt_disable();
				if (curcpu()->ci_want_pmapload) {
					onfault = onfault_handler(pcb, frame);
					if (onfault != kcopy_fault) {
						pmap_load();
					}
				}
				/*
				 * We need to keep the pmap loaded and
				 * so avoid being preempted until back
				 * into the copy functions.  Disable
				 * interrupts at the hardware level before
				 * re-enabling preemption.  Interrupts
				 * will be re-enabled by 'iret' when
				 * returning back out of the trap stub.
				 * They'll only be re-enabled when the
				 * program counter is once again in
				 * the copy functions, and so visible
				 * to cpu_kpreempt_exit().
				 */
#ifndef XENPV
				x86_disable_intr();
#endif
				l->l_nopreempt--;
				if (l->l_nopreempt > 0 || !l->l_dopreempt ||
				    pfail) {
					return;
				}
#ifndef XENPV
				x86_enable_intr();
#endif
				/*
				 * If preemption fails for some reason,
				 * don't retry it.  The conditions won't
				 * change under our nose.
				 */
				pfail = kpreempt(0);
			}
			goto out;
		}

		/*
		 * uvm_fault() failed.  In kernel mode, recover through
		 * an onfault handler if one covers the fault; otherwise
		 * treat it like a fault on the return-to-user path.
		 */
		if (type == T_PAGEFLT) {
			onfault = onfault_handler(pcb, frame);
			if (onfault != NULL)
				goto copyfault;
			printf("uvm_fault(%p, %#lx, %d) -> %#x\n",
			    map, va, ftype, error);
			goto kernelfault;
		}

		/* User mode: translate the UVM error into a signal. */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_trap = type & ~T_USER;
		ksi.ksi_addr = (void *)cr2;
		switch (error) {
		case EINVAL:
			ksi.ksi_signo = SIGBUS;
			ksi.ksi_code = BUS_ADRERR;
			break;
		case EACCES:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_ACCERR;
			error = EFAULT;
			break;
		case ENOMEM:
			ksi.ksi_signo = SIGKILL;
			printf("UVM: pid %d.%d (%s), uid %d killed: "
			    "out of swap\n", p->p_pid, l->l_lid, p->p_comm,
			    l->l_cred ?  kauth_cred_geteuid(l->l_cred) : -1);
			break;
		default:
			ksi.ksi_signo = SIGSEGV;
			ksi.ksi_code = SEGV_MAPERR;
			break;
		}

		SIGDEBUG(frame, &ksi, error);
		(*p->p_emul->e_trapsignal)(l, &ksi);
		break;
	}

	case T_TRCTRAP:
		/*
		 * Ignore debug register trace traps due to
		 * accesses in the user's address space, which
		 * can happen under several conditions such as
		 * if a user sets a watchpoint on a buffer and
		 * then passes that buffer to a system call.
		 * We still want to get TRCTRAPS for addresses
		 * in kernel space because that is useful when
		 * debugging the kernel.
		 */
		if (x86_dbregs_user_trap())
			break;

		goto we_re_toast;

	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		/*
		 * Don't go single-stepping into a RAS.
		 */
		if (p->p_raslist == NULL ||
		    (ras_lookup(p, (void *)frame->tf_eip) == (void *)-1)) {
			KSI_INIT_TRAP(&ksi);
			ksi.ksi_signo = SIGTRAP;
			ksi.ksi_trap = type & ~T_USER;
			if (x86_dbregs_user_trap()) {
				x86_dbregs_store_dr6(l);
				ksi.ksi_code = TRAP_DBREG;
			} else if (type == (T_BPTFLT|T_USER))
				ksi.ksi_code = TRAP_BRKPT;
			else
				ksi.ksi_code = TRAP_TRACE;
			ksi.ksi_addr = (void *)frame->tf_eip;
			SIGDEBUG(frame, &ksi, error);
			(*p->p_emul->e_trapsignal)(l, &ksi);
		}
		break;

	case T_NMI:
		if (nmi_dispatch(frame))
			return;
		/* NMI can be hooked up to a pushbutton for debugging */
		if (kgdb_trap(type, frame))
			return;
		if (kdb_trap(type, 0, frame))
			return;
		/* machine/parity/power fail/"kitchen sink" faults */
#if NMCA > 0
		mca_nmi();
#endif
		x86_nmi();
	}

	/* Kernel-mode traps return directly; only user traps do userret(). */
	if ((type & T_USER) == 0)
		return;
out:
	userret(l);
	return;
	/* Common tail for user-mode traps that post a signal. */
trapsignal:
	ksi.ksi_trap = type & ~T_USER;
	SIGDEBUG(frame, &ksi, error);
	(*p->p_emul->e_trapsignal)(l, &ksi);
	userret(l);
}
797 
798 /*
799  * startlwp: start of a new LWP.
800  */
801 void
startlwp(void * arg)802 startlwp(void *arg)
803 {
804 	ucontext_t *uc = arg;
805 	lwp_t *l = curlwp;
806 	int error __diagused;
807 
808 	error = cpu_setmcontext(l, &uc->uc_mcontext, uc->uc_flags);
809 	KASSERT(error == 0);
810 
811 	kmem_free(uc, sizeof(ucontext_t));
812 	userret(l);
813 }
814 
815 #ifdef TRAP_SIGDEBUG
816 static void
frame_dump(const struct trapframe * tf,const struct pcb * pcb)817 frame_dump(const struct trapframe *tf, const struct pcb *pcb)
818 {
819 	uint64_t fsd, gsd;
820 
821 	printf("trapframe %p\n", tf);
822 	printf("eip 0x%08x  esp 0x%08x  efl 0x%08x\n",
823 	    tf->tf_eip, tf->tf_esp, tf->tf_eflags);
824 	printf("edi 0x%08x  esi 0x%08x  edx 0x%08x\n",
825 	    tf->tf_edi, tf->tf_esi, tf->tf_edx);
826 	printf("ecx 0x%08x\n",
827 	    tf->tf_ecx);
828 	printf("ebp 0x%08x  ebx 0x%08x  eax 0x%08x\n",
829 	    tf->tf_ebp, tf->tf_ebx, tf->tf_eax);
830 	printf("cs 0x%04x  ds 0x%04x  es 0x%04x  "
831 	       "fs 0x%04x  gs 0x%04x  ss 0x%04x\n",
832 		tf->tf_cs & 0xffff, tf->tf_ds & 0xffff, tf->tf_es & 0xffff,
833 		tf->tf_fs & 0xffff, tf->tf_gs & 0xffff, tf->tf_ss & 0xffff);
834 	memcpy(&fsd, &pcb->pcb_fsd, sizeof(fsd));
835 	memcpy(&gsd, &pcb->pcb_gsd, sizeof(gsd));
836 	printf("fsbase 0x%016llx gsbase 0x%016llx\n", fsd, gsd);
837 	printf("\n");
838 	hexdump(printf, "Stack dump", tf, 256);
839 }
840 
841 static void
sigdebug(const struct trapframe * tf,const ksiginfo_t * ksi,int e)842 sigdebug(const struct trapframe *tf, const ksiginfo_t *ksi, int e)
843 {
844 	struct lwp *l = curlwp;
845 	struct proc *p = l->l_proc;
846 
847 	printf("pid %d.%d (%s): signal %d code=%d (trap %x) "
848 	    "@eip %#x addr %#x error=%d\n",
849 	    p->p_pid, l->l_lid, p->p_comm, ksi->ksi_signo, ksi->ksi_code,
850 	    tf->tf_trapno, tf->tf_eip, rcr2(), e);
851 	frame_dump(tf, lwp_getpcb(l));
852 }
853 #endif
854