1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * the University of Utah, and William Jolitz.
7 *
8 * %sccs.include.redist.c%
9 *
10 * @(#)trap.c 8.4 (Berkeley) 09/23/93
11 */
12
13 /*
14 * 386 Trap and System call handleing
15 */
16
17 #include <machine/cpu.h>
18 #include <machine/psl.h>
19 #include <machine/reg.h>
20
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/proc.h>
24 #include <sys/user.h>
25 #include <sys/acct.h>
26 #include <sys/kernel.h>
27 #ifdef KTRACE
28 #include <sys/ktrace.h>
29 #endif
30
31 #include <vm/vm_param.h>
32 #include <vm/pmap.h>
33 #include <vm/vm_map.h>
34
35 #include <machine/trap.h>
36 #include <machine/dbg.h>
37
38
39 struct sysent sysent[];
40 int nsysent;
41 unsigned rcr2();
42 extern short cpl;
43
44
45 /*
46 * trap(frame):
47 * Exception, fault, and trap interface to BSD kernel. This
48 * common code is called from assembly language IDT gate entry
49 * routines that prepare a suitable stack frame, and restore this
50 * frame after the exception has been processed. Note that the
51 * effect is as if the arguments were passed call by reference.
52 */
53
/*ARGSUSED*/
trap(frame)
	struct trapframe frame;
{
	register int i;
	register struct proc *p = curproc;
	u_quad_t sticks;		/* system time at entry, for profiling delta */
	int ucode, type, code, eva;
	extern int cold;

	/* Traps taken before autoconfiguration finishes are always fatal. */
	if(cold) goto we_re_toast;
	frame.tf_eflags &= ~PSL_NT;	/* clear nested trap XXX */
	type = frame.tf_trapno;

	/*
	 * If the pcb has a fault-recovery address registered (copyin/
	 * copyout and friends) and this is not a page fault (trap 0xc),
	 * resume execution at the recovery address instead of processing
	 * the trap.  Page faults fall through so they can be serviced.
	 */
	if (curpcb && curpcb->pcb_onfault && frame.tf_trapno != 0xc) {
copyfault:	frame.tf_eip = (int)curpcb->pcb_onfault;
		return;
	}

	/*
	 * Trap came from user mode: fold T_USER into the switch selector,
	 * publish the register frame for sendsig(), and snapshot system
	 * time so the code at "out" can charge the profiling interval.
	 */
	if (ISPL(frame.tf_cs) == SEL_UPL) {
		type |= T_USER;
		p->p_md.md_regs = (int *)&frame;
		curpcb->pcb_flags |= FM_TRAP;	/* used by sendsig */
		sticks = p->p_sticks;
	}

	ucode=0;
	/* cr2 holds the faulting linear address when this is a page fault */
	eva = rcr2();
	code = frame.tf_err;
	switch (type) {

	default:
	we_re_toast:
		/* Unexpected trap: dump state and panic (or enter kdb). */
#ifdef KDB
		if (kdb_trap(&psl))
			return;
#endif

		printf("trap type %d code = %x eip = %x cs = %x eflags = %x ",
			frame.tf_trapno, frame.tf_err, frame.tf_eip,
			frame.tf_cs, frame.tf_eflags);
		printf("cr2 %x cpl %x\n", eva, cpl);
		type &= ~T_USER;
		panic("trap");
		/*NOTREACHED*/

	case T_SEGNPFLT|T_USER:
	case T_STKFLT|T_USER:		/* 386bsd */
	case T_PROTFLT|T_USER:		/* protection fault */
		ucode = code + BUS_SEGM_FAULT ;
		i = SIGBUS;
		break;

	case T_PRIVINFLT|T_USER:	/* privileged instruction fault */
	case T_RESADFLT|T_USER:		/* reserved addressing fault */
	case T_RESOPFLT|T_USER:		/* reserved operand fault */
	case T_FPOPFLT|T_USER:		/* coprocessor operand fault */
		ucode = type &~ T_USER;
		i = SIGILL;
		break;

	case T_ASTFLT|T_USER:		/* Allow process switch */
	case T_ASTFLT:
		/*
		 * Asynchronous system trap: no signal, just clear the AST
		 * and post any deferred profiling ticks before rescheduling.
		 */
		astoff();
		if ((p->p_flag & P_OWEUPC) && p->p_stats->p_prof.pr_scale) {
			addupc(frame.tf_eip, &p->p_stats->p_prof, 1);
			p->p_flag &= ~P_OWEUPC;
		}
		goto out;

	case T_DNA|T_USER:
#include "npx.h"
#if NNPX > 0
		/* if a transparent fault (due to context switch "late") */
		if (npxdna()) return;
#endif
		/* No FPU (or npxdna declined): signal "FPU not present". */
		ucode = FPE_FPU_NP_TRAP;
		i = SIGFPE;
		break;

	case T_BOUND|T_USER:
		ucode = FPE_SUBRNG_TRAP;
		i = SIGFPE;
		break;

	case T_OFLOW|T_USER:
		ucode = FPE_INTOVF_TRAP;
		i = SIGFPE;
		break;

	case T_DIVIDE|T_USER:
		ucode = FPE_INTDIV_TRAP;
		i = SIGFPE;
		break;

	case T_ARITHTRAP|T_USER:
		ucode = code;
		i = SIGFPE;
		break;

	case T_PAGEFLT:			/* allow page faults in kernel mode */
		/* Protection violation in kernel mode is unrecoverable. */
		if (code & PGEX_P) goto we_re_toast;

		/* fall into */
	case T_PAGEFLT|T_USER:		/* page fault */
	{
		register vm_offset_t va;
		register struct vmspace *vm = p->p_vmspace;
		register vm_map_t map;
		int rv;
		vm_prot_t ftype;
		extern vm_map_t kernel_map;

		va = trunc_page((vm_offset_t)eva);
		/*
		 * It is only a kernel address space fault iff:
		 * 	1. (type & T_USER) == 0  and
		 * 	2. pcb_onfault not set or
		 *	3. pcb_onfault set but supervisor space fault
		 * The last can occur during an exec() copyin where the
		 * argument space is lazy-allocated.
		 */
		/* 0xfe000000 is apparently the kernel VA base -- TODO confirm */
		if (type == T_PAGEFLT && va >= 0xfe000000)
			map = kernel_map;
		else
			map = &vm->vm_map;
		if (code & PGEX_W)
			ftype = VM_PROT_READ | VM_PROT_WRITE;
		else
			ftype = VM_PROT_READ;

		rv = user_page_fault(p, map, va, ftype, type);

		if (rv == KERN_SUCCESS) {
			if (type == T_PAGEFLT)
				return;		/* kernel fault resolved */
			goto out;		/* user fault: reschedule path */
		}

		if (type == T_PAGEFLT) {
			/* Kernel fault failed: recover via onfault or die. */
			if (curpcb->pcb_onfault)
				goto copyfault;
			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
			       map, va, ftype, rv);
			printf("  type %x, code %x\n",
			       type, code);
			goto we_re_toast;
		}
		i = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
		break;
	}

	case T_TRCTRAP:	 /* trace trap -- someone single stepping lcall's */
		frame.tf_eflags &= ~PSL_T;

			/* Q: how do we turn it on again? */
		return;
	
	case T_BPTFLT|T_USER:		/* bpt instruction fault */
	case T_TRCTRAP|T_USER:		/* trace trap */
		frame.tf_eflags &= ~PSL_T;
		i = SIGTRAP;
		break;

#include "isa.h"
#if	NISA > 0
	case T_NMI:
	case T_NMI|T_USER:
		/* machine/parity/power fail/"kitchen sink" faults */
		if(isa_nmi(code) == 0) return;
		else goto we_re_toast;
#endif
	}

	/* Deliver the signal chosen above; kernel-mode traps stop here. */
	trapsignal(p, i, ucode);
	if ((type & T_USER) == 0)
		return;
out:
	/* Return-to-user path: post signals, reschedule, charge profile. */
	while (i = CURSIG(p))
		postsig(i);
	p->p_priority = p->p_usrpri;
	if (want_resched) {
		int pl;

		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we put ourselves on the run queue
		 * but before we switched, we might not be on the queue
		 * indicated by our priority.
		 */
		pl = splclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(pl);
		while (i = CURSIG(p))
			postsig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		u_quad_t ticks = p->p_sticks - sticks;

		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.tf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.tf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpriority = p->p_priority;
	curpcb->pcb_flags &= ~FM_TRAP;	/* used by sendsig */
}
270
271 /*
272 * syscall(frame):
273 * System call request from POSIX system call gate interface to kernel.
274 * Like trap(), argument is call by reference.
275 */
/*ARGSUSED*/
syscall(frame)
	volatile struct syscframe frame;
{
	register int *locr0 = ((int *)&frame);
	register caddr_t params;	/* user stack address of arguments */
	register int i;
	register struct sysent *callp;
	register struct proc *p = curproc;
	u_quad_t sticks;		/* system time at entry, for profiling */
	int error, opc;
	int args[8], rval[2];
	unsigned int code;		/* system call number (from %eax) */

#ifdef lint
	r0 = 0; r0 = r0; r1 = 0; r1 = r1;
#endif
	sticks = p->p_sticks;
	/* System call gate must only be reachable from user mode. */
	if (ISPL(frame.sf_cs) != SEL_UPL)
		panic("syscall");

	code = frame.sf_eax;
	p->p_md.md_regs = (int *)&frame;
	curpcb->pcb_flags &= ~FM_TRAP;	/* used by sendsig */
	/* Arguments sit on the user stack just above the return address. */
	params = (caddr_t)frame.sf_esp + sizeof (int) ;

	/*
	 * Reconstruct pc, assuming lcall $X,y is 7 bytes, as it is always.
	 */
	opc = frame.sf_eip - 7;
	/* Out-of-range call numbers map to entry 63 -- TODO confirm
	 * that sysent[63] is the "illegal syscall" slot. */
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	if (callp == sysent) {
		/* Indirect syscall (code 0): real code is first argument. */
		code = fuword(params);
		params += sizeof (int);
		callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	}

	/* Copy in the argument words; report EFAULT etc. via carry flag. */
	if ((i = callp->sy_narg * sizeof (int)) &&
	    (error = copyin(params, (caddr_t)args, (u_int)i))) {
		frame.sf_eax = error;
		frame.sf_eflags |= PSL_C;	/* carry bit */
#ifdef KTRACE
		if (KTRPOINT(p, KTR_SYSCALL))
			ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
#endif
		goto done;
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, callp->sy_narg, &args);
#endif
	rval[0] = 0;
	rval[1] = frame.sf_edx;
	error = (*callp->sy_call)(p, args, rval);
	if (error == ERESTART)
		/* Back eip up over the lcall so the syscall re-executes. */
		frame.sf_eip = opc;
	else if (error != EJUSTRETURN) {
		if (error) {
			frame.sf_eax = error;
			frame.sf_eflags |= PSL_C;	/* carry bit */
		} else {
			/* Success: return values in %eax/%edx, carry clear. */
			frame.sf_eax = rval[0];
			frame.sf_edx = rval[1];
			frame.sf_eflags &= ~PSL_C;	/* carry bit */
		}
	}
	/* else if (error == EJUSTRETURN) */
		/* nothing to do */
done:
	/*
	 * Reinitialize proc pointer `p' as it may be different
	 * if this is a child returning from fork syscall.
	 */
	p = curproc;
	while (i = CURSIG(p))
		postsig(i);
	p->p_priority = p->p_usrpri;
	if (want_resched) {
		int pl;

		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we put ourselves on the run queue
		 * but before we switched, we might not be on the queue
		 * indicated by our priority.
		 */
		pl = splclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(pl);
		while (i = CURSIG(p))
			postsig(i);
	}
	if (p->p_stats->p_prof.pr_scale) {
		u_quad_t ticks = p->p_sticks - sticks;

		if (ticks) {
#ifdef PROFTIMER
			extern int profscale;
			addupc(frame.sf_eip, &p->p_stats->p_prof,
			    ticks * profscale);
#else
			addupc(frame.sf_eip, &p->p_stats->p_prof, ticks);
#endif
		}
	}
	curpriority = p->p_priority;
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
391
/*
 * user_page_fault(p, map, addr, ftype, type):
 *	Resolve a page fault at `addr' in `map' on behalf of process `p',
 *	faulting in the covering page table page first if necessary.
 *	`ftype' is the required access (VM_PROT_*); `type' is the trap
 *	type (T_PAGEFLT for kernel-mode faults, T_PAGEFLT|T_USER for
 *	user-mode ones).  Returns a KERN_* status from the VM system;
 *	KERN_SUCCESS means the fault was handled.
 */
int
user_page_fault (p, map, addr, ftype, type)
struct proc *p;
vm_map_t map;
caddr_t addr;
vm_prot_t ftype;
int type;
{
	struct vmspace *vm;
	vm_offset_t va;
	int rv;
	extern vm_map_t kernel_map;
	unsigned nss, v;

	vm = p->p_vmspace;

	va = trunc_page((vm_offset_t)addr);

	/*
	 * XXX: rude hack to make stack limits "work"
	 */
	/* Faults above vm_maxsaddr may be stack growth: enforce the
	 * RLIMIT_STACK ceiling before letting vm_fault() extend it. */
	nss = 0;
	if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) {
		nss = clrnd(btoc(USRSTACK - (unsigned)va));
		if (nss > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
			return (KERN_FAILURE);
	}

	/* check if page table is mapped, if not, fault it first */
#define pde_v(v) (PTD[((v)>>PD_SHIFT)&1023].pd_v)
	if (!pde_v(va)) {
		/* Fault in the page table page covering va and wire it. */
		v = trunc_page(vtopte(va));
		if ((rv = vm_fault(map, v, ftype, FALSE)) != KERN_SUCCESS)
			return (rv);
		/* check if page table fault, increment wiring */
		vm_map_pageable(map, v, round_page(v+1), FALSE);
	} else
		v = 0;

	if ((rv = vm_fault(map, va, ftype, FALSE)) != KERN_SUCCESS)
		return (rv);

	/*
	 * XXX: continuation of rude stack hack
	 */
	/* Fault succeeded: record the new stack size if it grew. */
	if (nss > vm->vm_ssize)
		vm->vm_ssize = nss;
	va = trunc_page(vtopte(va));
	/*
	 * for page table, increment wiring
	 * as long as not a page table fault as well
	 */
	if (!v && type != T_PAGEFLT)
		vm_map_pageable(map, va, round_page(va+1), FALSE);
	return (KERN_SUCCESS);
}
448
449 int
user_write_fault(addr)450 user_write_fault (addr)
451 void *addr;
452 {
453 if (user_page_fault (curproc, &curproc->p_vmspace->vm_map,
454 addr, VM_PROT_READ | VM_PROT_WRITE,
455 T_PAGEFLT) == KERN_SUCCESS)
456 return (0);
457 else
458 return (EFAULT);
459 }
460
/*
 * copyout(from, to, len):
 *	Copy `len' bytes from kernel address `from' to user address `to',
 *	faulting in (and write-enabling) each destination page as needed.
 *	Returns 0 on success, EFAULT if `to' is outside the user address
 *	range or a destination page cannot be made writable.
 *	NOTE(review): arithmetic on void * relies on the compiler treating
 *	it as byte-sized (GNU extension) -- confirm for the target compiler.
 */
int
copyout (from, to, len)
void *from;
void *to;
u_int len;
{
	u_int *pte, *pde;
	int rest_of_page;	/* bytes left in current destination page */
	int thistime;		/* bytes copied this iteration */
	int err;

	/* be very careful not to overflow doing this check */
	if (to >= (void *)USRSTACK || (void *)USRSTACK - to < len)
		return (EFAULT);

	/* PTE mapping the destination, and the PDE mapping that PTE. */
	pte = (u_int *)vtopte (to);
	pde = (u_int *)vtopte (pte);

	rest_of_page = PAGE_SIZE - ((int)to & (PAGE_SIZE - 1));

	while (1) {
		/* Copy at most to the end of the current page. */
		thistime = len;
		if (thistime > rest_of_page)
			thistime = rest_of_page;

		/*
		 * If the page table or the page itself is not present,
		 * or the page is not user-writable, fault it in first.
		 */
		if ((*pde & PG_V) == 0
		    || (*pte & (PG_V | PG_UW)) != (PG_V | PG_UW))
			if (err = user_write_fault (to))
				return (err);

		bcopy (from, to, thistime);

		len -= thistime;

		/*
		 * Break out as soon as possible in the common case
		 * that the whole transfer is containted in one page.
		 */
		if (len == 0)
			break;

		/* Advance to the next page; recompute the covering PDE. */
		from += thistime;
		to += thistime;
		pte++;
		pde = (u_int *)vtopte (pte);
		rest_of_page = PAGE_SIZE;
	}

	return (0);
}
511