xref: /plan9-contrib/sys/src/9/pc/trap.c (revision 178702b161d3fe3e021aa6cb2f305be898e56ca0)
1 #include	"u.h"
2 #include	"tos.h"
3 #include	"../port/lib.h"
4 #include	"mem.h"
5 #include	"dat.h"
6 #include	"fns.h"
7 #include	"io.h"
8 #include	"ureg.h"
9 #include	"../port/error.h"
10 #include	<trace.h>
11 
12 static int trapinited;
13 
14 void	noted(Ureg*, ulong);
15 
16 static void debugbpt(Ureg*, void*);
17 static void fault386(Ureg*, void*);
18 static void doublefault(Ureg*, void*);
19 static void unexpected(Ureg*, void*);
20 static void _dumpstack(Ureg*);
21 
22 static Lock vctllock;
23 static Vctl *vctl[256];
24 
25 enum
26 {
27 	Ntimevec = 20		/* number of time buckets for each intr */
28 };
29 ulong intrtimes[256][Ntimevec];
30 
31 void
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
33 {
34 	int vno;
35 	Vctl *v;
36 
37 	if(f == nil){
38 		print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
39 			irq, tbdf, name);
40 		return;
41 	}
42 
43 	v = xalloc(sizeof(Vctl));
44 	v->isintr = 1;
45 	v->irq = irq;
46 	v->tbdf = tbdf;
47 	v->f = f;
48 	v->a = a;
49 	strncpy(v->name, name, KNAMELEN-1);
50 	v->name[KNAMELEN-1] = 0;
51 
52 	ilock(&vctllock);
53 	vno = arch->intrenable(v);
54 	if(vno == -1){
55 		iunlock(&vctllock);
56 		print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
57 			irq, tbdf, v->name);
58 		xfree(v);
59 		return;
60 	}
61 	if(vctl[vno]){
62 		if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
63 			panic("intrenable: handler: %s %s %#p %#p %#p %#p",
64 				vctl[vno]->name, v->name,
65 				vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
66 		v->next = vctl[vno];
67 	}
68 	vctl[vno] = v;
69 	iunlock(&vctllock);
70 }
71 
72 int
73 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
74 {
75 	Vctl **pv, *v;
76 	int vno;
77 
78 	/*
79 	 * For now, none of this will work with the APIC code,
80 	 * there is no mapping between irq and vector as the IRQ
81 	 * is pretty meaningless.
82 	 */
83 	if(arch->intrvecno == nil)
84 		return -1;
85 	vno = arch->intrvecno(irq);
86 	ilock(&vctllock);
87 	pv = &vctl[vno];
88 	while (*pv &&
89 		  ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
90 		   strcmp((*pv)->name, name)))
91 		pv = &((*pv)->next);
92 	assert(*pv);
93 
94 	v = *pv;
95 	*pv = (*pv)->next;	/* Link out the entry */
96 
97 	if(vctl[vno] == nil && arch->intrdisable != nil)
98 		arch->intrdisable(irq);
99 	iunlock(&vctllock);
100 	xfree(v);
101 	return 0;
102 }
103 
104 static long
105 irqallocread(Chan*, void *vbuf, long n, vlong offset)
106 {
107 	char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
108 	int m, vno;
109 	long oldn;
110 	Vctl *v;
111 
112 	if(n < 0 || offset < 0)
113 		error(Ebadarg);
114 
115 	oldn = n;
116 	buf = vbuf;
117 	for(vno=0; vno<nelem(vctl); vno++){
118 		for(v=vctl[vno]; v; v=v->next){
119 			m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
120 			if(m <= offset)	/* if do not want this, skip entry */
121 				offset -= m;
122 			else{
123 				/* skip offset bytes */
124 				m -= offset;
125 				p = str+offset;
126 				offset = 0;
127 
128 				/* write at most max(n,m) bytes */
129 				if(m > n)
130 					m = n;
131 				memmove(buf, p, m);
132 				n -= m;
133 				buf += m;
134 
135 				if(n == 0)
136 					return oldn;
137 			}
138 		}
139 	}
140 	return oldn - n;
141 }
142 
143 void
144 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
145 {
146 	Vctl *v;
147 
148 	if(vno < 0 || vno >= VectorPIC)
149 		panic("trapenable: vno %d", vno);
150 	v = xalloc(sizeof(Vctl));
151 	v->tbdf = BUSUNKNOWN;
152 	v->f = f;
153 	v->a = a;
154 	strncpy(v->name, name, KNAMELEN);
155 	v->name[KNAMELEN-1] = 0;
156 
157 	ilock(&vctllock);
158 	if(vctl[vno])
159 		v->next = vctl[vno]->next;
160 	vctl[vno] = v;
161 	iunlock(&vctllock);
162 }
163 
/*
 * Clear the NMI latch via port 0x70, then pulse bit 3 of port 0x61
 * (keeping its low three bits) to enable NMI.
 */
static void
nmienable(void)
{
	int ctl;

	/* Hack: should be locked with NVRAM access. */
	outb(0x70, 0x80);		/* NMI latch clear */
	outb(0x70, 0);

	ctl = inb(0x61) & 0x07;		/* Enable NMI */
	outb(0x61, 0x08|ctl);
	outb(0x61, ctl);
}
179 
180 /*
181  * Minimal trap setup.  Just enough so that we can panic
182  * on traps (bugs) during kernel initialization.
183  * Called very early - malloc is not yet available.
184  */
185 void
186 trapinit0(void)
187 {
188 	int d1, v;
189 	ulong vaddr;
190 	Segdesc *idt;
191 
192 	idt = (Segdesc*)IDTADDR;
193 	vaddr = (ulong)vectortable;
194 	for(v = 0; v < 256; v++){
195 		d1 = (vaddr & 0xFFFF0000)|SEGP;
196 		switch(v){
197 
198 		case VectorBPT:
199 			d1 |= SEGPL(3)|SEGIG;
200 			break;
201 
202 		case VectorSYSCALL:
203 			d1 |= SEGPL(3)|SEGIG;
204 			break;
205 
206 		default:
207 			d1 |= SEGPL(0)|SEGIG;
208 			break;
209 		}
210 		idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
211 		idt[v].d1 = d1;
212 		vaddr += 6;
213 	}
214 }
215 
216 void
217 trapinit(void)
218 {
219 	/*
220 	 * Special traps.
221 	 * Syscall() is called directly without going through trap().
222 	 */
223 	trapenable(VectorBPT, debugbpt, 0, "debugpt");
224 	trapenable(VectorPF, fault386, 0, "fault386");
225 	trapenable(Vector2F, doublefault, 0, "doublefault");
226 	trapenable(Vector15, unexpected, 0, "unexpected");
227 	nmienable();
228 
229 	addarchfile("irqalloc", 0444, irqallocread, nil);
230 	trapinited = 1;
231 }
232 
/*
 * Names of the first 32 trap vectors, indexed by vector number.
 * trap() uses them for the note posted to a user process and for the
 * panic message on an unhandled kernel trap.
 */
static char *excname[32] = {
	"divide error",			/* 0 */
	"debug exception",		/* 1 */
	"nonmaskable interrupt",	/* 2 */
	"breakpoint",			/* 3 */
	"overflow",			/* 4 */
	"bounds check",			/* 5 */
	"invalid opcode",		/* 6 */
	"coprocessor not available",	/* 7 */
	"double fault",			/* 8 */
	"coprocessor segment overrun",	/* 9 */
	"invalid TSS",			/* 10 */
	"segment not present",		/* 11 */
	"stack exception",		/* 12 */
	"general protection violation",	/* 13 */
	"page fault",			/* 14 */
	"15 (reserved)",
	"coprocessor error",		/* 16 */
	"alignment check",		/* 17 */
	"machine check",		/* 18 */
	"19 (reserved)",
	"20 (reserved)",
	"21 (reserved)",
	"22 (reserved)",
	"23 (reserved)",
	"24 (reserved)",
	"25 (reserved)",
	"26 (reserved)",
	"27 (reserved)",
	"28 (reserved)",
	"29 (reserved)",
	"30 (reserved)",
	"31 (reserved)",
};
267 
268 /*
269  *  keep histogram of interrupt service times
270  */
271 void
272 intrtime(Mach*, int vno)
273 {
274 	ulong diff;
275 	ulong x;
276 
277 	x = perfticks();
278 	diff = x - m->perf.intrts;
279 	m->perf.intrts = x;
280 
281 	m->perf.inintr += diff;
282 	if(up == nil && m->perf.inidle > diff)
283 		m->perf.inidle -= diff;
284 
285 	diff /= m->cpumhz*100;		/* quantum = 100µsec */
286 	if(diff >= Ntimevec)
287 		diff = Ntimevec-1;
288 	intrtimes[vno][diff]++;
289 }
290 
291 /* go to user space */
292 void
293 kexit(Ureg*)
294 {
295 	uvlong t;
296 	Tos *tos;
297 
298 	/* precise time accounting, kernel exit */
299 	tos = (Tos*)(USTKTOP-sizeof(Tos));
300 	cycles(&t);
301 	tos->kcycles += t - up->kentry;
302 	tos->pcycles = up->pcycles;
303 	tos->pid = up->pid;
304 }
305 
306 /*
307  *  All traps come here.  It is slower to have all traps call trap()
308  *  rather than directly vectoring the handler.  However, this avoids a
309  *  lot of code duplication and possible bugs.  The only exception is
310  *  VectorSYSCALL.
311  *  Trap is called with interrupts disabled via interrupt-gates.
312  */
313 void
314 trap(Ureg* ureg)
315 {
316 	int clockintr, i, vno, user;
317 	char buf[ERRMAX];
318 	Vctl *ctl, *v;
319 	Mach *mach;
320 
321 	if(!trapinited){
322 		/* fault386 can give a better error message */
323 		if(ureg->trap == VectorPF)
324 			fault386(ureg, nil);
325 		panic("trap %lud: not ready", ureg->trap);
326 	}
327 
328 	m->perf.intrts = perfticks();
329 	user = (ureg->cs & 0xFFFF) == UESEL;
330 	if(user){
331 		up->dbgreg = ureg;
332 		cycles(&up->kentry);
333 	}
334 
335 	clockintr = 0;
336 
337 	vno = ureg->trap;
338 	if(ctl = vctl[vno]){
339 		if(ctl->isintr){
340 			m->intr++;
341 			if(vno >= VectorPIC && vno != VectorSYSCALL)
342 				m->lastintr = ctl->irq;
343 		}
344 
345 		if(ctl->isr)
346 			ctl->isr(vno);
347 		for(v = ctl; v != nil; v = v->next){
348 			if(v->f)
349 				v->f(ureg, v->a);
350 		}
351 		if(ctl->eoi)
352 			ctl->eoi(vno);
353 
354 		if(ctl->isintr){
355 			intrtime(m, vno);
356 
357 			if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
358 				clockintr = 1;
359 
360 			if(up && !clockintr)
361 				preempted();
362 		}
363 	}
364 	else if(vno < nelem(excname) && user){
365 		spllo();
366 		sprint(buf, "sys: trap: %s", excname[vno]);
367 		postnote(up, 1, buf, NDebug);
368 	}
369 	else if(vno >= VectorPIC && vno != VectorSYSCALL){
370 		/*
371 		 * An unknown interrupt.
372 		 * Check for a default IRQ7. This can happen when
373 		 * the IRQ input goes away before the acknowledge.
374 		 * In this case, a 'default IRQ7' is generated, but
375 		 * the corresponding bit in the ISR isn't set.
376 		 * In fact, just ignore all such interrupts.
377 		 */
378 
379 		/* call all interrupt routines, just in case */
380 		for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
381 			ctl = vctl[i];
382 			if(ctl == nil)
383 				continue;
384 			if(!ctl->isintr)
385 				continue;
386 			for(v = ctl; v != nil; v = v->next){
387 				if(v->f)
388 					v->f(ureg, v->a);
389 			}
390 			/* should we do this? */
391 			if(ctl->eoi)
392 				ctl->eoi(i);
393 		}
394 
395 		/* clear the interrupt */
396 		i8259isr(vno);
397 
398 		if(0)print("cpu%d: spurious interrupt %d, last %d\n",
399 			m->machno, vno, m->lastintr);
400 		if(0)if(conf.nmach > 1){
401 			for(i = 0; i < 32; i++){
402 				if(!(active.machs & (1<<i)))
403 					continue;
404 				mach = MACHP(i);
405 				if(m->machno == mach->machno)
406 					continue;
407 				print(" cpu%d: last %d",
408 					mach->machno, mach->lastintr);
409 			}
410 			print("\n");
411 		}
412 		m->spuriousintr++;
413 		if(user)
414 			kexit(ureg);
415 		return;
416 	}
417 	else{
418 		if(vno == VectorNMI){
419 			/*
420 			 * Don't re-enable, it confuses the crash dumps.
421 			nmienable();
422 			 */
423 			iprint("cpu%d: NMI PC %#8.8lux\n", m->machno, ureg->pc);
424 			while(m->machno != 0)
425 				;
426 		}
427 		dumpregs(ureg);
428 		if(!user){
429 			ureg->sp = (ulong)&ureg->sp;
430 			_dumpstack(ureg);
431 		}
432 		if(vno < nelem(excname))
433 			panic("%s", excname[vno]);
434 		panic("unknown trap/intr: %d", vno);
435 	}
436 	splhi();
437 
438 	/* delaysched set because we held a lock or because our quantum ended */
439 	if(up && up->delaysched && clockintr){
440 		sched();
441 		splhi();
442 	}
443 
444 	if(user){
445 		if(up->procctl || up->nnote)
446 			notify(ureg);
447 		kexit(ureg);
448 	}
449 }
450 
451 /*
452  *  dump registers
453  */
454 void
455 dumpregs2(Ureg* ureg)
456 {
457 	if(up)
458 		iprint("cpu%d: registers for %s %lud\n",
459 			m->machno, up->text, up->pid);
460 	else
461 		iprint("cpu%d: registers for kernel\n", m->machno);
462 	iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
463 		ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
464 	iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
465 	iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
466 		ureg->ax, ureg->bx, ureg->cx, ureg->dx);
467 	iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
468 		ureg->si, ureg->di, ureg->bp);
469 	iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
470 		ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
471 		ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
472 }
473 
474 void
475 dumpregs(Ureg* ureg)
476 {
477 	vlong mca, mct;
478 
479 	dumpregs2(ureg);
480 
481 	/*
482 	 * Processor control registers.
483 	 * If machine check exception, time stamp counter, page size extensions
484 	 * or enhanced virtual 8086 mode extensions are supported, there is a
485 	 * CR4. If there is a CR4 and machine check extensions, read the machine
486 	 * check address and machine check type registers if RDMSR supported.
487 	 */
488 	iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
489 		getcr0(), getcr2(), getcr3());
490 	if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
491 		iprint(" CR4 %8.8lux", getcr4());
492 		if((m->cpuiddx & (Mce|Cpumsr)) == (Mce|Cpumsr)){
493 			rdmsr(0x00, &mca);
494 			rdmsr(0x01, &mct);
495 			iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
496 		}
497 	}
498 	iprint("\n  ur %#p up %#p\n", ureg, up);
499 }
500 
501 
502 /*
503  * Fill in enough of Ureg to get a stack trace, and call a function.
504  * Used by debugging interface rdb.
505  */
506 void
507 callwithureg(void (*fn)(Ureg*))
508 {
509 	Ureg ureg;
510 	ureg.pc = getcallerpc(&fn);
511 	ureg.sp = (ulong)&fn;
512 	fn(&ureg);
513 }
514 
515 static void
516 _dumpstack(Ureg *ureg)
517 {
518 	uintptr l, v, i, estack;
519 	extern ulong etext;
520 	int x;
521 	char *s;
522 
523 	if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
524 		iprint("dumpstack disabled\n");
525 		return;
526 	}
527 	iprint("dumpstack\n");
528 
529 	x = 0;
530 	x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
531 	i = 0;
532 	if(up
533 	&& (uintptr)&l >= (uintptr)up->kstack
534 	&& (uintptr)&l <= (uintptr)up->kstack+KSTACK)
535 		estack = (uintptr)up->kstack+KSTACK;
536 	else if((uintptr)&l >= (uintptr)m->stack
537 	&& (uintptr)&l <= (uintptr)m+MACHSIZE)
538 		estack = (uintptr)m+MACHSIZE;
539 	else
540 		return;
541 	x += iprint("estackx %p\n", estack);
542 
543 	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
544 		v = *(uintptr*)l;
545 		if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
546 			/*
547 			 * Could Pick off general CALL (((uchar*)v)[-5] == 0xE8)
548 			 * and CALL indirect through AX
549 			 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-2] == 0xD0),
550 			 * but this is too clever and misses faulting address.
551 			 */
552 			x += iprint("%.8p=%.8p ", l, v);
553 			i++;
554 		}
555 		if(i == 4){
556 			i = 0;
557 			x += iprint("\n");
558 		}
559 	}
560 	if(i)
561 		iprint("\n");
562 	iprint("EOF\n");
563 
564 	if(ureg->trap != VectorNMI)
565 		return;
566 
567 	i = 0;
568 	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
569 		iprint("%.8p ", *(uintptr*)l);
570 		if(++i == 8){
571 			i = 0;
572 			iprint("\n");
573 		}
574 	}
575 	if(i)
576 		iprint("\n");
577 }
578 
579 void
580 dumpstack(void)
581 {
582 	callwithureg(_dumpstack);
583 }
584 
585 static void
586 debugbpt(Ureg* ureg, void*)
587 {
588 	char buf[ERRMAX];
589 
590 	if(up == 0)
591 		panic("kernel bpt");
592 	/* restore pc to instruction that caused the trap */
593 	ureg->pc--;
594 	sprint(buf, "sys: breakpoint");
595 	postnote(up, 1, buf, NDebug);
596 }
597 
598 static void
599 doublefault(Ureg*, void*)
600 {
601 	panic("double fault");
602 }
603 
604 static void
605 unexpected(Ureg* ureg, void*)
606 {
607 	print("unexpected trap %lud; ignoring\n", ureg->trap);
608 }
609 
610 extern void checkpages(void);
611 extern void checkfault(ulong, ulong);
612 static void
613 fault386(Ureg* ureg, void*)
614 {
615 	ulong addr;
616 	int read, user, n, insyscall;
617 	char buf[ERRMAX];
618 
619 	addr = getcr2();
620 	read = !(ureg->ecode & 2);
621 
622 	user = (ureg->cs & 0xFFFF) == UESEL;
623 	if(!user){
624 		if(vmapsync(addr))
625 			return;
626 		if(addr >= USTKTOP)
627 			panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
628 		if(up == nil)
629 			panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
630 	}
631 	if(up == nil)
632 		panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
633 
634 	insyscall = up->insyscall;
635 	up->insyscall = 1;
636 	n = fault(addr, read);
637 	if(n < 0){
638 		if(!user){
639 			dumpregs(ureg);
640 			panic("fault: 0x%lux", addr);
641 		}
642 		checkpages();
643 		checkfault(addr, ureg->pc);
644 		sprint(buf, "sys: trap: fault %s addr=0x%lux",
645 			read ? "read" : "write", addr);
646 		postnote(up, 1, buf, NDebug);
647 	}
648 	up->insyscall = insyscall;
649 }
650 
651 /*
652  *  system calls
653  */
654 #include "../port/systab.h"
655 
656 /*
657  *  Syscall is called directly from assembler without going through trap().
658  */
659 void
660 syscall(Ureg* ureg)
661 {
662 	char *e;
663 	ulong	sp;
664 	long	ret;
665 	int	i, s;
666 	ulong scallnr;
667 	vlong startns, stopns;
668 
669 	if((ureg->cs & 0xFFFF) != UESEL)
670 		panic("syscall: cs 0x%4.4luX", ureg->cs);
671 
672 	cycles(&up->kentry);
673 
674 	m->syscall++;
675 	up->insyscall = 1;
676 	up->pc = ureg->pc;
677 	up->dbgreg = ureg;
678 
679 	sp = ureg->usp;
680 	scallnr = ureg->ax;
681 	up->scallnr = scallnr;
682 
683 	if(up->procctl == Proc_tracesyscall){
684 		/*
685 		 * Redundant validaddr.  Do we care?
686 		 * Tracing syscalls is not exactly a fast path...
687 		 * Beware, validaddr currently does a pexit rather
688 		 * than an error if there's a problem; that might
689 		 * change in the future.
690 		 */
691 		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
692 			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
693 
694 		syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
695 		up->procctl = Proc_stopme;
696 		procctl(up);
697 		if(up->syscalltrace)
698 			free(up->syscalltrace);
699 		up->syscalltrace = nil;
700 		startns = todget(nil);
701 	}
702 
703 	if(scallnr == RFORK && up->fpstate == FPactive){
704 		fpsave(&up->fpsave);
705 		up->fpstate = FPinactive;
706 	}
707 	spllo();
708 
709 	up->nerrlab = 0;
710 	ret = -1;
711 	if(!waserror()){
712 		if(scallnr >= nsyscall || systab[scallnr] == 0){
713 			pprint("bad sys call number %lud pc %lux\n",
714 				scallnr, ureg->pc);
715 			postnote(up, 1, "sys: bad sys call", NDebug);
716 			error(Ebadarg);
717 		}
718 
719 		if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
720 			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
721 
722 		up->s = *((Sargs*)(sp+BY2WD));
723 		up->psstate = sysctab[scallnr];
724 
725 		ret = systab[scallnr](up->s.args);
726 		poperror();
727 	}else{
728 		/* failure: save the error buffer for errstr */
729 		e = up->syserrstr;
730 		up->syserrstr = up->errstr;
731 		up->errstr = e;
732 		if(0 && up->pid == 1)
733 			print("syscall %lud error %s\n", scallnr, up->syserrstr);
734 	}
735 	if(up->nerrlab){
736 		print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
737 		for(i = 0; i < NERR; i++)
738 			print("sp=%lux pc=%lux\n",
739 				up->errlab[i].sp, up->errlab[i].pc);
740 		panic("error stack");
741 	}
742 
743 	/*
744 	 *  Put return value in frame.  On the x86 the syscall is
745 	 *  just another trap and the return value from syscall is
746 	 *  ignored.  On other machines the return value is put into
747 	 *  the results register by caller of syscall.
748 	 */
749 	ureg->ax = ret;
750 
751 	if(up->procctl == Proc_tracesyscall){
752 		stopns = todget(nil);
753 		up->procctl = Proc_stopme;
754 		sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
755 		s = splhi();
756 		procctl(up);
757 		splx(s);
758 		if(up->syscalltrace)
759 			free(up->syscalltrace);
760 		up->syscalltrace = nil;
761 	}
762 
763 	up->insyscall = 0;
764 	up->psstate = 0;
765 
766 	if(scallnr == NOTED)
767 		noted(ureg, *(ulong*)(sp+BY2WD));
768 
769 	if(scallnr!=RFORK && (up->procctl || up->nnote)){
770 		splhi();
771 		notify(ureg);
772 	}
773 	/* if we delayed sched because we held a lock, sched now */
774 	if(up->delaysched)
775 		sched();
776 	kexit(ureg);
777 }
778 
779 /*
780  *  Call user, if necessary, with note.
781  *  Pass user the Ureg struct and the note on his stack.
782  */
783 int
784 notify(Ureg* ureg)
785 {
786 	int l;
787 	ulong s, sp;
788 	Note *n;
789 
790 	if(up->procctl)
791 		procctl(up);
792 	if(up->nnote == 0)
793 		return 0;
794 
795 	if(up->fpstate == FPactive){
796 		fpsave(&up->fpsave);
797 		up->fpstate = FPinactive;
798 	}
799 	up->fpstate |= FPillegal;
800 
801 	s = spllo();
802 	qlock(&up->debug);
803 	up->notepending = 0;
804 	n = &up->note[0];
805 	if(strncmp(n->msg, "sys:", 4) == 0){
806 		l = strlen(n->msg);
807 		if(l > ERRMAX-15)	/* " pc=0x12345678\0" */
808 			l = ERRMAX-15;
809 		sprint(n->msg+l, " pc=0x%.8lux", ureg->pc);
810 	}
811 
812 	if(n->flag!=NUser && (up->notified || up->notify==0)){
813 		if(n->flag == NDebug)
814 			pprint("suicide: %s\n", n->msg);
815 		qunlock(&up->debug);
816 		pexit(n->msg, n->flag!=NDebug);
817 	}
818 
819 	if(up->notified){
820 		qunlock(&up->debug);
821 		splhi();
822 		return 0;
823 	}
824 
825 	if(!up->notify){
826 		qunlock(&up->debug);
827 		pexit(n->msg, n->flag!=NDebug);
828 	}
829 	sp = ureg->usp;
830 	sp -= 256;	/* debugging: preserve context causing problem */
831 	sp -= sizeof(Ureg);
832 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
833 	up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
834 
835 	if(!okaddr((ulong)up->notify, 1, 0)
836 	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
837 		qunlock(&up->debug);
838 		pprint("suicide: bad address in notify\n");
839 		pexit("Suicide", 0);
840 	}
841 
842 	memmove((Ureg*)sp, ureg, sizeof(Ureg));
843 	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
844 	up->ureg = (void*)sp;
845 	sp -= BY2WD+ERRMAX;
846 	memmove((char*)sp, up->note[0].msg, ERRMAX);
847 	sp -= 3*BY2WD;
848 	*(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;		/* arg 2 is string */
849 	*(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;	/* arg 1 is ureg* */
850 	*(ulong*)(sp+0*BY2WD) = 0;			/* arg 0 is pc */
851 	ureg->usp = sp;
852 	ureg->pc = (ulong)up->notify;
853 	up->notified = 1;
854 	up->nnote--;
855 	memmove(&up->lastnote, &up->note[0], sizeof(Note));
856 	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
857 
858 	qunlock(&up->debug);
859 	splx(s);
860 	return 1;
861 }
862 
863 /*
864  *   Return user to state before notify()
865  */
866 void
867 noted(Ureg* ureg, ulong arg0)
868 {
869 	Ureg *nureg;
870 	ulong oureg, sp;
871 
872 	qlock(&up->debug);
873 	if(arg0!=NRSTR && !up->notified) {
874 		qunlock(&up->debug);
875 		pprint("call to noted() when not notified\n");
876 		pexit("Suicide", 0);
877 	}
878 	up->notified = 0;
879 
880 	nureg = up->ureg;	/* pointer to user returned Ureg struct */
881 
882 	up->fpstate &= ~FPillegal;
883 
884 	/* sanity clause */
885 	oureg = (ulong)nureg;
886 	if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
887 		qunlock(&up->debug);
888 		pprint("bad ureg in noted or call to noted when not notified\n");
889 		pexit("Suicide", 0);
890 	}
891 
892 	/*
893 	 * Check the segment selectors are all valid, otherwise
894 	 * a fault will be taken on attempting to return to the
895 	 * user process.
896 	 * Take care with the comparisons as different processor
897 	 * generations push segment descriptors in different ways.
898 	 */
899 	if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
900 	  || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
901 	  || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
902 		qunlock(&up->debug);
903 		pprint("bad segment selector in noted\n");
904 		pexit("Suicide", 0);
905 	}
906 
907 	/* don't let user change system flags */
908 	nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
909 
910 	memmove(ureg, nureg, sizeof(Ureg));
911 
912 	switch(arg0){
913 	case NCONT:
914 	case NRSTR:
915 if(0) print("%s %lud: noted %.8lux %.8lux\n",
916 	up->text, up->pid, nureg->pc, nureg->usp);
917 		if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
918 			qunlock(&up->debug);
919 			pprint("suicide: trap in noted\n");
920 			pexit("Suicide", 0);
921 		}
922 		up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
923 		qunlock(&up->debug);
924 		break;
925 
926 	case NSAVE:
927 		if(!okaddr(nureg->pc, BY2WD, 0)
928 		|| !okaddr(nureg->usp, BY2WD, 0)){
929 			qunlock(&up->debug);
930 			pprint("suicide: trap in noted\n");
931 			pexit("Suicide", 0);
932 		}
933 		qunlock(&up->debug);
934 		sp = oureg-4*BY2WD-ERRMAX;
935 		splhi();
936 		ureg->sp = sp;
937 		((ulong*)sp)[1] = oureg;	/* arg 1 0(FP) is ureg* */
938 		((ulong*)sp)[0] = 0;		/* arg 0 is pc */
939 		break;
940 
941 	default:
942 		pprint("unknown noted arg 0x%lux\n", arg0);
943 		up->lastnote.flag = NDebug;
944 		/* fall through */
945 
946 	case NDFLT:
947 		if(up->lastnote.flag == NDebug){
948 			qunlock(&up->debug);
949 			pprint("suicide: %s\n", up->lastnote.msg);
950 		} else
951 			qunlock(&up->debug);
952 		pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
953 	}
954 }
955 
956 long
957 execregs(ulong entry, ulong ssize, ulong nargs)
958 {
959 	ulong *sp;
960 	Ureg *ureg;
961 
962 	up->fpstate = FPinit;
963 	fpoff();
964 
965 	sp = (ulong*)(USTKTOP - ssize);
966 	*--sp = nargs;
967 
968 	ureg = up->dbgreg;
969 	ureg->usp = (ulong)sp;
970 	ureg->pc = entry;
971 	return USTKTOP-sizeof(Tos);		/* address of kernel/user shared data */
972 }
973 
974 /*
975  *  return the userpc the last exception happened at
976  */
977 ulong
978 userpc(void)
979 {
980 	Ureg *ureg;
981 
982 	ureg = (Ureg*)up->dbgreg;
983 	return ureg->pc;
984 }
985 
986 /* This routine must save the values of registers the user is not permitted
987  * to write from devproc and then restore the saved values before returning.
988  */
989 void
990 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
991 {
992 	ulong cs, ds, es, flags, fs, gs, ss;
993 
994 	ss = ureg->ss;
995 	flags = ureg->flags;
996 	cs = ureg->cs;
997 	ds = ureg->ds;
998 	es = ureg->es;
999 	fs = ureg->fs;
1000 	gs = ureg->gs;
1001 	memmove(pureg, uva, n);
1002 	ureg->gs = gs;
1003 	ureg->fs = fs;
1004 	ureg->es = es;
1005 	ureg->ds = ds;
1006 	ureg->cs = cs;
1007 	ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
1008 	ureg->ss = ss;
1009 }
1010 
1011 static void
1012 linkproc(void)
1013 {
1014 	spllo();
1015 	up->kpfun(up->kparg);
1016 	pexit("kproc dying", 0);
1017 }
1018 
1019 void
1020 kprocchild(Proc* p, void (*func)(void*), void* arg)
1021 {
1022 	/*
1023 	 * gotolabel() needs a word on the stack in
1024 	 * which to place the return PC used to jump
1025 	 * to linkproc().
1026 	 */
1027 	p->sched.pc = (ulong)linkproc;
1028 	p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1029 
1030 	p->kpfun = func;
1031 	p->kparg = arg;
1032 }
1033 
1034 void
1035 forkchild(Proc *p, Ureg *ureg)
1036 {
1037 	Ureg *cureg;
1038 
1039 	/*
1040 	 * Add 2*BY2WD to the stack to account for
1041 	 *  - the return PC
1042 	 *  - trap's argument (ur)
1043 	 */
1044 	p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1045 	p->sched.pc = (ulong)forkret;
1046 
1047 	cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1048 	memmove(cureg, ureg, sizeof(Ureg));
1049 	/* return value of syscall in child */
1050 	cureg->ax = 0;
1051 
1052 	/* Things from bottom of syscall which were never executed */
1053 	p->psstate = 0;
1054 	p->insyscall = 0;
1055 }
1056 
1057 /* Give enough context in the ureg to produce a kernel stack for
1058  * a sleeping process
1059  */
1060 void
1061 setkernur(Ureg* ureg, Proc* p)
1062 {
1063 	ureg->pc = p->sched.pc;
1064 	ureg->sp = p->sched.sp+4;
1065 }
1066 
1067 ulong
1068 dbgpc(Proc *p)
1069 {
1070 	Ureg *ureg;
1071 
1072 	ureg = p->dbgreg;
1073 	if(ureg == 0)
1074 		return 0;
1075 
1076 	return ureg->pc;
1077 }
1078