xref: /plan9/sys/src/9/pc/trap.c (revision fac6300f1f1b25611e114fc0bdda9cf428c13da4)
1 #include	"u.h"
2 #include	"tos.h"
3 #include	"../port/lib.h"
4 #include	"mem.h"
5 #include	"dat.h"
6 #include	"fns.h"
7 #include	"io.h"
8 #include	"ureg.h"
9 #include	"../port/error.h"
10 #include	<trace.h>
11 
12 static int trapinited;
13 
14 void	noted(Ureg*, ulong);
15 
16 static void debugbpt(Ureg*, void*);
17 static void fault386(Ureg*, void*);
18 static void doublefault(Ureg*, void*);
19 static void unexpected(Ureg*, void*);
20 static void _dumpstack(Ureg*);
21 
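/*
 * vctl[vno] heads a chain of Vctl handlers sharing interrupt/trap
 * vector vno; vctllock serialises additions and removals.
 */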
22 static Lock vctllock;
23 static Vctl *vctl[256];
24 
25 enum
26 {
27 	Ntimevec = 20		/* number of time buckets for each intr */
28 };
29 ulong intrtimes[256][Ntimevec];
30 
31 void
32 intrenable(int irq, void (*f)(Ureg*, void*), void* a, int tbdf, char *name)
33 {
34 	int vno;
35 	Vctl *v;
36 
37 	if(f == nil){
38 		print("intrenable: nil handler for %d, tbdf 0x%uX for %s\n",
39 			irq, tbdf, name);
40 		return;
41 	}
42 
43 	v = xalloc(sizeof(Vctl));
44 	v->isintr = 1;
45 	v->irq = irq;
46 	v->tbdf = tbdf;
47 	v->f = f;
48 	v->a = a;
49 	strncpy(v->name, name, KNAMELEN-1);
50 	v->name[KNAMELEN-1] = 0;
51 
52 	ilock(&vctllock);
53 	vno = arch->intrenable(v);
54 	if(vno == -1){
55 		iunlock(&vctllock);
56 		print("intrenable: couldn't enable irq %d, tbdf 0x%uX for %s\n",
57 			irq, tbdf, v->name);
58 		xfree(v);
59 		return;
60 	}
61 	if(vctl[vno]){
62 		if(vctl[vno]->isr != v->isr || vctl[vno]->eoi != v->eoi)
63 			panic("intrenable: handler: %s %s %#p %#p %#p %#p",
64 				vctl[vno]->name, v->name,
65 				vctl[vno]->isr, v->isr, vctl[vno]->eoi, v->eoi);
66 		v->next = vctl[vno];
67 	}
68 	vctl[vno] = v;
69 	iunlock(&vctllock);
70 }
71 
72 int
73 intrdisable(int irq, void (*f)(Ureg *, void *), void *a, int tbdf, char *name)
74 {
75 	Vctl **pv, *v;
76 	int vno;
77 
78 	/*
79 	 * For now, none of this will work with the APIC code,
80 	 * there is no mapping between irq and vector as the IRQ
81 	 * is pretty meaningless.
82 	 */
83 	if(arch->intrvecno == nil)
84 		return -1;
85 	vno = arch->intrvecno(irq);
86 	ilock(&vctllock);
87 	pv = &vctl[vno];
88 	while (*pv &&
89 		  ((*pv)->irq != irq || (*pv)->tbdf != tbdf || (*pv)->f != f || (*pv)->a != a ||
90 		   strcmp((*pv)->name, name)))
91 		pv = &((*pv)->next);
92 	assert(*pv);
93 
94 	v = *pv;
95 	*pv = (*pv)->next;	/* Link out the entry */
96 
97 	if(vctl[vno] == nil && arch->intrdisable != nil)
98 		arch->intrdisable(irq);
99 	iunlock(&vctllock);
100 	xfree(v);
101 	return 0;
102 }
103 
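/*
 * Read routine for the "irqalloc" arch file (registered in trapinit
 * below): one line per installed handler, "vector irq name", formatted
 * on the fly; the loop skips `offset' bytes into that stream and then
 * copies out at most n bytes.
 */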
104 static long
105 irqallocread(Chan*, void *vbuf, long n, vlong offset)
106 {
107 	char *buf, *p, str[2*(11+1)+KNAMELEN+1+1];
108 	int m, vno;
109 	long oldn;
110 	Vctl *v;
111 
112 	if(n < 0 || offset < 0)
113 		error(Ebadarg);
114 
115 	oldn = n;
116 	buf = vbuf;
117 	for(vno=0; vno<nelem(vctl); vno++){
118 		for(v=vctl[vno]; v; v=v->next){
119 			m = snprint(str, sizeof str, "%11d %11d %.*s\n", vno, v->irq, KNAMELEN, v->name);
120 			if(m <= offset)	/* don't want this entry; skip it */
121 				offset -= m;
122 			else{
123 				/* skip offset bytes */
124 				m -= offset;
125 				p = str+offset;
126 				offset = 0;
127 
128 				/* write at most min(n,m) bytes */
129 				if(m > n)
130 					m = n;
131 				memmove(buf, p, m);
132 				n -= m;
133 				buf += m;
134 
135 				if(n == 0)
136 					return oldn;
137 			}
138 		}
139 	}
140 	return oldn - n;
141 }
142 
143 void
144 trapenable(int vno, void (*f)(Ureg*, void*), void* a, char *name)
145 {
146 	Vctl *v;
147 
148 	if(vno < 0 || vno >= VectorPIC)
149 		panic("trapenable: vno %d", vno);
150 	v = xalloc(sizeof(Vctl));
151 	v->tbdf = BUSUNKNOWN;
152 	v->f = f;
153 	v->a = a;
154 	strncpy(v->name, name, KNAMELEN);
155 	v->name[KNAMELEN-1] = 0;
156 
157 	ilock(&vctllock);
158 	v->next = vctl[vno];
159 	vctl[vno] = v;
160 	iunlock(&vctllock);
161 }
162 
163 static void
164 nmienable(void)
165 {
166 	int x;
167 
168 	/*
169 	 * Hack: should be locked with NVRAM access.
170 	 */
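	/*
	 * Port 0x70 is the CMOS/RTC index port; its top bit masks NMI,
	 * so writing 0x80 and then 0 leaves NMI unmasked again.  Port
	 * 0x61 (system control port B) latches the NMI sources; pulsing
	 * bit 3 is intended to clear and re-enable them.
	 */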
171 	outb(0x70, 0x80);		/* NMI latch clear */
172 	outb(0x70, 0);
173 
174 	x = inb(0x61) & 0x07;		/* Enable NMI */
175 	outb(0x61, 0x08|x);
176 	outb(0x61, x);
177 }
178 
179 /*
180  * Minimal trap setup.  Just enough so that we can panic
181  * on traps (bugs) during kernel initialization.
182  * Called very early - malloc is not yet available.
183  */
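/*
 * Each IDT entry is built by hand: d0 gets the low 16 bits of the
 * handler address plus the kernel code selector (KESEL<<16); d1 gets
 * the high 16 bits plus the present, privilege and interrupt-gate
 * bits.  Entries in vectortable are 6 bytes apart, hence vaddr += 6.
 * VectorBPT and VectorSYSCALL get SEGPL(3) so user code can raise
 * them with INT; everything else is kernel-only.
 */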
184 void
185 trapinit0(void)
186 {
187 	int d1, v;
188 	ulong vaddr;
189 	Segdesc *idt;
190 
191 	idt = (Segdesc*)IDTADDR;
192 	vaddr = (ulong)vectortable;
193 	for(v = 0; v < 256; v++){
194 		d1 = (vaddr & 0xFFFF0000)|SEGP;
195 		switch(v){
196 
197 		case VectorBPT:
198 			d1 |= SEGPL(3)|SEGIG;
199 			break;
200 
201 		case VectorSYSCALL:
202 			d1 |= SEGPL(3)|SEGIG;
203 			break;
204 
205 		default:
206 			d1 |= SEGPL(0)|SEGIG;
207 			break;
208 		}
209 		idt[v].d0 = (vaddr & 0xFFFF)|(KESEL<<16);
210 		idt[v].d1 = d1;
211 		vaddr += 6;
212 	}
213 }
214 
215 void
216 trapinit(void)
217 {
218 	/*
219 	 * Special traps.
220 	 * Syscall() is called directly without going through trap().
221 	 */
222 	trapenable(VectorBPT, debugbpt, 0, "debugbpt");
223 	trapenable(VectorPF, fault386, 0, "fault386");
224 	trapenable(Vector2F, doublefault, 0, "doublefault");
225 	trapenable(Vector15, unexpected, 0, "unexpected");
226 	nmienable();
227 
228 	addarchfile("irqalloc", 0444, irqallocread, nil);
229 	trapinited = 1;
230 }
231 
232 static char* excname[32] = {
233 	"divide error",
234 	"debug exception",
235 	"nonmaskable interrupt",
236 	"breakpoint",
237 	"overflow",
238 	"bounds check",
239 	"invalid opcode",
240 	"coprocessor not available",
241 	"double fault",
242 	"coprocessor segment overrun",
243 	"invalid TSS",
244 	"segment not present",
245 	"stack exception",
246 	"general protection violation",
247 	"page fault",
248 	"15 (reserved)",
249 	"coprocessor error",
250 	"alignment check",
251 	"machine check",
252 	"19 (reserved)",
253 	"20 (reserved)",
254 	"21 (reserved)",
255 	"22 (reserved)",
256 	"23 (reserved)",
257 	"24 (reserved)",
258 	"25 (reserved)",
259 	"26 (reserved)",
260 	"27 (reserved)",
261 	"28 (reserved)",
262 	"29 (reserved)",
263 	"30 (reserved)",
264 	"31 (reserved)",
265 };
266 
267 /*
268  *  keep histogram of interrupt service times
269  */
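/*
 * diff below is the time since the previous call in perfticks;
 * assuming perfticks counts CPU cycles, diff/(cpumhz*100) is the
 * service time in 100µs buckets, clamped to the last bucket of
 * intrtimes[vno].
 */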
270 void
271 intrtime(Mach*, int vno)
272 {
273 	ulong diff;
274 	ulong x;
275 
276 	x = perfticks();
277 	diff = x - m->perf.intrts;
278 	m->perf.intrts = x;
279 
280 	m->perf.inintr += diff;
281 	if(up == nil && m->perf.inidle > diff)
282 		m->perf.inidle -= diff;
283 
284 	diff /= m->cpumhz*100;		/* quantum = 100µsec */
285 	if(diff >= Ntimevec)
286 		diff = Ntimevec-1;
287 	intrtimes[vno][diff]++;
288 }
289 
290 /* go to user space */
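/*
 * The Tos structure sits just below USTKTOP and is shared with user
 * code; kexit exports the per-process cycle counters and pid there
 * on each return to user mode.
 */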
291 void
292 kexit(Ureg*)
293 {
294 	uvlong t;
295 	Tos *tos;
296 
297 	/* precise time accounting, kernel exit */
298 	tos = (Tos*)(USTKTOP-sizeof(Tos));
299 	cycles(&t);
300 	tos->kcycles += t - up->kentry;
301 	tos->pcycles = up->pcycles;
302 	tos->pid = up->pid;
303 }
304 
305 /*
306  *  All traps come here.  It is slower to have all traps call trap()
307  *  rather than directly vectoring the handler.  However, this avoids a
308  *  lot of code duplication and possible bugs.  The only exception is
309  *  VectorSYSCALL.
310  *  Trap is called with interrupts disabled via interrupt-gates.
311  */
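/*
 * Dispatch outline: look up vctl[trap number], run the controller's
 * isr, every chained handler, then eoi.  Clock interrupts set
 * clockintr so the scheduler can run below; other interrupts may
 * preempt the current process.  Unclaimed vectors in the PIC range
 * are treated as spurious; unclaimed exceptions post a note to a
 * user process or panic in the kernel.
 */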
312 void
313 trap(Ureg* ureg)
314 {
315 	int clockintr, i, vno, user;
316 	char buf[ERRMAX];
317 	Vctl *ctl, *v;
318 	Mach *mach;
319 
320 	if(!trapinited){
321 		/* fault386 can give a better error message */
322 		if(ureg->trap == VectorPF)
323 			fault386(ureg, nil);
324 		panic("trap %lud: not ready", ureg->trap);
325 	}
326 
327 	m->perf.intrts = perfticks();
328 	user = (ureg->cs & 0xFFFF) == UESEL;
329 	if(user){
330 		up->dbgreg = ureg;
331 		cycles(&up->kentry);
332 	}
333 
334 	clockintr = 0;
335 
336 	vno = ureg->trap;
337 	if(ctl = vctl[vno]){
338 		if(ctl->isintr){
339 			m->intr++;
340 			if(vno >= VectorPIC && vno != VectorSYSCALL)
341 				m->lastintr = ctl->irq;
342 		}
343 
344 		if(ctl->isr)
345 			ctl->isr(vno);
346 		for(v = ctl; v != nil; v = v->next){
347 			if(v->f)
348 				v->f(ureg, v->a);
349 		}
350 		if(ctl->eoi)
351 			ctl->eoi(vno);
352 
353 		if(ctl->isintr){
354 			intrtime(m, vno);
355 
356 			if(ctl->irq == IrqCLOCK || ctl->irq == IrqTIMER)
357 				clockintr = 1;
358 
359 			if(up && !clockintr)
360 				preempted();
361 		}
362 	}
363 	else if(vno < nelem(excname) && user){
364 		spllo();
365 		snprint(buf, sizeof buf, "sys: trap: %s", excname[vno]);
366 		postnote(up, 1, buf, NDebug);
367 	}
368 	else if(vno >= VectorPIC && vno != VectorSYSCALL){
369 		/*
370 		 * An unknown interrupt.
371 		 * Check for a default IRQ7. This can happen when
372 		 * the IRQ input goes away before the acknowledge.
373 		 * In this case, a 'default IRQ7' is generated, but
374 		 * the corresponding bit in the ISR isn't set.
375 		 * In fact, just ignore all such interrupts.
376 		 */
377 
378 		/* call all interrupt routines, just in case */
379 		for(i = VectorPIC; i <= MaxIrqLAPIC; i++){
380 			ctl = vctl[i];
381 			if(ctl == nil)
382 				continue;
383 			if(!ctl->isintr)
384 				continue;
385 			for(v = ctl; v != nil; v = v->next){
386 				if(v->f)
387 					v->f(ureg, v->a);
388 			}
389 			/* should we do this? */
390 			if(ctl->eoi)
391 				ctl->eoi(i);
392 		}
393 
394 		/* clear the interrupt */
395 		i8259isr(vno);
396 
397 		if(0)print("cpu%d: spurious interrupt %d, last %d\n",
398 			m->machno, vno, m->lastintr);
399 		if(0)if(conf.nmach > 1){
400 			for(i = 0; i < 32; i++){
401 				if(!(active.machs & (1<<i)))
402 					continue;
403 				mach = MACHP(i);
404 				if(m->machno == mach->machno)
405 					continue;
406 				print(" cpu%d: last %d",
407 					mach->machno, mach->lastintr);
408 			}
409 			print("\n");
410 		}
411 		m->spuriousintr++;
412 		if(user)
413 			kexit(ureg);
414 		return;
415 	}
416 	else{
417 		if(vno == VectorNMI){
418 			/*
419 			 * Don't re-enable, it confuses the crash dumps.
420 			nmienable();
421 			 */
422 			iprint("cpu%d: NMI PC %#8.8lux\n", m->machno, ureg->pc);
423 			while(m->machno != 0)
424 				;
425 		}
426 		dumpregs(ureg);
427 		if(!user){
428 			ureg->sp = (ulong)&ureg->sp;
429 			_dumpstack(ureg);
430 		}
431 		if(vno < nelem(excname))
432 			panic("%s", excname[vno]);
433 		panic("unknown trap/intr: %d", vno);
434 	}
435 	splhi();
436 
437 	/* delaysched set because we held a lock or because our quantum ended */
438 	if(up && up->delaysched && clockintr){
439 		sched();
440 		splhi();
441 	}
442 
443 	if(user){
444 		if(up->procctl || up->nnote)
445 			notify(ureg);
446 		kexit(ureg);
447 	}
448 }
449 
450 /*
451  *  dump registers
452  */
453 void
454 dumpregs2(Ureg* ureg)
455 {
456 	if(up)
457 		iprint("cpu%d: registers for %s %lud\n",
458 			m->machno, up->text, up->pid);
459 	else
460 		iprint("cpu%d: registers for kernel\n", m->machno);
461 	iprint("FLAGS=%luX TRAP=%luX ECODE=%luX PC=%luX",
462 		ureg->flags, ureg->trap, ureg->ecode, ureg->pc);
463 	iprint(" SS=%4.4luX USP=%luX\n", ureg->ss & 0xFFFF, ureg->usp);
464 	iprint("  AX %8.8luX  BX %8.8luX  CX %8.8luX  DX %8.8luX\n",
465 		ureg->ax, ureg->bx, ureg->cx, ureg->dx);
466 	iprint("  SI %8.8luX  DI %8.8luX  BP %8.8luX\n",
467 		ureg->si, ureg->di, ureg->bp);
468 	iprint("  CS %4.4luX  DS %4.4luX  ES %4.4luX  FS %4.4luX  GS %4.4luX\n",
469 		ureg->cs & 0xFFFF, ureg->ds & 0xFFFF, ureg->es & 0xFFFF,
470 		ureg->fs & 0xFFFF, ureg->gs & 0xFFFF);
471 }
472 
473 void
474 dumpregs(Ureg* ureg)
475 {
476 	vlong mca, mct;
477 
478 	dumpregs2(ureg);
479 
480 	/*
481 	 * Processor control registers.
482 	 * If machine check exception, time stamp counter, page size extensions
483 	 * or enhanced virtual 8086 mode extensions are supported, there is a
484 	 * CR4. If there is a CR4 and machine check extensions, read the machine
485 	 * check address and machine check type registers if RDMSR supported.
486 	 */
487 	iprint("  CR0 %8.8lux CR2 %8.8lux CR3 %8.8lux",
488 		getcr0(), getcr2(), getcr3());
489 	if(m->cpuiddx & (Mce|Tsc|Pse|Vmex)){
490 		iprint(" CR4 %8.8lux", getcr4());
491 		if((m->cpuiddx & (Mce|Cpumsr)) == (Mce|Cpumsr)){
492 			rdmsr(0x00, &mca);
493 			rdmsr(0x01, &mct);
494 			iprint("\n  MCA %8.8llux MCT %8.8llux", mca, mct);
495 		}
496 	}
497 	iprint("\n  ur %#p up %#p\n", ureg, up);
498 }
499 
500 
501 /*
502  * Fill in enough of Ureg to get a stack trace, and call a function.
503  * Used by debugging interface rdb.
504  */
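/*
 * getcallerpc(&fn) recovers the caller's return PC from the stack,
 * and the address of the argument itself serves as an approximate
 * stack pointer for the trace.
 */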
505 void
506 callwithureg(void (*fn)(Ureg*))
507 {
508 	Ureg ureg;
509 	ureg.pc = getcallerpc(&fn);
510 	ureg.sp = (ulong)&fn;
511 	fn(&ureg);
512 }
513 
514 static void
515 _dumpstack(Ureg *ureg)
516 {
517 	uintptr l, v, i, estack;
518 	extern ulong etext;
519 	int x;
520 	char *s;
521 
522 	if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
523 		iprint("dumpstack disabled\n");
524 		return;
525 	}
526 	iprint("dumpstack\n");
527 
528 	x = 0;
529 	x += iprint("ktrace /kernel/path %.8lux %.8lux <<EOF\n", ureg->pc, ureg->sp);
530 	i = 0;
531 	if(up
532 	&& (uintptr)&l >= (uintptr)up->kstack
533 	&& (uintptr)&l <= (uintptr)up->kstack+KSTACK)
534 		estack = (uintptr)up->kstack+KSTACK;
535 	else if((uintptr)&l >= (uintptr)m->stack
536 	&& (uintptr)&l <= (uintptr)m+MACHSIZE)
537 		estack = (uintptr)m+MACHSIZE;
538 	else
539 		return;
540 	x += iprint("estackx %p\n", estack);
541 
542 	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
543 		v = *(uintptr*)l;
544 		if((KTZERO < v && v < (uintptr)&etext) || estack-l < 32){
545 			/*
546 			 * Could pick off general CALL (((uchar*)v)[-5] == 0xE8)
547 			 * and CALL indirect through AX
548 			 * (((uchar*)v)[-2] == 0xFF && ((uchar*)v)[-1] == 0xD0),
549 			 * but this is too clever and misses the faulting address.
550 			 */
551 			x += iprint("%.8p=%.8p ", l, v);
552 			i++;
553 		}
554 		if(i == 4){
555 			i = 0;
556 			x += iprint("\n");
557 		}
558 	}
559 	if(i)
560 		iprint("\n");
561 	iprint("EOF\n");
562 
563 	if(ureg->trap != VectorNMI)
564 		return;
565 
566 	i = 0;
567 	for(l = (uintptr)&l; l < estack; l += sizeof(uintptr)){
568 		iprint("%.8p ", *(uintptr*)l);
569 		if(++i == 8){
570 			i = 0;
571 			iprint("\n");
572 		}
573 	}
574 	if(i)
575 		iprint("\n");
576 }
577 
578 void
579 dumpstack(void)
580 {
581 	callwithureg(_dumpstack);
582 }
583 
584 static void
585 debugbpt(Ureg* ureg, void*)
586 {
587 	char buf[ERRMAX];
588 
589 	if(up == 0)
590 		panic("kernel bpt");
591 	/* restore pc to instruction that caused the trap */
592 	ureg->pc--;
593 	snprint(buf, sizeof buf, "sys: breakpoint");
594 	postnote(up, 1, buf, NDebug);
595 }
596 
597 static void
598 doublefault(Ureg*, void*)
599 {
600 	panic("double fault");
601 }
602 
603 static void
604 unexpected(Ureg* ureg, void*)
605 {
606 	print("unexpected trap %lud; ignoring\n", ureg->trap);
607 }
608 
609 extern void checkpages(void);
610 extern void checkfault(ulong, ulong);
611 static void
612 fault386(Ureg* ureg, void*)
613 {
614 	ulong addr;
615 	int read, user, n, insyscall;
616 	char buf[ERRMAX];
617 
618 	addr = getcr2();
619 	read = !(ureg->ecode & 2);	/* bit 1 of the error code: 0 is a read, 1 is a write */
620 
621 	user = (ureg->cs & 0xFFFF) == UESEL;
622 	if(!user){
623 		if(vmapsync(addr))
624 			return;
625 		if(addr >= USTKTOP)
626 			panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
627 		if(up == nil)
628 			panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
629 	}
630 	if(up == nil)
631 		panic("user fault: up=0 pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
632 
633 	insyscall = up->insyscall;
634 	up->insyscall = 1;
635 	n = fault(addr, read);
636 	if(n < 0){
637 		if(!user){
638 			dumpregs(ureg);
639 			panic("fault: 0x%lux", addr);
640 		}
641 		checkpages();
642 		checkfault(addr, ureg->pc);
643 		snprint(buf, sizeof buf, "sys: trap: fault %s addr=0x%lux",
644 			read ? "read" : "write", addr);
645 		postnote(up, 1, buf, NDebug);
646 	}
647 	up->insyscall = insyscall;
648 }
649 
650 /*
651  *  system calls
652  */
653 #include "../port/systab.h"
654 
655 /*
656  *  Syscall is called directly from assembler without going through trap().
657  */
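/*
 * Convention on the 386: the system call number arrives in AX, the
 * arguments sit on the user stack at usp+BY2WD as a Sargs, and the
 * result is returned in AX.  The argument block is validaddr'd only
 * when it might not lie wholly within the topmost stack page.
 */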
658 void
659 syscall(Ureg* ureg)
660 {
661 	char *e;
662 	ulong	sp;
663 	long	ret;
664 	int	i, s;
665 	ulong scallnr;
666 	vlong startns, stopns;
667 
668 	if((ureg->cs & 0xFFFF) != UESEL)
669 		panic("syscall: cs 0x%4.4luX", ureg->cs);
670 
671 	cycles(&up->kentry);
672 
673 	m->syscall++;
674 	up->insyscall = 1;
675 	up->pc = ureg->pc;
676 	up->dbgreg = ureg;
677 
678 	sp = ureg->usp;
679 	scallnr = ureg->ax;
680 	up->scallnr = scallnr;
681 
682 	if(up->procctl == Proc_tracesyscall){
683 		/*
684 		 * Redundant validaddr.  Do we care?
685 		 * Tracing syscalls is not exactly a fast path...
686 		 * Beware, validaddr currently does a pexit rather
687 		 * than an error if there's a problem; that might
688 		 * change in the future.
689 		 */
690 		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
691 			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
692 
693 		syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
694 		up->procctl = Proc_stopme;
695 		procctl(up);
696 		if(up->syscalltrace)
697 			free(up->syscalltrace);
698 		up->syscalltrace = nil;
699 		startns = todget(nil);
700 	}
701 
702 	if(scallnr == RFORK && up->fpstate == FPactive){
703 		fpsave(&up->fpsave);
704 		up->fpstate = FPinactive;
705 	}
706 	spllo();
707 
708 	up->nerrlab = 0;
709 	ret = -1;
710 	if(!waserror()){
711 		if(scallnr >= nsyscall || systab[scallnr] == 0){
712 			pprint("bad sys call number %lud pc %lux\n",
713 				scallnr, ureg->pc);
714 			postnote(up, 1, "sys: bad sys call", NDebug);
715 			error(Ebadarg);
716 		}
717 
718 		if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
719 			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
720 
721 		up->s = *((Sargs*)(sp+BY2WD));
722 		up->psstate = sysctab[scallnr];
723 
724 		ret = systab[scallnr](up->s.args);
725 		poperror();
726 	}else{
727 		/* failure: save the error buffer for errstr */
728 		e = up->syserrstr;
729 		up->syserrstr = up->errstr;
730 		up->errstr = e;
731 		if(0 && up->pid == 1)
732 			print("syscall %lud error %s\n", scallnr, up->syserrstr);
733 	}
734 	if(up->nerrlab){
735 		print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
736 		for(i = 0; i < NERR; i++)
737 			print("sp=%lux pc=%lux\n",
738 				up->errlab[i].sp, up->errlab[i].pc);
739 		panic("error stack");
740 	}
741 
742 	/*
743 	 *  Put return value in frame.  On the x86 the syscall is
744 	 *  just another trap and the return value from syscall is
745 	 *  ignored.  On other machines the return value is put into
746 	 *  the results register by caller of syscall.
747 	 */
748 	ureg->ax = ret;
749 
750 	if(up->procctl == Proc_tracesyscall){
751 		stopns = todget(nil);
752 		up->procctl = Proc_stopme;
753 		sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
754 		s = splhi();
755 		procctl(up);
756 		splx(s);
757 		if(up->syscalltrace)
758 			free(up->syscalltrace);
759 		up->syscalltrace = nil;
760 	}
761 
762 	up->insyscall = 0;
763 	up->psstate = 0;
764 
765 	if(scallnr == NOTED)
766 		noted(ureg, *(ulong*)(sp+BY2WD));
767 
768 	if(scallnr!=RFORK && (up->procctl || up->nnote)){
769 		splhi();
770 		notify(ureg);
771 	}
772 	/* if we delayed sched because we held a lock, sched now */
773 	if(up->delaysched)
774 		sched();
775 	kexit(ureg);
776 }
777 
778 /*
779  *  Call user, if necessary, with note.
780  *  Pass user the Ureg struct and the note on his stack.
781  */
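/*
 * Frame built on the user stack, highest address first: a 256 byte
 * gap preserving the faulting context, a copy of the Ureg, one word
 * holding the previous up->ureg, the note string (ERRMAX bytes), and
 * three words for the handler's arguments: a pointer to the string,
 * the ureg*, and a zero word where a return PC would go; ureg->usp
 * is left pointing at that zero word.
 */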
782 int
783 notify(Ureg* ureg)
784 {
785 	int l;
786 	ulong s, sp;
787 	Note *n;
788 
789 	if(up->procctl)
790 		procctl(up);
791 	if(up->nnote == 0)
792 		return 0;
793 
794 	if(up->fpstate == FPactive){
795 		fpsave(&up->fpsave);
796 		up->fpstate = FPinactive;
797 	}
798 	up->fpstate |= FPillegal;
799 
800 	s = spllo();
801 	qlock(&up->debug);
802 	up->notepending = 0;
803 	n = &up->note[0];
804 	if(strncmp(n->msg, "sys:", 4) == 0){
805 		l = strlen(n->msg);
806 		if(l > ERRMAX-15)	/* " pc=0x12345678\0" */
807 			l = ERRMAX-15;
808 		seprint(n->msg+l, &n->msg[sizeof n->msg], " pc=0x%.8lux",
809 			ureg->pc);
810 	}
811 
812 	if(n->flag!=NUser && (up->notified || up->notify==0)){
813 		if(n->flag == NDebug)
814 			pprint("suicide: %s\n", n->msg);
815 		qunlock(&up->debug);
816 		pexit(n->msg, n->flag!=NDebug);
817 	}
818 
819 	if(up->notified){
820 		qunlock(&up->debug);
821 		splhi();
822 		return 0;
823 	}
824 
825 	if(!up->notify){
826 		qunlock(&up->debug);
827 		pexit(n->msg, n->flag!=NDebug);
828 	}
829 	sp = ureg->usp;
830 	sp -= 256;	/* debugging: preserve context causing problem */
831 	sp -= sizeof(Ureg);
832 if(0) print("%s %lud: notify %.8lux %.8lux %.8lux %s\n",
833 	up->text, up->pid, ureg->pc, ureg->usp, sp, n->msg);
834 
835 	if(!okaddr((ulong)up->notify, 1, 0)
836 	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)){
837 		qunlock(&up->debug);
838 		pprint("suicide: bad address in notify\n");
839 		pexit("Suicide", 0);
840 	}
841 
842 	memmove((Ureg*)sp, ureg, sizeof(Ureg));
843 	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
844 	up->ureg = (void*)sp;
845 	sp -= BY2WD+ERRMAX;
846 	memmove((char*)sp, up->note[0].msg, ERRMAX);
847 	sp -= 3*BY2WD;
848 	*(ulong*)(sp+2*BY2WD) = sp+3*BY2WD;		/* arg 2 is string */
849 	*(ulong*)(sp+1*BY2WD) = (ulong)up->ureg;	/* arg 1 is ureg* */
850 	*(ulong*)(sp+0*BY2WD) = 0;			/* arg 0 is pc */
851 	ureg->usp = sp;
852 	ureg->pc = (ulong)up->notify;
853 	up->notified = 1;
854 	up->nnote--;
855 	memmove(&up->lastnote, &up->note[0], sizeof(Note));
856 	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
857 
858 	qunlock(&up->debug);
859 	splx(s);
860 	return 1;
861 }
862 
863 /*
864  *   Return user to state before notify()
865  */
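/*
 * arg0 says how to resume: NCONT and NRSTR restore the (sanity
 * checked) Ureg the handler may have modified and unstack up->ureg;
 * NSAVE leaves the Ureg on the user stack and builds a small frame
 * pointing at it for a later noted(NRSTR); NDFLT and anything
 * unrecognised terminate the process with the last note as its
 * exit string.
 */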
866 void
867 noted(Ureg* ureg, ulong arg0)
868 {
869 	Ureg *nureg;
870 	ulong oureg, sp;
871 
872 	qlock(&up->debug);
873 	if(arg0!=NRSTR && !up->notified) {
874 		qunlock(&up->debug);
875 		pprint("call to noted() when not notified\n");
876 		pexit("Suicide", 0);
877 	}
878 	up->notified = 0;
879 
880 	nureg = up->ureg;	/* pointer to user returned Ureg struct */
881 
882 	up->fpstate &= ~FPillegal;
883 
884 	/* sanity clause */
885 	oureg = (ulong)nureg;
886 	if(!okaddr((ulong)oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
887 		qunlock(&up->debug);
888 		pprint("bad ureg in noted or call to noted when not notified\n");
889 		pexit("Suicide", 0);
890 	}
891 
892 	/*
893 	 * Check the segment selectors are all valid, otherwise
894 	 * a fault will be taken on attempting to return to the
895 	 * user process.
896 	 * Take care with the comparisons as different processor
897 	 * generations push segment descriptors in different ways.
898 	 */
899 	if((nureg->cs & 0xFFFF) != UESEL || (nureg->ss & 0xFFFF) != UDSEL
900 	  || (nureg->ds & 0xFFFF) != UDSEL || (nureg->es & 0xFFFF) != UDSEL
901 	  || (nureg->fs & 0xFFFF) != UDSEL || (nureg->gs & 0xFFFF) != UDSEL){
902 		qunlock(&up->debug);
903 		pprint("bad segment selector in noted\n");
904 		pexit("Suicide", 0);
905 	}
906 
907 	/* don't let user change system flags */
908 	nureg->flags = (ureg->flags & ~0xCD5) | (nureg->flags & 0xCD5);
909 
910 	memmove(ureg, nureg, sizeof(Ureg));
911 
912 	switch(arg0){
913 	case NCONT:
914 	case NRSTR:
915 if(0) print("%s %lud: noted %.8lux %.8lux\n",
916 	up->text, up->pid, nureg->pc, nureg->usp);
917 		if(!okaddr(nureg->pc, 1, 0) || !okaddr(nureg->usp, BY2WD, 0)){
918 			qunlock(&up->debug);
919 			pprint("suicide: trap in noted\n");
920 			pexit("Suicide", 0);
921 		}
922 		up->ureg = (Ureg*)(*(ulong*)(oureg-BY2WD));
923 		qunlock(&up->debug);
924 		break;
925 
926 	case NSAVE:
927 		if(!okaddr(nureg->pc, BY2WD, 0)
928 		|| !okaddr(nureg->usp, BY2WD, 0)){
929 			qunlock(&up->debug);
930 			pprint("suicide: trap in noted\n");
931 			pexit("Suicide", 0);
932 		}
933 		qunlock(&up->debug);
934 		sp = oureg-4*BY2WD-ERRMAX;
935 		splhi();
936 		ureg->sp = sp;
937 		((ulong*)sp)[1] = oureg;	/* arg 1 0(FP) is ureg* */
938 		((ulong*)sp)[0] = 0;		/* arg 0 is pc */
939 		break;
940 
941 	default:
942 		pprint("unknown noted arg 0x%lux\n", arg0);
943 		up->lastnote.flag = NDebug;
944 		/* fall through */
945 
946 	case NDFLT:
947 		if(up->lastnote.flag == NDebug){
948 			qunlock(&up->debug);
949 			pprint("suicide: %s\n", up->lastnote.msg);
950 		} else
951 			qunlock(&up->debug);
952 		pexit(up->lastnote.msg, up->lastnote.flag!=NDebug);
953 	}
954 }
955 
956 void
957 validalign(uintptr addr, unsigned align)
958 {
959 	/*
960 	 * Plan 9 is a 32-bit O/S, and the hardware it runs on
961 	 * does not usually have instructions which move 64-bit
962 	 * quantities directly, synthesizing the operations
963 	 * with 32-bit move instructions. Therefore, the compiler
964 	 * (and hardware) usually only enforce 32-bit alignment,
965 	 * if at all.
966 	 *
967 	 * Take this out if the architecture warrants it.
968 	 */
969 	if(align == sizeof(vlong))
970 		align = sizeof(long);
971 
972 	/*
973 	 * Check align is a power of 2, then addr alignment.
974 	 */
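	/*
	 * e.g. align == 4: align & (align-1) is zero only for powers of
	 * two, and addr & 3 is zero exactly when addr is 4-byte aligned.
	 */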
975 	if((align != 0 && !(align & (align-1))) && !(addr & (align-1)))
976 		return;
977 	postnote(up, 1, "sys: odd address", NDebug);
978 	error(Ebadarg);
979 	/*NOTREACHED*/
980 }
981 
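/*
 * Registers for a freshly exec'ed image: the word pushed below the
 * argument block holds the argument count, usp points at it and pc
 * is the entry point.  The value returned is the address of the
 * kernel/user shared Tos.
 */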
982 long
983 execregs(ulong entry, ulong ssize, ulong nargs)
984 {
985 	ulong *sp;
986 	Ureg *ureg;
987 
988 	up->fpstate = FPinit;
989 	fpoff();
990 
991 	sp = (ulong*)(USTKTOP - ssize);
992 	*--sp = nargs;
993 
994 	ureg = up->dbgreg;
995 	ureg->usp = (ulong)sp;
996 	ureg->pc = entry;
997 	return USTKTOP-sizeof(Tos);		/* address of kernel/user shared data */
998 }
999 
1000 /*
1001  *  return the userpc the last exception happened at
1002  */
1003 ulong
1004 userpc(void)
1005 {
1006 	Ureg *ureg;
1007 
1008 	ureg = (Ureg*)up->dbgreg;
1009 	return ureg->pc;
1010 }
1011 
1012 /* This routine must save the values of registers the user is not permitted
1013  * to write from devproc and then restore the saved values before returning.
1014  */
1015 void
1016 setregisters(Ureg* ureg, char* pureg, char* uva, int n)
1017 {
1018 	ulong cs, ds, es, flags, fs, gs, ss;
1019 
1020 	ss = ureg->ss;
1021 	flags = ureg->flags;
1022 	cs = ureg->cs;
1023 	ds = ureg->ds;
1024 	es = ureg->es;
1025 	fs = ureg->fs;
1026 	gs = ureg->gs;
1027 	memmove(pureg, uva, n);
1028 	ureg->gs = gs;
1029 	ureg->fs = fs;
1030 	ureg->es = es;
1031 	ureg->ds = ds;
1032 	ureg->cs = cs;
1033 	ureg->flags = (ureg->flags & 0x00FF) | (flags & 0xFF00);
1034 	ureg->ss = ss;
1035 }
1036 
1037 static void
1038 linkproc(void)
1039 {
1040 	spllo();
1041 	up->kpfun(up->kparg);
1042 	pexit("kproc dying", 0);
1043 }
1044 
1045 void
1046 kprocchild(Proc* p, void (*func)(void*), void* arg)
1047 {
1048 	/*
1049 	 * gotolabel() needs a word on the stack in
1050 	 * which to place the return PC used to jump
1051 	 * to linkproc().
1052 	 */
1053 	p->sched.pc = (ulong)linkproc;
1054 	p->sched.sp = (ulong)p->kstack+KSTACK-BY2WD;
1055 
1056 	p->kpfun = func;
1057 	p->kparg = arg;
1058 }
1059 
1060 void
1061 forkchild(Proc *p, Ureg *ureg)
1062 {
1063 	Ureg *cureg;
1064 
1065 	/*
1066 	 * Add 2*BY2WD to the stack to account for
1067 	 *  - the return PC
1068 	 *  - trap's argument (ur)
1069 	 */
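	/*
	 * Layout of the child's kernel stack, low to high from sched.sp:
	 * a word for the return PC, a word for the ureg argument, then
	 * the copied Ureg (cureg) that forkret restores from.
	 */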
1070 	p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Ureg)+2*BY2WD);
1071 	p->sched.pc = (ulong)forkret;
1072 
1073 	cureg = (Ureg*)(p->sched.sp+2*BY2WD);
1074 	memmove(cureg, ureg, sizeof(Ureg));
1075 	/* return value of syscall in child */
1076 	cureg->ax = 0;
1077 
1078 	/* Things from bottom of syscall which were never executed */
1079 	p->psstate = 0;
1080 	p->insyscall = 0;
1081 }
1082 
1083 /* Give enough context in the ureg to produce a kernel stack for
1084  * a sleeping process
1085  */
1086 void
1087 setkernur(Ureg* ureg, Proc* p)
1088 {
1089 	ureg->pc = p->sched.pc;
1090 	ureg->sp = p->sched.sp+4;
1091 }
1092 
1093 ulong
1094 dbgpc(Proc *p)
1095 {
1096 	Ureg *ureg;
1097 
1098 	ureg = p->dbgreg;
1099 	if(ureg == 0)
1100 		return 0;
1101 
1102 	return ureg->pc;
1103 }
1104