xref: /plan9/sys/src/9/pc/mp.c (revision 4e3613ab15c331a9ada113286cc0f2a35bc0373d)
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "io.h"
7 #include "ureg.h"
8 
9 #include "mp.h"
10 #include "apbootstrap.h"
11 
12 #define dprint(...)	if(mpdebug) print(__VA_ARGS__); else USED(mpdebug)
13 
14 /* from mpacpi.c */
15 Apic *bootapic;
16 
17 int mpdebug;
18 void (*mpacpifunc)(void);
19 
20 static PCMP* mppcmp;
21 static Bus* mpbus;
22 static Bus* mpbuslast;
23 static int mpisabus = -1;
24 static int mpeisabus = -1;
25 extern int i8259elcr;			/* mask of level-triggered interrupts */
26 /* static */ Apic mpapic[MaxAPICNO+1];
27 /* static */ int machno2apicno[MaxAPICNO+1];	/* inverse map: machno -> APIC ID */
28 /* static */ Apic ioapic[MaxAPICNO+1];
29 static Ref mpvnoref;			/* unique vector assignment */
30 static int mpmachno = 1;
31 static Lock mpphysidlock;
32 static int mpphysid;
33 
/*
 * Bus type strings as they appear in MP-table bus entries, padded
 * with blanks to six characters.  mkbus stores the index of the
 * matching string as the bus type and compares it against the Bus*
 * constants, so the order of this table is significant.
 * The list ends with a null pointer.
 */
static char *buses[] = {
	"CBUSI ",
	"CBUSII",
	"EISA  ",
	"FUTURE",
	"INTERN",
	"ISA   ",
	"MBI   ",
	"MBII  ",
	"MCA   ",
	"MPI   ",
	"MPSA  ",
	"NUBUS ",
	"PCI   ",
	"PCMCIA",
	"TC    ",
	"VL    ",
	"VME   ",
	"XPRESS",
	0,
};
55 
56 static Apic*
mkprocessor(PCMPprocessor * p)57 mkprocessor(PCMPprocessor* p)
58 {
59 	int apicno;
60 	Apic *apic;
61 
62 	apicno = p->apicno;
63 	if(!(p->flags & PcmpEN) || apicno > MaxAPICNO)
64 		return 0;
65 
66 	apic = &mpapic[apicno];
67 	apic->type = PcmpPROCESSOR;
68 	apic->apicno = apicno;
69 	apic->flags = p->flags;
70 	apic->lintr[0] = ApicIMASK;
71 	apic->lintr[1] = ApicIMASK;
72 
73 	if(p->flags & PcmpBP){
74 		machno2apicno[0] = apicno;
75 		apic->machno = 0;
76 	}
77 	else{
78 		machno2apicno[mpmachno] = apicno;
79 		apic->machno = mpmachno;
80 		mpmachno++;
81 	}
82 
83 	return apic;
84 }
85 
86 static Bus*
mkbus(PCMPbus * p)87 mkbus(PCMPbus* p)
88 {
89 	Bus *bus;
90 	int i;
91 
92 	for(i = 0; buses[i]; i++){
93 		if(strncmp(buses[i], p->string, sizeof(p->string)) == 0)
94 			break;
95 	}
96 	if(buses[i] == 0)
97 		return 0;
98 
99 	bus = xalloc(sizeof(Bus));
100 	if(mpbus)
101 		mpbuslast->next = bus;
102 	else
103 		mpbus = bus;
104 	mpbuslast = bus;
105 
106 	bus->type = i;
107 	bus->busno = p->busno;
108 	if(bus->type == BusEISA){
109 		bus->po = PcmpLOW;
110 		bus->el = PcmpLEVEL;
111 		if(mpeisabus != -1)
112 			print("mkbus: more than one EISA bus\n");
113 		mpeisabus = bus->busno;
114 	}
115 	else if(bus->type == BusPCI){
116 		bus->po = PcmpLOW;
117 		bus->el = PcmpLEVEL;
118 	}
119 	else if(bus->type == BusISA){
120 		bus->po = PcmpHIGH;
121 		bus->el = PcmpEDGE;
122 		if(mpisabus != -1)
123 			print("mkbus: more than one ISA bus\n");
124 		mpisabus = bus->busno;
125 	}
126 	else{
127 		bus->po = PcmpHIGH;
128 		bus->el = PcmpEDGE;
129 	}
130 
131 	return bus;
132 }
133 
134 static Bus*
mpgetbus(int busno)135 mpgetbus(int busno)
136 {
137 	Bus *bus;
138 
139 	for(bus = mpbus; bus; bus = bus->next){
140 		if(bus->busno == busno)
141 			return bus;
142 	}
143 	print("mpgetbus: can't find bus %d\n", busno);
144 
145 	return 0;
146 }
147 
148 static Apic*
mkioapic(PCMPioapic * p)149 mkioapic(PCMPioapic* p)
150 {
151 	void *va;
152 	int apicno;
153 	Apic *apic;
154 
155 	apicno = p->apicno;
156 	if(!(p->flags & PcmpEN) || apicno > MaxAPICNO)
157 		return 0;
158 
159 	/*
160 	 * Map the I/O APIC.
161 	 */
162 	if((va = vmap(p->addr, 1024)) == nil)
163 		return 0;
164 
165 	apic = &ioapic[apicno];
166 	apic->type = PcmpIOAPIC;
167 	apic->apicno = apicno;
168 	apic->addr = va;
169 	apic->paddr = p->addr;
170 	apic->flags = p->flags;
171 
172 	return apic;
173 }
174 
175 static Aintr*
mkiointr(PCMPintr * p)176 mkiointr(PCMPintr* p)
177 {
178 	Bus *bus;
179 	Aintr *aintr;
180 	PCMPintr* pcmpintr;
181 
182 	/*
183 	 * According to the MultiProcessor Specification, a destination
184 	 * I/O APIC of 0xFF means the signal is routed to all I/O APICs.
185 	 * It's unclear how that can possibly be correct so treat it as
186 	 * an error for now.
187 	 */
188 	if(p->apicno == 0xFF)
189 		return 0;
190 	if((bus = mpgetbus(p->busno)) == 0)
191 		return 0;
192 
193 	aintr = xalloc(sizeof(Aintr));
194 	aintr->intr = p;
195 
196 	if(0)
197 		dprint("mkiointr: type %d intr type %d flags %#o "
198 			"bus %d irq %d apicno %d intin %d\n",
199 			p->type, p->intr, p->flags,
200 			p->busno, p->irq, p->apicno, p->intin);
201 	/*
202 	 * Hack for Intel SR1520ML motherboard, which BIOS describes
203 	 * the i82575 dual ethernet controllers incorrectly.
204 	 */
205 	if(mppcmp && memcmp(mppcmp->product, "INTEL   X38MLST     ", 20) == 0){
206 		if(p->busno == 1 && p->intin == 16 && p->irq == 1){
207 			pcmpintr = malloc(sizeof(PCMPintr));
208 			if(pcmpintr == nil)
209 				panic("mkiointr: no memory");
210 			memmove(pcmpintr, p, sizeof(PCMPintr));
211 			print("mkiointr: %20.20s bus %d intin %d irq %d\n",
212 				(char*)mppcmp->product,
213 				pcmpintr->busno, pcmpintr->intin,
214 				pcmpintr->irq);
215 			pcmpintr->intin = 17;
216 			aintr->intr = pcmpintr;
217 		}
218 	}
219 	if ((unsigned)p->apicno >= nelem(mpapic))
220 		panic("mkiointr: apic %d out of range", p->apicno);
221 	aintr->apic = &ioapic[p->apicno];
222 	aintr->next = bus->aintr;
223 	bus->aintr = aintr;
224 
225 	return aintr;
226 }
227 
228 static int
mpintrinit(Bus * bus,PCMPintr * intr,int vno,int)229 mpintrinit(Bus* bus, PCMPintr* intr, int vno, int /*irq*/)
230 {
231 	int el, po, v;
232 
233 	/*
234 	 * Parse an I/O or Local APIC interrupt table entry and
235 	 * return the encoded vector.
236 	 */
237 	v = vno;
238 
239 	po = intr->flags & PcmpPOMASK;
240 	el = intr->flags & PcmpELMASK;
241 
242 	switch(intr->intr){
243 
244 	default:				/* PcmpINT */
245 		v |= ApicFIXED;			/* no-op */
246 		break;
247 
248 	case PcmpNMI:
249 		v |= ApicNMI;
250 		po = PcmpHIGH;
251 		el = PcmpEDGE;
252 		break;
253 
254 	case PcmpSMI:
255 		v |= ApicSMI;
256 		break;
257 
258 	case PcmpExtINT:
259 		v |= ApicExtINT;
260 		/*
261 		 * The AMI Goliath doesn't boot successfully with it's LINTR0
262 		 * entry which decodes to low+level. The PPro manual says ExtINT
263 		 * should be level, whereas the Pentium is edge. Setting the
264 		 * Goliath to edge+high seems to cure the problem. Other PPro
265 		 * MP tables (e.g. ASUS P/I-P65UP5 have a entry which decodes
266 		 * to edge+high, so who knows.
267 		 * Perhaps it would be best just to not set an ExtINT entry at
268 		 * all, it shouldn't be needed for SMP mode.
269 		 */
270 		po = PcmpHIGH;
271 		el = PcmpEDGE;
272 		break;
273 	}
274 
275 	/*
276 	 */
277 	if(bus->type == BusEISA && !po && !el /*&& !(i8259elcr & (1<<irq))*/){
278 		po = PcmpHIGH;
279 		el = PcmpEDGE;
280 	}
281 	if(!po)
282 		po = bus->po;
283 	if(po == PcmpLOW)
284 		v |= ApicLOW;
285 	else if(po != PcmpHIGH){
286 		print("mpintrinit: bad polarity 0x%uX\n", po);
287 		return ApicIMASK;
288 	}
289 
290 	if(!el)
291 		el = bus->el;
292 	if(el == PcmpLEVEL)
293 		v |= ApicLEVEL;
294 	else if(el != PcmpEDGE){
295 		print("mpintrinit: bad trigger 0x%uX\n", el);
296 		return ApicIMASK;
297 	}
298 
299 	return v;
300 }
301 
302 static int
mklintr(PCMPintr * p)303 mklintr(PCMPintr* p)
304 {
305 	Apic *apic;
306 	Bus *bus;
307 	int intin, v;
308 
309 	/*
310 	 * The offsets of vectors for LINT[01] are known to be
311 	 * 0 and 1 from the local APIC vector space at VectorLAPIC.
312 	 */
313 	if((bus = mpgetbus(p->busno)) == 0)
314 		return 0;
315 	intin = p->intin;
316 
317 	/*
318 	 * Pentium Pros have problems if LINT[01] are set to ExtINT
319 	 * so just bag it, SMP mode shouldn't need ExtINT anyway.
320 	 */
321 	if(p->intr == PcmpExtINT || p->intr == PcmpNMI)
322 		v = ApicIMASK;
323 	else
324 		v = mpintrinit(bus, p, VectorLAPIC+intin, p->irq);
325 
326 	if(p->apicno == 0xFF){
327 		for(apic = mpapic; apic <= &mpapic[MaxAPICNO]; apic++){
328 			if((apic->flags & PcmpEN)
329 			&& apic->type == PcmpPROCESSOR)
330 				apic->lintr[intin] = v;
331 		}
332 	}
333 	else{
334 		if ((unsigned)p->apicno >= nelem(mpapic))
335 			panic("mklintr: ioapic %d out of range", p->apicno);
336 		apic = &mpapic[p->apicno];
337 		if((apic->flags & PcmpEN) && apic->type == PcmpPROCESSOR)
338 			apic->lintr[intin] = v;
339 	}
340 
341 	return v;
342 }
343 
344 static void
checkmtrr(void)345 checkmtrr(void)
346 {
347 	int i, vcnt;
348 	Mach *mach0;
349 
350 	/*
351 	 * If there are MTRR registers, snarf them for validation.
352 	 */
353 	if(!(m->cpuiddx & Mtrr))
354 		return;
355 
356 	rdmsr(0x0FE, &m->mtrrcap);
357 	rdmsr(0x2FF, &m->mtrrdef);
358 	if(m->mtrrcap & 0x0100){
359 		rdmsr(0x250, &m->mtrrfix[0]);
360 		rdmsr(0x258, &m->mtrrfix[1]);
361 		rdmsr(0x259, &m->mtrrfix[2]);
362 		for(i = 0; i < 8; i++)
363 			rdmsr(0x268+i, &m->mtrrfix[(i+3)]);
364 	}
365 	vcnt = m->mtrrcap & 0x00FF;
366 	if(vcnt > nelem(m->mtrrvar))
367 		vcnt = nelem(m->mtrrvar);
368 	for(i = 0; i < vcnt; i++)
369 		rdmsr(0x200+i, &m->mtrrvar[i]);
370 
371 	/*
372 	 * If not the bootstrap processor, compare.
373 	 */
374 	if(m->machno == 0)
375 		return;
376 
377 	mach0 = MACHP(0);
378 	if(mach0->mtrrcap != m->mtrrcap)
379 		print("mtrrcap%d: %lluX %lluX\n",
380 			m->machno, mach0->mtrrcap, m->mtrrcap);
381 	if(mach0->mtrrdef != m->mtrrdef)
382 		print("mtrrdef%d: %lluX %lluX\n",
383 			m->machno, mach0->mtrrdef, m->mtrrdef);
384 	for(i = 0; i < 11; i++){
385 		if(mach0->mtrrfix[i] != m->mtrrfix[i])
386 			print("mtrrfix%d: i%d: %lluX %lluX\n",
387 				m->machno, i, mach0->mtrrfix[i], m->mtrrfix[i]);
388 	}
389 	for(i = 0; i < vcnt; i++){
390 		if(mach0->mtrrvar[i] != m->mtrrvar[i])
391 			print("mtrrvar%d: i%d: %lluX %lluX\n",
392 				m->machno, i, mach0->mtrrvar[i], m->mtrrvar[i]);
393 	}
394 }
395 
396 static void
squidboy(Apic * apic)397 squidboy(Apic* apic)
398 {
399 //	iprint("Hello Squidboy\n");
400 
401 	machinit();
402 	fpsavealloc();
403 	mmuinit();
404 
405 	cpuidentify();
406 	cpuidprint();
407 	checkmtrr();
408 
409 	apic->online = 1;
410 	coherence();
411 
412 	lapicinit(apic);
413 	lapiconline();
414 	syncclock();
415 	timersinit();
416 
417 	fpoff();
418 
419 	lock(&active);
420 	active.machs |= 1<<m->machno;
421 	unlock(&active);
422 
423 	while(!active.thunderbirdsarego)
424 		microdelay(100);
425 
426 	schedinit();
427 }
428 
429 static void
mpstartap(Apic * apic)430 mpstartap(Apic* apic)
431 {
432 	ulong *apbootp, *pdb, *pte;
433 	Mach *mach, *mach0;
434 	int i, machno;
435 	uchar *p;
436 
437 	mach0 = MACHP(0);
438 
439 	/*
440 	 * Initialise the AP page-tables and Mach structure. The page-tables
441 	 * are the same as for the bootstrap processor with the exception of
442 	 * the PTE for the Mach structure.
443 	 * Xspanalloc will panic if an allocation can't be made.
444 	 */
445 	p = xspanalloc(4*BY2PG, BY2PG, 0);
446 	pdb = (ulong*)p;
447 	memmove(pdb, mach0->pdb, BY2PG);
448 	p += BY2PG;
449 
450 	if((pte = mmuwalk(pdb, MACHADDR, 1, 0)) == nil)
451 		return;
452 	memmove(p, KADDR(PPN(*pte)), BY2PG);
453 	*pte = PADDR(p)|PTEWRITE|PTEVALID;
454 	if(mach0->havepge)
455 		*pte |= PTEGLOBAL;
456 	p += BY2PG;
457 
458 	mach = (Mach*)p;
459 	if((pte = mmuwalk(pdb, MACHADDR, 2, 0)) == nil)
460 		return;
461 	*pte = PADDR(mach)|PTEWRITE|PTEVALID;
462 	if(mach0->havepge)
463 		*pte |= PTEGLOBAL;
464 	p += BY2PG;
465 
466 	machno = apic->machno;
467 	MACHP(machno) = mach;
468 	mach->machno = machno;
469 	mach->pdb = pdb;
470 	mach->gdt = (Segdesc*)p;	/* filled by mmuinit */
471 
472 	/*
473 	 * Tell the AP where its kernel vector and pdb are.
474 	 * The offsets are known in the AP bootstrap code.
475 	 */
476 	apbootp = (ulong*)(APBOOTSTRAP+0x08);
477 	*apbootp++ = (ulong)squidboy;	/* assembler jumps here eventually */
478 	*apbootp++ = PADDR(pdb);
479 	*apbootp = (ulong)apic;
480 
481 	/*
482 	 * Universal Startup Algorithm.
483 	 */
484 	p = KADDR(0x467);		/* warm-reset vector */
485 	*p++ = PADDR(APBOOTSTRAP);
486 	*p++ = PADDR(APBOOTSTRAP)>>8;
487 	i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16;
488 	/* code assumes i==0 */
489 	if(i != 0)
490 		print("mp: bad APBOOTSTRAP\n");
491 	*p++ = i;
492 	*p = i>>8;
493 
494 	coherence();
495 
496 	nvramwrite(0x0F, 0x0A);	/* shutdown code: warm reset upon init ipi */
497 	lapicstartap(apic, PADDR(APBOOTSTRAP));
498 	for(i = 0; i < 1000; i++){
499 		if(apic->online)
500 			break;
501 		delay(10);
502 	}
503 	nvramwrite(0x0F, 0x00);
504 }
505 
506 static void
trympacpi(void)507 trympacpi(void)
508 {
509 	if (mpacpifunc != nil) {
510 		print("mpinit: scanning acpi madt for extra cpus\n");
511 		(*mpacpifunc)();
512 	}
513 }
514 
515 void
mpinit(void)516 mpinit(void)
517 {
518 	int ncpu, cpuson;
519 	char *cp;
520 	PCMP *pcmp;
521 	uchar *e, *p;
522 	Apic *apic, *bpapic;
523 	void *va;
524 
525 	mpdebug = getconf("*debugmp") != nil;
526 	i8259init();
527 	syncclock();
528 
529 	bpapic = nil;
530 	cpuson = 0;
531 
532 	if(_mp_ == 0) {
533 		/*
534 		 * We can easily get processor info from ACPI, but
535 		 * interrupt routing, etc. would require interpreting AML.
536 		 */
537 		print("mpinit: no mp table found, assuming uniprocessor\n");
538 		archrevert();
539 		return;
540 	}
541 	pcmp = KADDR(_mp_->physaddr);
542 
543 	/*
544 	 * Map the local APIC.
545 	 */
546 	if((va = vmap(pcmp->lapicbase, 1024)) == nil)
547 		return;
548 	mppcmp = pcmp;
549 	print("LAPIC: %#lux %#lux\n", pcmp->lapicbase, (ulong)va);
550 
551 	/*
552 	 * Run through the table saving information needed for starting
553 	 * application processors and initialising any I/O APICs. The table
554 	 * is guaranteed to be in order such that only one pass is necessary.
555 	 */
556 	p = ((uchar*)pcmp)+sizeof(PCMP);
557 	e = ((uchar*)pcmp)+pcmp->length;
558 	while(p < e) switch(*p){
559 
560 	default:
561 		print("mpinit: unknown PCMP type 0x%uX (e-p 0x%luX)\n",
562 			*p, e-p);
563 		while(p < e){
564 			print("%uX ", *p);
565 			p++;
566 		}
567 		break;
568 
569 	case PcmpPROCESSOR:
570 		if(apic = mkprocessor((PCMPprocessor*)p)){
571 			/*
572 			 * Must take a note of bootstrap processor APIC
573 			 * now as it will be needed in order to start the
574 			 * application processors later and there's no
575 			 * guarantee that the bootstrap processor appears
576 			 * first in the table before the others.
577 			 */
578 			apic->addr = va;
579 			apic->paddr = pcmp->lapicbase;
580 			if(apic->flags & PcmpBP)
581 				bpapic = apic;
582 			cpuson++;
583 		}
584 		p += sizeof(PCMPprocessor);
585 		continue;
586 
587 	case PcmpBUS:
588 		mkbus((PCMPbus*)p);
589 		p += sizeof(PCMPbus);
590 		continue;
591 
592 	case PcmpIOAPIC:
593 		if(apic = mkioapic((PCMPioapic*)p))
594 			ioapicinit(apic, ((PCMPioapic*)p)->apicno);
595 		p += sizeof(PCMPioapic);
596 		continue;
597 
598 	case PcmpIOINTR:
599 		mkiointr((PCMPintr*)p);
600 		p += sizeof(PCMPintr);
601 		continue;
602 
603 	case PcmpLINTR:
604 		mklintr((PCMPintr*)p);
605 		p += sizeof(PCMPintr);
606 		continue;
607 	}
608 
609 	dprint("mpinit: mp table describes %d cpus\n", cpuson);
610 
611 	/* For now, always scan ACPI's MADT for processors that MP missed. */
612 	trympacpi();
613 
614 	if (bpapic == nil)
615 		bpapic = bootapic;
616 
617 	/*
618 	 * No bootstrap processor, no need to go further.
619 	 */
620 	if(bpapic == 0)
621 		return;
622 	bpapic->online = 1;
623 
624 	lapicinit(bpapic);
625 
626 	/*
627 	 * These interrupts are local to the processor
628 	 * and do not appear in the I/O APIC so it is OK
629 	 * to set them now.
630 	 */
631 	intrenable(IrqTIMER, lapicclock, 0, BUSUNKNOWN, "clock");
632 	intrenable(IrqERROR, lapicerror, 0, BUSUNKNOWN, "lapicerror");
633 	intrenable(IrqSPURIOUS, lapicspurious, 0, BUSUNKNOWN, "lapicspurious");
634 	lapiconline();
635 
636 	checkmtrr();
637 
638 	/*
639 	 * Initialise the application processors.
640 	 */
641 	if(cp = getconf("*ncpu")){
642 		ncpu = strtol(cp, 0, 0);
643 		if(ncpu < 1)
644 			ncpu = 1;
645 		else if(ncpu > MAXMACH)
646 			ncpu = MAXMACH;
647 	}
648 	else
649 		ncpu = MAXMACH;
650 	memmove((void*)APBOOTSTRAP, apbootstrap, sizeof(apbootstrap));
651 	for(apic = mpapic; apic <= &mpapic[MaxAPICNO]; apic++){
652 		if(ncpu <= 1)
653 			break;
654 		if((apic->flags & (PcmpBP|PcmpEN)) == PcmpEN
655 		&& apic->type == PcmpPROCESSOR){
656 			mpstartap(apic);
657 			conf.nmach++;
658 			ncpu--;
659 		}
660 	}
661 
662 	/*
663 	 *  we don't really know the number of processors till
664 	 *  here.
665 	 *
666 	 *  set conf.copymode here if nmach > 1.
667 	 *  Should look for an ExtINT line and enable it.
668 	 */
669 	if(X86FAMILY(m->cpuidax) == 3 || conf.nmach > 1)
670 		conf.copymode = 1;
671 }
672 
673 static int
mpintrcpu(void)674 mpintrcpu(void)
675 {
676 	int i;
677 
678 	/*
679 	 * The bulk of this code was written ~1995, when there was
680 	 * one architecture and one generation of hardware, the number
681 	 * of CPUs was up to 4(8) and the choices for interrupt routing
682 	 * were physical, or flat logical (optionally with lowest
683 	 * priority interrupt). Logical mode hasn't scaled well with
684 	 * the increasing number of packages/cores/threads, so the
685 	 * fall-back is to physical mode, which works across all processor
686 	 * generations, both AMD and Intel, using the APIC and xAPIC.
687 	 *
688 	 * Interrupt routing policy can be set here.
689 	 * Currently, just assign each interrupt to a different CPU on
690 	 * a round-robin basis. Some idea of the packages/cores/thread
691 	 * topology would be useful here, e.g. to not assign interrupts
692 	 * to more than one thread in a core, or to use a "noise" core.
693 	 * But, as usual, Intel make that an onerous task.
694 	 */
695 
696 	/*
697 	 * temporary workaround for many-core intel (non-amd) systems:
698 	 * always use cpu 0.  (TODO)
699 	 */
700 	if(strncmp(m->cpuidid, "AuthenticAMD", 12) != 0 && conf.nmach > 8)
701 		return 0;
702 
703 	lock(&mpphysidlock);
704 	for(;;){
705 		i = mpphysid++;
706 		if(mpphysid >= MaxAPICNO+1)
707 			mpphysid = 0;
708 		if(mpapic[i].online)
709 			break;
710 	}
711 	unlock(&mpphysidlock);
712 
713 	return mpapic[i].apicno;
714 }
715 
716 static int
mpintrenablex(Vctl * v,int tbdf)717 mpintrenablex(Vctl* v, int tbdf)
718 {
719 	Bus *bus;
720 	Aintr *aintr;
721 	Apic *apic;
722 	Pcidev *pcidev;
723 	int bno, dno, hi, irq, lo, n, type, vno;
724 	char *typenm;
725 
726 	/*
727 	 * Find the bus.
728 	 */
729 	type = BUSTYPE(tbdf);
730 	bno = BUSBNO(tbdf);
731 	dno = BUSDNO(tbdf);
732 	if(type == BusISA)
733 		bno = mpisabus;
734 	vno = -1;
735 	for(bus = mpbus; bus != nil; bus = bus->next){
736 		if(bus->type != type)
737 			continue;
738 		if(bus->busno == bno)
739 			break;
740 	}
741 	if(bus == nil){
742 		typenm = type < 0 || type >= nelem(buses)? "": buses[type];
743 		print("mpintrenablex: can't find bus type %d (%s) for irq %d "
744 			"%s busno %d\n", type, typenm, v->irq, v->name, bno);
745 		return -1;
746 	}
747 
748 	/*
749 	 * For PCI devices the interrupt pin (INT[ABCD]) and device
750 	 * number are encoded into the entry irq field, so create something
751 	 * to match on. The interrupt pin used by the device has to be
752 	 * obtained from the PCI config space.
753 	 */
754 	if(bus->type == BusPCI){
755 		pcidev = pcimatchtbdf(tbdf);
756 		if(pcidev != nil && (n = pcicfgr8(pcidev, PciINTP)) != 0)
757 			irq = (dno<<2)|(n-1);
758 		else
759 			irq = -1;
760 		//print("pcidev %#uX: irq %#uX v->irq %#uX\n", tbdf, irq, v->irq);
761 	}
762 	else
763 		irq = v->irq;
764 
765 	/*
766 	 * Find a matching interrupt entry from the list of interrupts
767 	 * attached to this bus.
768 	 */
769 	for(aintr = bus->aintr; aintr; aintr = aintr->next){
770 		if(aintr->intr->irq != irq)
771 			continue;
772 		if (0) {
773 			PCMPintr* p = aintr->intr;
774 
775 	   	 	print("mpintrenablex: bus %d intin %d irq %d\n",
776 				p->busno, p->intin, p->irq);
777 		}
778 		/*
779 		 * Check if already enabled. Multifunction devices may share
780 		 * INT[A-D]# so, if already enabled, check the polarity matches
781 		 * and the trigger is level.
782 		 *
783 		 * Should check the devices differ only in the function number,
784 		 * but that can wait for the planned enable/disable rewrite.
785 		 * The RDT read here is safe for now as currently interrupts
786 		 * are never disabled once enabled.
787 		 */
788 		apic = aintr->apic;
789 		ioapicrdtr(apic, aintr->intr->intin, 0, &lo);
790 		if(!(lo & ApicIMASK)){
791 			vno = lo & 0xFF;
792 //print("%s vector %d (!imask)\n", v->name, vno);
793 			n = mpintrinit(bus, aintr->intr, vno, v->irq);
794 			n |= ApicPHYSICAL;		/* no-op */
795 			lo &= ~(ApicRemoteIRR|ApicDELIVS);
796 			if(n != lo || !(n & ApicLEVEL)){
797 				print("mpintrenable: multiple botch irq%d, tbdf %uX, lo %8.8uX, n %8.8uX\n",
798 					v->irq, tbdf, lo, n);
799 				return -1;
800 			}
801 			break;
802 		}
803 
804 		/*
805 		 * With the APIC a unique vector can be assigned to each
806 		 * request to enable an interrupt. There are two reasons this
807 		 * is a good idea:
808 		 * 1) to prevent lost interrupts, no more than 2 interrupts
809 		 *    should be assigned per block of 16 vectors (there is an
810 		 *    in-service entry and a holding entry for each priority
811 		 *    level and there is one priority level per block of 16
812 		 *    interrupts).
813 		 * 2) each input pin on the IOAPIC will receive a different
814 		 *    vector regardless of whether the devices on that pin use
815 		 *    the same IRQ as devices on another pin.
816 		 */
817 		vno = VectorAPIC + (incref(&mpvnoref)-1)*8;
818 //print("%s vector %d (imask)\n", v->name, vno);
819 		if(vno > MaxVectorAPIC){
820 			print("mpintrenable: vno %d, irq %d, tbdf %uX\n",
821 				vno, v->irq, tbdf);
822 			return -1;
823 		}
824 
825 		hi = mpintrcpu()<<24;
826 		lo = mpintrinit(bus, aintr->intr, vno, v->irq);
827 		//print("lo 0x%uX: busno %d intr %d vno %d irq %d elcr 0x%uX\n",
828 		//	lo, bus->busno, aintr->intr->irq, vno,
829 		//	v->irq, i8259elcr);
830 		if(lo & ApicIMASK)
831 			return -1;
832 		lo |= ApicPHYSICAL;			/* no-op */
833 
834 		if((apic->flags & PcmpEN) && apic->type == PcmpIOAPIC)
835  			ioapicrdtw(apic, aintr->intr->intin, hi, lo);
836 		//else
837 		//	print("lo not enabled 0x%uX %d\n",
838 		//		apic->flags, apic->type);
839 		break;
840 	}
841 	if (aintr) {
842 		v->isr = lapicisr;
843 		v->eoi = lapiceoi;
844 	}
845 	return vno;
846 }
847 
848 int
mpintrenable(Vctl * v)849 mpintrenable(Vctl* v)
850 {
851 	int irq, tbdf, vno;
852 
853 	/*
854 	 * If the bus is known, try it.
855 	 * BUSUNKNOWN is given both by [E]ISA devices and by
856 	 * interrupts local to the processor (local APIC, coprocessor
857 	 * breakpoint and page-fault).
858 	 */
859 	tbdf = v->tbdf;
860 	if(tbdf != BUSUNKNOWN && (vno = mpintrenablex(v, tbdf)) != -1)
861 		return vno;
862 
863 	irq = v->irq;
864 	if(irq >= IrqLINT0 && irq <= MaxIrqLAPIC){
865 		if(irq != IrqSPURIOUS)
866 			v->isr = lapiceoi;
867 		return VectorPIC+irq;
868 	}
869 	if(irq < 0 || irq > MaxIrqPIC){
870 		print("mpintrenable: irq %d out of range\n", irq);
871 		return -1;
872 	}
873 
874 	/*
875 	 * Either didn't find it or have to try the default buses
876 	 * (ISA and EISA). This hack is due to either over-zealousness
877 	 * or laziness on the part of some manufacturers.
878 	 *
879 	 * The MP configuration table on some older systems
880 	 * (e.g. ASUS PCI/E-P54NP4) has an entry for the EISA bus
881 	 * but none for ISA. It also has the interrupt type and
882 	 * polarity set to 'default for this bus' which wouldn't
883 	 * be compatible with ISA.
884 	 */
885 	if(mpeisabus != -1){
886 		vno = mpintrenablex(v, MKBUS(BusEISA, 0, 0, 0));
887 		if(vno != -1)
888 			return vno;
889 	}
890 	if(mpisabus != -1){
891 		vno = mpintrenablex(v, MKBUS(BusISA, 0, 0, 0));
892 		if(vno != -1)
893 			return vno;
894 	}
895 	print("mpintrenable: out of choices eisa %d isa %d tbdf %#ux irq %d\n",
896 		mpeisabus, mpisabus, v->tbdf, v->irq);
897 	return -1;
898 }
899 
900 static Lock mpshutdownlock;
901 
902 void
mpshutdown(void)903 mpshutdown(void)
904 {
905 	/*
906 	 * To be done...
907 	 */
908 	if(!canlock(&mpshutdownlock)){
909 		/*
910 		 * If this processor received the CTRL-ALT-DEL from
911 		 * the keyboard, acknowledge it. Send an INIT to self.
912 		 */
913 #ifdef FIXTHIS
914 		if(lapicisr(VectorKBD))
915 			lapiceoi(VectorKBD);
916 #endif /* FIX THIS */
917 		arch->introff();
918 		idle();
919 	}
920 
921 	if(active.rebooting)
922 		return;
923 	print("apshutdown: active = %#8.8ux\n", active.machs);
924 	delay(1000);
925 	splhi();
926 	arch->resetothers();
927 
928 	pcireset();
929 	i8042reset();
930 
931 	/*
932 	 * Often the BIOS hangs during restart if a conventional 8042
933 	 * warm-boot sequence is tried. The following is Intel specific and
934 	 * seems to perform a cold-boot, but at least it comes back.
935 	 * And sometimes there is no keyboard...
936 	 *
937 	 * The reset register (0xcf9) is usually in one of the bridge
938 	 * chips. The actual location and sequence could be extracted from
939 	 * ACPI but why bother, this is the end of the line anyway.
940 	 */
941 	print("no kbd; trying bios warm boot...");
942 	*(ushort*)KADDR(0x472) = 0x1234;	/* BIOS warm-boot flag */
943 	outb(0xCF9, 0x02);
944 	outb(0xCF9, 0x06);
945 
946 	print("can't reset\n");
947 	for(;;)
948 		idle();
949 }
950