1 /*
2 * 9boot - load next 386 or amd64 kernel from disk and start it
3 * and
4 * 9load - load next 386 or amd64 kernel via pxe (bootp, tftp) and start it
5 *
6 * intel says that pxe can only load into the bottom 640K, and
7 * intel's pxe boot agent takes 128K, leaving only 512K for 9boot.
8 */
9 #include "u.h"
10 #include "../port/lib.h"
11 #include "mem.h"
12 #include "dat.h"
13 #include "fns.h"
14 #include "io.h"
15 #include "ureg.h"
16 #include "pool.h"
17 #include "reboot.h"
18 #include "ip.h" /* for eipfmt */
19 #include <tos.h>
20
enum {
	/*
	 * sentinel kept in an initialised static in main(); main panics
	 * if it reads back differently, meaning the data segment was
	 * misaligned or not loaded correctly.
	 */
	Datamagic	= 0xbabeabed,
};
24
Mach *m;			/* this processor's Mach; set by l32v.s before main runs */

/*
 * the boot processor's page directory, Mach and gdt.
 * mach0init stores mach0m in MACHP(0) and the other two in m;
 * sanity() later checks that they still agree.
 * NOTE(review): presumably filled in by the assembly start-up code - confirm.
 */
ulong* mach0pdb;
Mach* mach0m;
Segdesc* mach0gdt;
u32int memstart;
u32int memend;
int noclock;			/* if set, main does not call arch->clockenable */

extern int pcivga;		/* if zero, main switches to serial-only output */
extern char hellomsg[];		/* banner printed by main */

/*
 * Where configuration info is left for the loaded programme.
 */
char bootdisk[KNAMELEN];
Conf conf;

uchar *sp;	/* user stack of init proc */
int delaylink;			/* if set, main calls bootlinks() instead of links() */
int debug;
int v_flag;
47
48 static void
sanity(void)49 sanity(void)
50 {
51 uintptr cr3;
52
53 cr3 = (uintptr)KADDR(getcr3());
54 if (cr3 == 0)
55 panic("zero cr3");
56 if ((uintptr)m->pdb != cr3 || (uintptr)mach0pdb != cr3)
57 panic("not all same: cr3 %#p m->pdb %#p mach0pdb %#p",
58 cr3, m->pdb, mach0pdb);
59 if (m != mach0m)
60 panic("m %#p != mach0m %#p", m, mach0m);
61 if (m->gdt != mach0gdt)
62 panic("m->gdt %#p != mach0gdt %#p", m->gdt, mach0gdt);
63 if (0)
64 iprint("m->pdb %#p m %#p sp %#p m->gdt %#p\n",
65 m->pdb, m, &cr3, m->gdt);
66 }
67
enum {
	/* system control port a */
	Sysctla=	0x92,		/* i/o port read and written by a20init */
	 Sysctlreset=	1<<0,		/* a20init always writes this bit as zero */
	 Sysctla20ena=	1<<1,		/* a20init sets this bit as its last resort to enable a20 */
};
74
/*
 * Probe whether address 0 and address MB refer to different memory,
 * i.e. whether addresses no longer wrap at the MB boundary.
 * Stores a different value at each address and compares what reads
 * back.  The word at 0 is restored afterwards; the word at MB is not.
 * Returns non-zero if the two words differ (a20 is on), zero otherwise.
 */
static int
isa20on(void)
{
	int r;
	ulong o;
	ulong *zp, *mb1p;

	zp = 0;
	mb1p = (ulong *)MB;
	o = *zp;		/* remember the word at 0 so it can be put back */

	*zp = 0x1234;
	*mb1p = 0x8765;
	/*
	 * NOTE(review): mb586 and wbinvd presumably force the stores out
	 * to memory before the comparison - confirm against their definitions.
	 */
	mb586();
	wbinvd();
	r = *zp != *mb1p;	/* equal means the store to MB landed on address 0 */

	*zp = o;
	return r;
}
95
96 void
a20init(void)97 a20init(void)
98 {
99 int b;
100
101 if (isa20on())
102 return;
103
104 i8042a20(); /* original method, via kbd ctlr */
105 if (isa20on())
106 return;
107
108 /* newer method, last resort */
109 b = inb(Sysctla);
110 if (!(b & Sysctla20ena))
111 outb(Sysctla, (b & ~Sysctlreset) | Sysctla20ena);
112 if (!isa20on()){
113 iprint("a20 didn't come on!\n");
114 for(;;)
115 ;
116 }
117 }
118
/*
 * C entry point of the boot loader; m has already been set by l32v.s.
 * Brings the machine up in a fixed order: a20, Mach, i/o and serial
 * port, print formats, cga, traps and mmu, memory and configuration,
 * pci/vga detection, interrupts, clocks, device links, the page
 * allocator, and finally creates the first process (userinit) and
 * enters the scheduler.  The order of the calls matters; see the
 * comments on individual steps.
 * NOTE(review): the cgapost() values look like progress codes - confirm.
 */
void
main(void)
{
	Proc *savup;
	static ulong vfy = Datamagic;	/* checked below to validate the data segment */
	static char novga[] = "\nno vga; serial console only\n";

	/* up should be nil on entry; remember it so a non-zeroed bss can be reported */
	savup = up;
	up = nil;
	/* m has been set by l32v.s */

	/*
	 * disable address wraps at 1MB boundaries.
	 * if we're 9boot, ldecomp.s already did this.
	 */
	a20init();

	mach0init();
//	options();		/* we don't get options passed to us */
	ioinit();
	/* we later call i8250console after plan9.ini has been read */
	i8250config("0");	/* configure serial port 0 with defaults */
	quotefmtinstall();
	/* all of these verbs are handled by eipfmt */
	fmtinstall('i', eipfmt);
	fmtinstall('I', eipfmt);
	fmtinstall('E', eipfmt);
	fmtinstall('V', eipfmt);
	fmtinstall('M', eipfmt);
	screeninit();		/* cga setup */
	cgapost(0xc);

	trapinit0();
	mmuinit0();

	kbdinit();
	i8253init();
	cpuidentify();
	readlsconf();
	meminit();
	confinit();
	archinit();
	xinit();
	if(i8237alloc != nil)
		i8237alloc();	/* dma (for floppy) init */
	trapinit();
	printinit();
	sanity();
	cgapost(1);

	/*
	 * soekris servers have no built-in video but each has a serial port.
	 * they must see serial output, if any, before cga output because
	 * otherwise the soekris bios will translate cga output to serial
	 * output, which will garble serial console output.
	 */
	pcimatch(nil, 0, 0);	/* force scan of pci table */
	if (!pcivga) {
		/* no vga found: stop writing to the screen and say so on the uart */
		screenputs = nil;
		uartputs(novga, sizeof novga - 1);
	}
	print(" %s\n\n", hellomsg);

	if (vfy != Datamagic)
		panic("data segment incorrectly aligned or loaded");
	if (savup)
		print("up was non-nil (%#p) upon entry to main; bss wasn't zeroed!\n",
			savup);

//	xsummary();
	cpuidprint();
	mmuinit();
	if(arch->intrinit)	/* launches other processors on an mp */
		arch->intrinit();
	timersinit();
	mathinit();
	kbdenable();
	/*
	 * 9loadusb runs much faster if we don't use the clock.
	 * perhaps we're competing with the bios for the use of it?
	 */
	if(!noclock && arch->clockenable)
		arch->clockenable();
	procinit0();
	initseg();
	if(delaylink){
		bootlinks();
		pcimatch(0, 0, 0);
	}else
		links();
	conf.monitor = 1;
	cgapost(0xcd);
	chandevreset();
	cgapost(2);
	pageinit();	/* must follow xinit, and conf.mem must be populated */
	i8253link();
	userinit();	/* creates the process that will run init0 */

	active.thunderbirdsarego = 1;
	cgapost(0xb0);
	schedinit();
}
220
221 void
mach0init(void)222 mach0init(void)
223 {
224 conf.nmach = 1;
225 MACHP(0) = mach0m;
226 m->machno = 0;
227 m->pdb = mach0pdb;
228 m->gdt = mach0gdt;
229
230 machinit();
231
232 active.machs = 1;
233 active.exiting = 0;
234 }
235
236 void
machinit(void)237 machinit(void)
238 {
239 int machno;
240 ulong *pdb;
241 Segdesc *gdt;
242
243 machno = m->machno;
244 pdb = m->pdb;
245 gdt = m->gdt;
246 memset(m, 0, sizeof(Mach));
247 m->machno = machno;
248 m->pdb = pdb;
249 m->gdt = gdt;
250 m->perf.period = 1;
251
252 /*
253 * For polled uart output at boot, need
254 * a default delay constant. 100000 should
255 * be enough for a while. Cpuidentify will
256 * calculate the real value later.
257 */
258 m->loopconst = 100000;
259 }
260
/*
 * Body of the first process (userinit points its saved pc here).
 * Gives the process a root and current directory, initialises the
 * devices, starts the alarm kproc, opens the console and then hands
 * control to bootloadproc, which is not expected to return.
 */
void
init0(void)
{
	int i;
	char buf[2*KNAMELEN];

	up->nerrlab = 0;

	spllo();

	/*
	 * These are o.k. because rootinit is null.
	 * Then early kproc's will have a root and dot.
	 */
	up->slash = namec("#/", Atodir, 0, 0);
	pathclose(up->slash->path);
	up->slash->path = newpath("/");
	up->dot = cclone(up->slash);

	chandevinit();

	/* the leading 0 disables this block; it is kept only for reference */
	if(0 && !waserror()){		/* not needed by boot */
		snprint(buf, sizeof(buf), "%s %s", arch->id, conffile);
		ksetenv("terminal", buf, 0);
		ksetenv("cputype", "386", 0);
		if(cpuserver)
			ksetenv("service", "cpu", 0);
		else
			ksetenv("service", "terminal", 0);
		for(i = 0; i < nconf; i++){
			if(confname[i][0] != '*')
				ksetenv(confname[i], confval[i], 0);
			ksetenv(confname[i], confval[i], 1);
		}
		poperror();
	}
	kproc("alarm", alarmkproc, 0);

	conschan = enamecopen("#c/cons", ORDWR);
	bootloadproc(0);
	panic("bootloadproc returned");
}
303
304 void
userinit(void)305 userinit(void)
306 {
307 Proc *p;
308
309 p = newproc();
310 p->pgrp = newpgrp();
311 p->egrp = smalloc(sizeof(Egrp));
312 p->egrp->ref = 1;
313 p->fgrp = dupfgrp(nil);
314 p->rgrp = newrgrp();
315 p->procmode = 0640;
316
317 kstrdup(&eve, "");
318 kstrdup(&p->text, "*init*");
319 kstrdup(&p->user, eve);
320
321 p->fpstate = FPinit;
322 fpoff();
323
324 /*
325 * Kernel Stack
326 *
327 * N.B. make sure there's enough space for syscall to check
328 * for valid args and
329 * 4 bytes for gotolabel's return PC
330 */
331 p->sched.pc = (ulong)init0;
332 p->sched.sp = (ulong)p->kstack+KSTACK-(sizeof(Sargs)+BY2WD);
333
334 /* NB: no user stack nor text segments are set up */
335
336 ready(p);
337 }
338
339 void
confinit(void)340 confinit(void)
341 {
342 int i, userpcnt;
343 ulong kpages;
344
345 userpcnt = 0; /* bootstrap; no user mode */
346 conf.npage = 0;
347 for(i=0; i<nelem(conf.mem); i++)
348 conf.npage += conf.mem[i].npage;
349
350 conf.npage = MemMax / BY2PG;
351 conf.nproc = 20; /* need a few kprocs */
352 if(cpuserver)
353 conf.nproc *= 3;
354 if(conf.nproc > 2000)
355 conf.nproc = 2000;
356 conf.nimage = 40;
357 conf.nswap = conf.nproc*80;
358 conf.nswppo = 4096;
359
360 kpages = conf.npage - (conf.npage*userpcnt)/100;
361
362 /*
363 * can't go past the end of virtual memory
364 * (ulong)-KZERO is 2^32 - KZERO
365 */
366 if(kpages > ((ulong)-KZERO)/BY2PG)
367 kpages = ((ulong)-KZERO)/BY2PG;
368
369 conf.upages = conf.npage - kpages;
370 conf.ialloc = (kpages/2)*BY2PG;
371
372 /*
373 * Guess how much is taken by the large permanent
374 * datastructures. Mntcache and Mntrpc are not accounted for
375 * (probably ~300KB).
376 */
377 kpages *= BY2PG;
378 kpages -= conf.upages*sizeof(Page)
379 + conf.nproc*sizeof(Proc)
380 + conf.nimage*sizeof(Image)
381 + conf.nswap
382 + conf.nswppo*sizeof(Page);
383 mainmem->maxsize = kpages;
384 if(!cpuserver){
385 /*
386 * give terminals lots of image memory, too; the dynamic
387 * allocation will balance the load properly, hopefully.
388 * be careful with 32-bit overflow.
389 */
390 imagmem->maxsize = kpages;
391 }
392 }
393
/*
 *  math coprocessor segment overrun
 *
 *  Both arguments are ignored; the handler just calls
 *  pexit("math overrun", 0).
 *  NOTE(review): nothing visible in this file installs this handler
 *  (mathinit is empty) - confirm whether it is still referenced.
 */
static void
mathover(Ureg*, void*)
{
	pexit("math overrun", 0);
}
402
/*
 * Floating-point initialisation hook called from main.
 * Deliberately empty here: this file sets up no floating-point traps.
 */
void
mathinit(void)
{
}
407
/*
 *  set up floating point for a new process:
 *  mark p's floating-point state as FPinit and call fpoff().
 */
void
procsetup(Proc*p)
{
	p->fpstate = FPinit;
	fpoff();
}
417
418 void
procrestore(Proc * p)419 procrestore(Proc *p)
420 {
421 uvlong t;
422
423 if(p->kp)
424 return;
425 cycles(&t);
426 p->pcycles -= t;
427 }
428
429 /*
430 * Save the mach dependent part of the process state.
431 */
432 void
procsave(Proc * p)433 procsave(Proc *p)
434 {
435 uvlong t;
436
437 cycles(&t);
438 p->pcycles += t;
439
440 /*
441 * While this processor is in the scheduler, the process could run
442 * on another processor and exit, returning the page tables to
443 * the free list where they could be reallocated and overwritten.
444 * When this processor eventually has to get an entry from the
445 * trashed page tables it will crash.
446 *
447 * If there's only one processor, this can't happen.
448 * You might think it would be a win not to do this in that case,
449 * especially on VMware, but it turns out not to matter.
450 */
451 mmuflushtlb(PADDR(m->pdb));
452 }
453
/*
 * Common shutdown path, called by exit() and reboot().
 * Under the active lock it records the panic state, removes this
 * processor from active.machs and sets active.exiting.  It then waits
 * up to about 5 seconds for the other processors to leave
 * active.machs and for consactive() to report zero, and finally
 * delays before returning.  After a panic on a non-cpuserver it never
 * returns: it halts in a loop instead.
 *
 * ispanic: non-zero if the shutdown was caused by a panic.
 */
static void
shutdown(int ispanic)
{
	int ms, once;

	lock(&active);
	if(ispanic)
		active.ispanic = ispanic;
	else if(m->machno == 0 && (active.machs & (1<<m->machno)) == 0)
		active.ispanic = 0;
	/* non-zero only if this processor was still marked active, i.e. first time through */
	once = active.machs & (1<<m->machno);
	/*
	 * setting exiting will make hzclock() on each processor call exit(0),
	 * which calls shutdown(0) and arch->reset(), which on mp systems is
	 * mpshutdown, which idles non-bootstrap cpus and returns on bootstrap
	 * processors (to permit a reboot).  clearing our bit in machs avoids
	 * calling exit(0) from hzclock() on this processor.
	 */
	active.machs &= ~(1<<m->machno);
	active.exiting = 1;
	unlock(&active);

	if(once)
		iprint("cpu%d: exiting\n", m->machno);

	/* wait for any other processors to shutdown */
	spllo();
	for(ms = 5*1000; ms > 0; ms -= TK2MS(2)){
		delay(TK2MS(2));
		if(active.machs == 0 && consactive() == 0)
			break;
	}

	if(active.ispanic){
		/* after a panic a non-cpuserver stops here for good */
		if(!cpuserver)
			for(;;)
				halt();
		/* a cpuserver pauses: 5 minutes if *debug is configured, else 10 seconds */
		if(getconf("*debug"))
			delay(5*60*1000);
		else
			delay(10000);
	}else
		delay(1000);
}
498
/*
 * Start the kernel that has been loaded.  Moves to cpu0 if necessary,
 * shuts the other processors down on a multiprocessor, shuts down the
 * devices and interrupts, identity-maps low memory, copies the
 * rebootcode trampoline to REBOOTADDR and jumps to it.
 * Not expected to return.
 *
 * entry: kernel entry point, passed to the trampoline as PADDR(entry).
 * code:  loaded kernel image, passed to the trampoline as PADDR(code).
 * size:  passed to the trampoline unchanged
 *        (presumably the byte count of code - confirm against rebootcode).
 */
void
reboot(void *entry, void *code, ulong size)
{
	int i;
	void (*f)(ulong, ulong, ulong);
	ulong *pdb;

	/* we do pass options to the kernel we loaded, however, at CONFADDR. */
	// writeconf();

	/*
	 * the boot processor is cpu0.  execute this function on it
	 * so that the new kernel has the same cpu0.  this only matters
	 * because the hardware has a notion of which processor was the
	 * boot processor and we look at it at start up.
	 */
	if (m->machno != 0) {
		procwired(up, 0);
		sched();
	}

	if(conf.nmach > 1) {
		/*
		 * the other cpus could be holding locks that will never get
		 * released (e.g., in the print path) if we put them into
		 * reset now, so force them to shutdown gracefully first.
		 */
		lock(&active);
		active.rebooting = 1;
		unlock(&active);
		shutdown(0);
		if(arch->resetothers)
			arch->resetothers();
		delay(20);
	}

	/*
	 * should be the only processor running now
	 */
	active.machs = 0;
	if (m->machno != 0)
		print("on cpu%d (not 0)!\n", m->machno);

	print("shutting down...\n");
	delay(200);

	splhi();

	/* turn off buffered serial console */
	serialoq = nil;

	/* shutdown devices */
	chandevshutdown();
	arch->introff();

	/*
	 * Modify the machine page table to directly map low memory
	 * This allows the reboot code to turn off the page mapping
	 */
	pdb = m->pdb;
	/* copy the first LOWPTEPAGES KZERO directory entries down to virtual 0 */
	for (i = 0; i < LOWPTEPAGES; i++)
		pdb[PDX(i*4*MB)] = pdb[PDX(KZERO + i*4*MB)];
	mmuflushtlb(PADDR(pdb));

	/* setup reboot trampoline function */
	f = (void*)REBOOTADDR;
	memmove(f, rebootcode, sizeof(rebootcode));

	print("rebooting...\n");

	/* off we go - never to return */
	coherence();
	(*f)(PADDR(entry), PADDR(code), size);
}
573
574
/*
 * Shut the system down and reset it: run shutdown(ispanic), call
 * spllo(), then arch->reset().
 *
 * ispanic: non-zero if the exit was caused by a panic; passed to shutdown.
 */
void
exit(int ispanic)
{
	shutdown(ispanic);
	spllo();
	arch->reset();
}
582
583 int
isaconfig(char * class,int ctlrno,ISAConf * isa)584 isaconfig(char *class, int ctlrno, ISAConf *isa)
585 {
586 char cc[32], *p;
587 int i;
588
589 snprint(cc, sizeof cc, "%s%d", class, ctlrno);
590 p = getconf(cc);
591 if(p == nil)
592 return 0;
593
594 isa->type = "";
595 isa->nopt = tokenize(p, isa->opt, NISAOPT);
596 for(i = 0; i < isa->nopt; i++){
597 p = isa->opt[i];
598 if(cistrncmp(p, "type=", 5) == 0)
599 isa->type = p + 5;
600 else if(cistrncmp(p, "port=", 5) == 0)
601 isa->port = strtoul(p+5, &p, 0);
602 else if(cistrncmp(p, "irq=", 4) == 0)
603 isa->irq = strtoul(p+4, &p, 0);
604 else if(cistrncmp(p, "dma=", 4) == 0)
605 isa->dma = strtoul(p+4, &p, 0);
606 else if(cistrncmp(p, "mem=", 4) == 0)
607 isa->mem = strtoul(p+4, &p, 0);
608 else if(cistrncmp(p, "size=", 5) == 0)
609 isa->size = strtoul(p+5, &p, 0);
610 else if(cistrncmp(p, "freq=", 5) == 0)
611 isa->freq = strtoul(p+5, &p, 0);
612 }
613 return 1;
614 }
615
/*
 * Compare two NUL-terminated strings, treating the ASCII letters
 * A-Z as equal to a-z.  Returns zero if they match, otherwise the
 * difference between the first pair of (case-folded) characters that
 * differ.
 */
int
cistrcmp(char *a, char *b)
{
	int ca, cb, diff;

	do{
		ca = *a++;
		cb = *b++;

		if(ca >= 'A' && ca <= 'Z')
			ca += 'a' - 'A';
		if(cb >= 'A' && cb <= 'Z')
			cb += 'a' - 'A';

		diff = ca - cb;
		if(diff != 0)
			return diff;
	}while(cb != 0);

	return 0;
}
637
/*
 * Compare at most n characters of two NUL-terminated strings, treating
 * the ASCII letters A-Z as equal to a-z.  Returns zero if the compared
 * prefixes match (or n <= 0), otherwise the difference between the
 * first pair of (case-folded) characters that differ.
 *
 * The arithmetic is done in int, as in cistrcmp, so that a negative
 * difference is returned directly rather than through an
 * implementation-defined unsigned-to-int conversion.
 */
int
cistrncmp(char *a, char *b, int n)
{
	int ac, bc;

	for(; n > 0; n--){
		ac = *a++;
		bc = *b++;

		if(ac >= 'A' && ac <= 'Z')
			ac += 'a' - 'A';
		if(bc >= 'A' && bc <= 'Z')
			bc += 'a' - 'A';

		ac -= bc;
		if(ac != 0)
			return ac;
		/* both strings ended together: don't read past the terminators */
		if(bc == 0)
			break;
	}

	return 0;
}
662
int less_power_slower;	/* if set, idlehands halts even when there is more than one processor */
664
665 /*
666 * put the processor in the halt state if we've no processes to run.
667 * an interrupt will get us going again.
668 */
669 void
idlehands(void)670 idlehands(void)
671 {
672 /*
673 * we used to halt only on single-core setups. halting in an smp system
674 * can result in a startup latency for processes that become ready.
675 * if less_power_slower is true, we care more about saving energy
676 * than reducing this latency.
677 */
678 if(conf.nmach == 1 || less_power_slower)
679 halt();
680 }
681
/*
 * Truncate s in place at its first newline, if it has one.
 * A string without a newline is left unmodified.
 */
void
trimnl(char *s)
{
	char *eol;

	eol = strchr(s, '\n');
	if(!eol)
		return;
	*eol = '\0';
}
691