/*
 * Memory mappings. Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load). The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0. All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts. The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree. The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap. The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap. The total amount (256M)
 * could probably be reduced somewhat if desired. The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer's worth (at most 16M). Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory. It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages. The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them. For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */
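
/*
 * For concreteness: because of the 1-1 map, translating between kernel
 * virtual and physical addresses is pure arithmetic (see kaddr and paddr
 * at the bottom of this file). With KZERO at 0xF0000000, physical
 * 0x00100000 is kernel virtual 0xF0100000, and only physical addresses
 * below -KZERO (256MB) are reachable through that window.
 */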

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define EXEC16SEGM(p)	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
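
/*
 * The data and code descriptors above are flat: base 0 and limit 0xFFFFF
 * in 4K units (SEGG), i.e. the whole 32-bit address space, so segmentation
 * contributes nothing and all real translation is left to paging. Only the
 * TSS descriptor carries a meaningful base and a byte-granular limit of
 * sizeof(Tss); mmuinit rebuilds it per processor once m->tss is allocated.
 */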

Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};

static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

#define vpt ((ulong*)VPT)
#define VPTX(va)	(((ulong)(va))>>12)
#define vpd (vpt+VPTX(VPT))
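
/*
 * A sketch of how the self-map works: m->pdb[PDX(VPT)] points back at the
 * pdb itself (see mmuinit below), so when the processor resolves an address
 * in the 4M at VPT it uses the pdb once as a page directory and once as a
 * page table. The net effect is that vpt[] reads as a linear array of all
 * 1M page table entries, and vpd[] (the slice of vpt[] that maps VPT itself)
 * is an alias for the current pdb:
 *
 *	PTE for va:	vpt[VPTX(va)], i.e. vpt[va>>12]
 *	PDE for va:	vpd[PDX(va)]
 *
 * putmmu, kmap, and vmapsync below edit the page tables through exactly
 * these entries.
 */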

void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
		VPT, vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	if(m->tss == nil)
		panic("mmuinit: no memory");
	memset(m->tss, 0, sizeof(Tss));
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page. Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard). Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

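	/*
	 * lgdt and lidt take a 6-byte pseudo-descriptor: a 16-bit
	 * limit (table size minus one) followed by the 32-bit linear
	 * base address, which is what ptr[3] assembles below.
	 */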
	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	ltr(TSSSEL);
}

/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3). Since kernel memory mappings
 * are never removed, this is safe. (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also mmukmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}
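
/*
 * A sketch of the full flush alluded to above, for reference only: clearing
 * CR4.PGE (bit 7) discards even the global TLB entries, the CR3 write
 * flushes the rest, and then global pages are turned back on. Nothing in
 * this file needs it, since kernel mappings are never removed. A putcr4
 * counterpart to getcr4 is assumed here; it is not defined in this file.
 *
 *	putcr4(getcr4() & ~0x80);
 *	putcr3(getcr3());
 *	putcr4(getcr4() | 0x80);
 */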

/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(getcr3());
}

/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting. Clear all the pde entries for user-space
 * memory mappings and device mappings. Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}

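/*
 * Point the TSS at the new kernel stack and load the new page table.
 * The esp0/ss0 fields are what the processor loads on a trap or interrupt
 * from user mode, so setting them is what actually switches kernel stacks;
 * the CR3 write switches address spaces and flushes the non-global TLB
 * entries.
 */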
static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault. pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to. - rsc
	 */

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0){
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}

/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level. This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}
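
/*
 * Typical use, as in pdbmap and mmuinit: walk to level 2 to get (or, with
 * create set, allocate) the PTE slot for a kernel virtual address, then
 * fill it in, e.g.
 *
 *	pte = mmuwalk(pdb, va, 2, 1);
 *	*pte = pa|PTEWRITE|PTEVALID;
 *
 * A level-1 walk returns the PDE slot instead and never allocates.
 */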

/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE. The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */

static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;

	size = ROUND(size, BY2PG);
	if(pa == 0){
		print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print(" vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}
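
/*
 * A typical driver call looks something like this (the register block
 * address and size here are made up for illustration):
 *
 *	ulong *regs;
 *
 *	regs = vmap(0xFE000000, 0x1000);
 *	if(regs == nil)
 *		return -1;
 *	... use regs[] ...
 *	vunmap(regs, 0x1000);
 *
 * The returned pointer carries the original page offset, and the mapping
 * is made uncached (PTEUNCACHED), which is what device registers want.
 */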

static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}

/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
			va, size, getcallerpc(&v));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 * It is possible for vunmap to be called with up == nil,
	 * e.g. from the reset/init driver routines during system
	 * boot. In that case it suffices to flush the MACH(0) TLB
	 * and return.
	 */
	if(!active.thunderbirdsarego){
		putcr3(PADDR(MACHP(0)->pdb));
		return;
	}
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pgsz, *pte, *table;
	ulong flag, off;

	flag = pa&0xFFF;
	pa &= ~0xFFF;

	if((MACHP(0)->cpuiddx & Pse) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	for(off=0; off<size; off+=pgsz){
		table = &pdb[PDX(va+off)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+off, pa+off, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
			*table = (pa+off)|flag|PTESIZE|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+off, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+off, pa+off, *pte);
			*pte = (pa+off)|flag|PTEVALID;
			pgsz = BY2PG;
		}
	}
	return 0;
}

/*
 * Remove mappings. Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}

/*
 * Handle a fault by bringing vmap up to date.
 * We only copy pdb entries, and they never go away,
 * so no locking is needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}


/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack). The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)
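
/*
 * A sketch of typical kmap usage (the page pointer p is whatever Page*
 * the caller already holds):
 *
 *	KMap *k;
 *
 *	k = kmap(p);
 *	memset((void*)k, 0, BY2PG);
 *	kunmap(k);
 *
 * kmap returns the virtual address of the mapping cast to KMap*, as the
 * code below shows, so casting it back is all that is needed to use it.
 */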

KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);

	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}

void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}

/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

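/*
 * Pages with pa < -KZERO are already visible at KADDR(pa) through the
 * kernel's 1-1 map, so with fasttmp set tmpmap can hand back that address
 * directly; only pages above the 256MB window need the TMPADDR mapping,
 * and tmpunmap recognizes the fast case by the address being in KZERO space.
 */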
void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux", pa);
	return (void*)(pa+KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
	return va-KZERO;
}

/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	int i, n;
	Mach *mm;
	Page *pg;
	Proc *p;

	n = 0;
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->mmupdb){
			if(print){
				if(ref[pagenumber(p->mmupdb)])
					iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
						p->mmupdb->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->mmupdb)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
					p->mmupdb->pa, i, p->pid);
		}
		if(p->kmaptable){
			if(print){
				if(ref[pagenumber(p->kmaptable)])
					iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
						p->kmaptable->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->kmaptable)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
					p->kmaptable->pa, i, p->pid);
		}
		for(pg=p->mmuused; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
					pg->pa, i, p->pid);
		}
		for(pg=p->mmufree; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
					pg->pa, i, p->pid);
		}
	}
	if(!print)
		iprint("%d pages in proc mmu\n", n);
	n = 0;
	for(i=0; i<conf.nmach; i++){
		mm = MACHP(i);
		for(pg=mm->pdbpool; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is in cpu%d pdbpool\n",
						pg->pa, i);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
					pg->pa, i);
		}
	}
	if(!print){
		iprint("%d pages in mach pdbpools\n", n);
		for(i=0; i<conf.nmach; i++)
			iprint("cpu%d: %d pdballoc, %d pdbfree\n",
				i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
	}
}

void
checkfault(ulong, ulong)
{
}

/*
 * Return the number of bytes that can be accessed via KADDR(pa).
 * If pa is not a valid argument to KADDR, return 0.
 */
ulong
cankaddr(ulong pa)
{
	if(pa >= -KZERO)
		return 0;
	return -KZERO - pa;
}
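
/*
 * For example, with KZERO at 0xF0000000, -KZERO is 0x10000000 (256MB):
 * cankaddr(0x00F00000) returns 0x0F100000, while cankaddr(0x10000000)
 * and anything above it return 0, since such addresses fall outside the
 * 1-1 mapped window and must be reached with vmap or tmpmap instead.
 */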