1 /* 2 * Memory mappings. Life was easier when 2G of memory was enough. 3 * 4 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M 5 * (9load sits under 1M during the load). The memory from KZERO to the 6 * top of memory is mapped 1-1 with physical memory, starting at physical 7 * address 0. All kernel memory and data structures (i.e., the entries stored 8 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000, 9 * then the kernel can only have 256MB of memory for itself. 10 * 11 * The 256M below KZERO comprises three parts. The lowest 4M is the 12 * virtual page table, a virtual address representation of the current 13 * page table tree. The second 4M is used for temporary per-process 14 * mappings managed by kmap and kunmap. The remaining 248M is used 15 * for global (shared by all procs and all processors) device memory 16 * mappings and managed by vmap and vunmap. The total amount (256M) 17 * could probably be reduced somewhat if desired. The largest device 18 * mapping is that of the video card, and even though modern video cards 19 * have embarrassing amounts of memory, the video drivers only use one 20 * frame buffer worth (at most 16M). Each is described in more detail below. 21 * 22 * The VPT is a 4M frame constructed by inserting the pdb into itself. 23 * This short-circuits one level of the page tables, with the result that 24 * the contents of second-level page tables can be accessed at VPT. 25 * We use the VPT to edit the page tables (see mmu) after inserting them 26 * into the page directory. It is a convenient mechanism for mapping what 27 * might be otherwise-inaccessible pages. The idea was borrowed from 28 * the Exokernel. 29 * 30 * The VPT doesn't solve all our problems, because we still need to 31 * prepare page directories before we can install them. For that, we 32 * use tmpmap/tmpunmap, which map a single page at TMPADDR. 
33 */ 34 35 #include "u.h" 36 #include "../port/lib.h" 37 #include "mem.h" 38 #include "dat.h" 39 #include "fns.h" 40 #include "io.h" 41 42 /* 43 * Simple segment descriptors with no translation. 44 */ 45 #define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW } 46 #define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } 47 #define EXEC16SEGM(p) { 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR } 48 #define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\ 49 ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP } 50 51 Segdesc gdt[NGDT] = 52 { 53 [NULLSEG] { 0, 0}, /* null descriptor */ 54 [KDSEG] DATASEGM(0), /* kernel data/stack */ 55 [KESEG] EXECSEGM(0), /* kernel code */ 56 [UDSEG] DATASEGM(3), /* user data/stack */ 57 [UESEG] EXECSEGM(3), /* user code */ 58 [TSSSEG] TSSSEGM(0,0), /* tss segment */ 59 [KESEG16] EXEC16SEGM(0), /* kernel code 16-bit */ 60 }; 61 62 static int didmmuinit; 63 static void taskswitch(ulong, ulong); 64 static void memglobal(void); 65 66 #define vpt ((ulong*)VPT) 67 #define VPTX(va) (((ulong)(va))>>12) 68 #define vpd (vpt+VPTX(VPT)) 69 70 void 71 mmuinit0(void) 72 { 73 memmove(m->gdt, gdt, sizeof gdt); 74 } 75 76 void 77 mmuinit(void) 78 { 79 ulong x, *p; 80 ushort ptr[3]; 81 82 didmmuinit = 1; 83 84 if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n", 85 VPT, vpd, KMAP); 86 87 memglobal(); 88 m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID; 89 90 m->tss = malloc(sizeof(Tss)); 91 memset(m->tss, 0, sizeof(Tss)); 92 m->tss->iomap = 0xDFFF<<16; 93 94 /* 95 * We used to keep the GDT in the Mach structure, but it 96 * turns out that that slows down access to the rest of the 97 * page. Since the Mach structure is accessed quite often, 98 * it pays off anywhere from a factor of 1.25 to 2 on real 99 * hardware to separate them (the AMDs are more sensitive 100 * than Intels in this regard). Under VMware it pays off 101 * a factor of about 10 to 100. 
102 */ 103 memmove(m->gdt, gdt, sizeof gdt); 104 x = (ulong)m->tss; 105 m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss); 106 m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP; 107 108 ptr[0] = sizeof(gdt)-1; 109 x = (ulong)m->gdt; 110 ptr[1] = x & 0xFFFF; 111 ptr[2] = (x>>16) & 0xFFFF; 112 lgdt(ptr); 113 114 ptr[0] = sizeof(Segdesc)*256-1; 115 x = IDTADDR; 116 ptr[1] = x & 0xFFFF; 117 ptr[2] = (x>>16) & 0xFFFF; 118 lidt(ptr); 119 120 /* make kernel text unwritable */ 121 for(x = KTZERO; x < (ulong)etext; x += BY2PG){ 122 p = mmuwalk(m->pdb, x, 2, 0); 123 if(p == nil) 124 panic("mmuinit"); 125 *p &= ~PTEWRITE; 126 } 127 128 taskswitch(PADDR(m->pdb), (ulong)m + BY2PG); 129 ltr(TSSSEL); 130 } 131 132 /* 133 * On processors that support it, we set the PTEGLOBAL bit in 134 * page table and page directory entries that map kernel memory. 135 * Doing this tells the processor not to bother flushing them 136 * from the TLB when doing the TLB flush associated with a 137 * context switch (write to CR3). Since kernel memory mappings 138 * are never removed, this is safe. (If we ever remove kernel memory 139 * mappings, we can do a full flush by turning off the PGE bit in CR4, 140 * writing to CR3, and then turning the PGE bit back on.) 141 * 142 * See also mmukmap below. 143 * 144 * Processor support for the PTEGLOBAL bit is enabled in devarch.c. 145 */ 146 static void 147 memglobal(void) 148 { 149 int i, j; 150 ulong *pde, *pte; 151 152 /* only need to do this once, on bootstrap processor */ 153 if(m->machno != 0) 154 return; 155 156 if(!m->havepge) 157 return; 158 159 pde = m->pdb; 160 for(i=PDX(KZERO); i<1024; i++){ 161 if(pde[i] & PTEVALID){ 162 pde[i] |= PTEGLOBAL; 163 if(!(pde[i] & PTESIZE)){ 164 pte = KADDR(pde[i]&~(BY2PG-1)); 165 for(j=0; j<1024; j++) 166 if(pte[j] & PTEVALID) 167 pte[j] |= PTEGLOBAL; 168 } 169 } 170 } 171 } 172 173 /* 174 * Flush all the user-space and device-mapping mmu info 175 * for this process, because something has been deleted. 
* It will be paged back in on demand.
 * Runs with interrupts disabled (splhi) for the duration.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;		/* makes mmuswitch call mmuptefree before switching */
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 * Uses invlpg when X86FAMILY(m->cpuidax) is 4 or higher;
 * otherwise rewrites CR3 with its current value.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(getcr3());
}
	
/*
 * Allocate a new page for a page directory. 
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 *
 * A page taken from m->pdbpool is returned as is.  Otherwise a
 * fresh page is obtained from newpage, filled with a copy of
 * m->pdb, and given its own VPT entry pointing at itself.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		/* newpage is called at low priority; go back to splhi for tmpmap */
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

/*
 * Give back a page directory page.  It goes into this processor's
 * m->pdbpool while the pool holds fewer than 10 pages, and onto
 * proc->mmufree otherwise.  Panics if islo() is true on entry.
 */
static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting.  Clear all the pde entries for user-space
 * memory mappings and device mappings.  Any entries that
 * are needed will be paged back in as necessary.
*/
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		/* daddr holds the pdb index this page table was installed at */
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	/* splice the whole mmuused list onto the front of mmufree */
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}

/*
 * Set the ring 0, 1 and 2 stack segment and stack pointer in this
 * processor's Tss to KDSEL and stack, then load pdb into CR3.
 */
static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	putcr3(pdb);
}

/*
 * Switch the mmu to proc.  If proc->newtlb is set, its page tables
 * are first released with mmuptefree.  When proc has its own pdb,
 * the entry for MACHADDR is copied in from m->pdb before the switch;
 * otherwise the processor's prototype m->pdb is used.
 * The kernel stack top passed to taskswitch is proc->kstack+KSTACK.
 */
void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
*/
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		/* sanity checks: a kmaptable implies a pdb, one reference, no live kmaps */
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d\n", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d\n", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	/* every page on mmufree must drop to zero references before it is freed */
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d\n", page->ref);
		pagechainhead(page);
	}
	/* pages went back to palloc: wake anything waiting on palloc.r */
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 * Does nothing if up already has one.  The new pdb gets this
 * processor's MACHADDR entry and is loaded into CR3 immediately.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;
	
	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
*/
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up 
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then 
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 * 
	 * In any event, I'm tired of searching for this bug.  
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to.		- rsc
	 */
	
	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		/* no page table covers va yet: reuse one from mmufree or allocate */
		if(up->mmufree == 0){
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		/* remember the pdb slot so mmuptefree can clear it later */
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	/* only a previously valid entry needs flushing from the tlb */
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}

/*
 * Double-check the user MMU.
 * Error checking only.
 * Prints a diagnostic when va is mapped in the current page
 * tables but to a physical page other than pa; changes nothing.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
* If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level.  This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 *
 * Level 1 returns the page directory entry, level 2 the page table
 * entry; any other level returns 0.  A level 2 walk panics if the
 * directory entry has PTESIZE set or if allocation fails.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX\n", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}

/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */

/* held by vmap around address allocation and insertion of the mapping */
static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 * pa need not be page aligned: the mapping covers whole pages and
 * the returned pointer carries the same offset within the page.
 * The mapping is entered with PTEUNCACHED|PTEWRITE.
 * Returns nil if pa lies in page 0 or no virtual space is free.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;
	
	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;

	size = ROUND(size, BY2PG);
	if(pa == 0){
		print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0 
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	/* osize is only referenced by the disabled debugging print below */
	USED(osize);
//	print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}

/*
 * Find the first run of count consecutive zero words in a[0..n-1].
 * Returns the index where the run starts, or -1 if there is none.
 */
static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}

/*
 * Look for free space in the vmap.
 * Requests of 4MB or more get a run of free page directory entries.
 * Smaller requests first try to fit inside an existing page table,
 * then fall back to a free page directory entry.
 * Returns the virtual address, or 0 if nothing suitable is free.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;
	
	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;
		
	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
629 */ 630 void 631 vunmap(void *v, int size) 632 { 633 int i; 634 ulong va, o; 635 Mach *nm; 636 Proc *p; 637 638 /* 639 * might not be aligned 640 */ 641 va = (ulong)v; 642 o = va&(BY2PG-1); 643 va -= o; 644 size += o; 645 size = ROUND(size, BY2PG); 646 647 if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE) 648 panic("vunmap va=%#.8lux size=%#x pc=%#.8lux\n", 649 va, size, getcallerpc(&va)); 650 651 pdbunmap(MACHP(0)->pdb, va, size); 652 653 /* 654 * Flush mapping from all the tlbs and copied pdbs. 655 * This can be (and is) slow, since it is called only rarely. 656 * It is possible for vunmap to be called with up == nil, 657 * e.g. from the reset/init driver routines during system 658 * boot. In that case it suffices to flush the MACH(0) TLB 659 * and return. 660 */ 661 if(!active.thunderbirdsarego){ 662 putcr3(PADDR(MACHP(0)->pdb)); 663 return; 664 } 665 for(i=0; i<conf.nproc; i++){ 666 p = proctab(i); 667 if(p->state == Dead) 668 continue; 669 if(p != up) 670 p->newtlb = 1; 671 } 672 for(i=0; i<conf.nmach; i++){ 673 nm = MACHP(i); 674 if(nm != m) 675 nm->flushmmu = 1; 676 } 677 flushmmu(); 678 for(i=0; i<conf.nmach; i++){ 679 nm = MACHP(i); 680 if(nm != m) 681 while((active.machs&(1<<nm->machno)) && nm->flushmmu) 682 ; 683 } 684 } 685 686 /* 687 * Add kernel mappings for pa -> va for a section of size bytes. 688 */ 689 int 690 pdbmap(ulong *pdb, ulong pa, ulong va, int size) 691 { 692 int pse; 693 ulong pgsz, *pte, *table; 694 ulong flag, off; 695 696 flag = pa&0xFFF; 697 pa &= ~0xFFF; 698 699 if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10)) 700 pse = 1; 701 else 702 pse = 0; 703 704 for(off=0; off<size; off+=pgsz){ 705 table = &pdb[PDX(va+off)]; 706 if((*table&PTEVALID) && (*table&PTESIZE)) 707 panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux", 708 va+off, pa+off, *table); 709 710 /* 711 * Check if it can be mapped using a 4MB page: 712 * va, pa aligned and size >= 4MB and processor can do it. 
713 */ 714 if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){ 715 *table = (pa+off)|flag|PTESIZE|PTEVALID; 716 pgsz = 4*MB; 717 }else{ 718 pte = mmuwalk(pdb, va+off, 2, 1); 719 if(*pte&PTEVALID) 720 panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux", 721 va+off, pa+off, *pte); 722 *pte = (pa+off)|flag|PTEVALID; 723 pgsz = BY2PG; 724 } 725 } 726 return 0; 727 } 728 729 /* 730 * Remove mappings. Must already exist, for sanity. 731 * Only used for kernel mappings, so okay to use KADDR. 732 */ 733 static void 734 pdbunmap(ulong *pdb, ulong va, int size) 735 { 736 ulong vae; 737 ulong *table; 738 739 vae = va+size; 740 while(va < vae){ 741 table = &pdb[PDX(va)]; 742 if(!(*table & PTEVALID)){ 743 panic("vunmap: not mapped"); 744 /* 745 va = (va+4*MB-1) & ~(4*MB-1); 746 continue; 747 */ 748 } 749 if(*table & PTESIZE){ 750 *table = 0; 751 va = (va+4*MB-1) & ~(4*MB-1); 752 continue; 753 } 754 table = KADDR(PPN(*table)); 755 if(!(table[PTX(va)] & PTEVALID)) 756 panic("vunmap: not mapped"); 757 table[PTX(va)] = 0; 758 va += BY2PG; 759 } 760 } 761 762 /* 763 * Handle a fault by bringing vmap up to date. 764 * Only copy pdb entries and they never go away, 765 * so no locking needed. 766 */ 767 int 768 vmapsync(ulong va) 769 { 770 ulong entry, *table; 771 772 if(va < VMAP || va >= VMAP+VMAPSIZE) 773 return 0; 774 775 entry = MACHP(0)->pdb[PDX(va)]; 776 if(!(entry&PTEVALID)) 777 return 0; 778 if(!(entry&PTESIZE)){ 779 /* make sure entry will help the fault */ 780 table = KADDR(PPN(entry)); 781 if(!(table[PTX(va)]&PTEVALID)) 782 return 0; 783 } 784 vpd[PDX(va)] = entry; 785 /* 786 * TLB doesn't cache negative results, so no flush needed. 787 */ 788 return 1; 789 } 790 791 792 /* 793 * KMap is used to map individual pages into virtual memory. 794 * It is rare to have more than a few KMaps at a time (in the 795 * absence of interrupts, only two at a time are ever used, 796 * but interrupts can stack). 
* The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
/* kpt is the run of page table entries covering the KMAP window; NKPT is how many there are */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)

/*
 * Map page into the current process's KMAP window and return its
 * virtual address.  The first call for a process allocates the page
 * table for the window (up->kmaptable) and uses slot 0; later calls
 * search for a free slot starting just after up->lastkmap.
 * Panics if there is no current process or every slot is in use.
 */
KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	
	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		/* the new table is reachable through kpt once the stale translation is flushed */
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}

/*
 * Undo a kmap: clear the page table entry for k and flush it from
 * the tlb.  Panics if k is outside the KMAP window, is not currently
 * mapped, or the process has no kmaps at all.
 */
void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}

/*
 * Temporary one-page mapping
* used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

/*
 * Return an address at which page p can be read and written.
 * With fasttmp, a page whose pa is below -KZERO is simply returned
 * as its KADDR.  Otherwise the page table entry for TMPADDR is
 * pointed at p and TMPADDR is returned.  Panics if islo() is true,
 * or if the TMPADDR entry is missing or already in use.
 */
void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;
	
	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared 
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		/* dump the entries from KZERO up to CPU0MACH before giving up */
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	/* when idle the entry maps TMPADDR to its own physical page */
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

/*
 * Release an address obtained from tmpmap.  Addresses handed out
 * by the fasttmp path need no work; for TMPADDR the page table
 * entry is restored to its idle value and flushed from the tlb.
 * Panics if islo() is true or v is not a tmpmap address.
 */
void
tmpunmap(void *v)
{
	ulong *entry;
	
	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
/*
 * Convert a physical address to its kernel virtual address.
 * NOTE(review): pa == -KZERO passes this check although pa+KZERO
 * then wraps to 0, while cankaddr and tmpmap treat pa >= -KZERO
 * as outside the window - confirm whether > is intentional.
 */
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux", pa);
	return (void*)(pa+KZERO);
}

/*
 * Convert a kernel virtual address to its physical address.
 * Panics, reporting the caller's pc, if v is below KZERO.
 */
ulong
paddr(void *v)
{
	ulong va;
	
	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
	return va-KZERO;
}

/*
 * More debugging.
955 */ 956 void 957 countpagerefs(ulong *ref, int print) 958 { 959 int i, n; 960 Mach *mm; 961 Page *pg; 962 Proc *p; 963 964 n = 0; 965 for(i=0; i<conf.nproc; i++){ 966 p = proctab(i); 967 if(p->mmupdb){ 968 if(print){ 969 if(ref[pagenumber(p->mmupdb)]) 970 iprint("page %#.8lux is proc %d (pid %lud) pdb\n", 971 p->mmupdb->pa, i, p->pid); 972 continue; 973 } 974 if(ref[pagenumber(p->mmupdb)]++ == 0) 975 n++; 976 else 977 iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n", 978 p->mmupdb->pa, i, p->pid); 979 } 980 if(p->kmaptable){ 981 if(print){ 982 if(ref[pagenumber(p->kmaptable)]) 983 iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n", 984 p->kmaptable->pa, i, p->pid); 985 continue; 986 } 987 if(ref[pagenumber(p->kmaptable)]++ == 0) 988 n++; 989 else 990 iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n", 991 p->kmaptable->pa, i, p->pid); 992 } 993 for(pg=p->mmuused; pg; pg=pg->next){ 994 if(print){ 995 if(ref[pagenumber(pg)]) 996 iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n", 997 pg->pa, i, p->pid); 998 continue; 999 } 1000 if(ref[pagenumber(pg)]++ == 0) 1001 n++; 1002 else 1003 iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n", 1004 pg->pa, i, p->pid); 1005 } 1006 for(pg=p->mmufree; pg; pg=pg->next){ 1007 if(print){ 1008 if(ref[pagenumber(pg)]) 1009 iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n", 1010 pg->pa, i, p->pid); 1011 continue; 1012 } 1013 if(ref[pagenumber(pg)]++ == 0) 1014 n++; 1015 else 1016 iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n", 1017 pg->pa, i, p->pid); 1018 } 1019 } 1020 if(!print) 1021 iprint("%d pages in proc mmu\n", n); 1022 n = 0; 1023 for(i=0; i<conf.nmach; i++){ 1024 mm = MACHP(i); 1025 for(pg=mm->pdbpool; pg; pg=pg->next){ 1026 if(print){ 1027 if(ref[pagenumber(pg)]) 1028 iprint("page %#.8lux is in cpu%d pdbpool\n", 1029 pg->pa, i); 1030 continue; 1031 } 1032 if(ref[pagenumber(pg)]++ == 0) 1033 n++; 1034 else 
1035 iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n", 1036 pg->pa, i); 1037 } 1038 } 1039 if(!print){ 1040 iprint("%d pages in mach pdbpools\n", n); 1041 for(i=0; i<conf.nmach; i++) 1042 iprint("cpu%d: %d pdballoc, %d pdbfree\n", 1043 i, MACHP(i)->pdballoc, MACHP(i)->pdbfree); 1044 } 1045 } 1046 1047 void 1048 checkfault(ulong, ulong) 1049 { 1050 } 1051 1052 /* 1053 * Return the number of bytes that can be accessed via KADDR(pa). 1054 * If pa is not a valid argument to KADDR, return 0. 1055 */ 1056 ulong 1057 cankaddr(ulong pa) 1058 { 1059 if(pa >= -KZERO) 1060 return 0; 1061 return -KZERO - pa; 1062 } 1063 1064