/*
 * Memory mappings.  Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load).  The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0.  All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts.  The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree.  The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap.  The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap.  The total amount (256M)
 * could probably be reduced somewhat if desired.  The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer worth (at most 16M).  Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory.  It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages.  The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them.  For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */

#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	DATASEGM(p) 	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p) 	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p) 	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
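
/*
 * Each initialiser above supplies the two 32-bit words of an x86 segment
 * descriptor: a 0xFFFFF limit with 4K granularity (SEGG) and a zero base
 * for the flat data/code segments, and a byte-granular limit of
 * sizeof(Tss) with base b for the task segment.
 */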

Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]		EXEC16SEGM(0),	/* kernel code 16-bit */
};

static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

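/*
 * The page directory is inserted into itself (see mmuinit and mmupdballoc),
 * so the page tables appear as a 4M array of PTEs at VPT:
 * vpt[VPTX(va)] is the PTE for virtual address va, and vpd (the entry
 * for VPT itself) is the page directory viewed as a page table.
 */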
#define	vpt ((ulong*)VPT)
#define	VPTX(va)		(((ulong)(va))>>12)
#define	vpd (vpt+VPTX(VPT))

void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
		VPT, vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	if(m->tss == nil)
		panic("mmuinit: no memory");
	memset(m->tss, 0, sizeof(Tss));
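	/*
	 * The high 16 bits of iomap are the offset of the I/O permission
	 * bitmap within the TSS; pointing it beyond the TSS limit means
	 * there is no bitmap, so all user-mode I/O instructions trap.
	 */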
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

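	/*
	 * ptr[] is the 6-byte pseudo-descriptor that LGDT/LIDT expect:
	 * a 16-bit limit followed by a 32-bit linear base address,
	 * assembled here as three ushorts.
	 */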
	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/* make kernel text unwritable */
	for(x = KTZERO; x < (ulong)etext; x += BY2PG){
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb),  (ulong)m + BY2PG);
	ltr(TSSSEL);
}

/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3).  Since kernel memory mappings
 * are never removed, this is safe.  (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also mmukmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}

/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(getcr3());
}

/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
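		/* record vpd as this page's va: once the pdb is live it is reachable there via the VPT */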
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting.  Clear all the pde entries for user-space
 * memory mappings and device mappings.  Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
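	/*
	 * last now points at the next field of the final mmuused page,
	 * so the whole mmuused chain is prepended to mmufree in one splice.
	 */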
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}

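/*
 * Point the TSS at the given kernel stack and switch to the given
 * page directory.  Only ss0/esp0 are consulted on a user-to-kernel
 * transition; the ring 1 and 2 fields are filled in for completeness.
 * The CR3 write also flushes all non-global TLB entries.
 */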
static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	putcr3(pdb);
}

void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to.		- rsc
	 */

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0){
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
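	/*
	 * Install the PTE through the VPT.  A TLB flush is needed only
	 * if an existing valid mapping is being replaced; the TLB does
	 * not cache not-present entries (see vmapsync).
	 */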
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}

/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level.  This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
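/*
 * Level 1 returns the page-directory entry for va; level 2 returns the
 * page-table entry, e.g. mmuwalk(m->pdb, va, 2, 0) yields a pointer to
 * the PTE mapping va, or 0 if no table exists and create is 0.
 */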
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}

/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */

static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;

	size = ROUND(size, BY2PG);
	if(pa == 0){
		print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}

static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}

/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

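	/*
	 * First fit: requests of 4MB or more claim whole page-directory
	 * slots; smaller requests are placed inside an existing small-page
	 * table if possible, otherwise they claim an empty slot.
	 */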
	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
			va, size, getcallerpc(&v));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 * It is possible for vunmap to be called with up == nil,
	 * e.g. from the reset/init driver routines during system
	 * boot. In that case it suffices to flush the MACH(0) TLB
	 * and return.
	 */
	if(!active.thunderbirdsarego){
		putcr3(PADDR(MACHP(0)->pdb));
		return;
	}
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
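	/* spin until every other active processor has serviced its flushmmu request */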
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}

/*
 * Add kernel mappings for pa -> va for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pgsz, *pte, *table;
	ulong flag, off;

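	/*
	 * The low 12 bits of pa carry PTE attribute flags (e.g.
	 * PTEWRITE|PTEUNCACHED from vmap); split them from the
	 * page-aligned physical address.
	 */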
	flag = pa&0xFFF;
	pa &= ~0xFFF;

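	/* 4MB pages are usable only if the CPU reports PSE and it is enabled in CR4 (bit 4, 0x10) */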
	if((MACHP(0)->cpuiddx & Pse) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	for(off=0; off<size; off+=pgsz){
		table = &pdb[PDX(va+off)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+off, pa+off, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 && (size-off) >= 4*MB){
			*table = (pa+off)|flag|PTESIZE|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+off, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+off, pa+off, *pte);
			*pte = (pa+off)|flag|PTEVALID;
			pgsz = BY2PG;
		}
	}
	return 0;
}

/*
 * Remove mappings.  Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}

/*
 * Handle a fault by bringing vmap up to date.
 * Only copy pdb entries and they never go away,
 * so no locking needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}


/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack).  The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)

KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);

	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
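	/* start searching just past the most recently allocated slot, wrapping around NKPT */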
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}

void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}

/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

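	/*
	 * -KZERO is the size of the window mapped at KZERO, so pages with
	 * pa < -KZERO are permanently mapped and need no temporary mapping.
	 */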
	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(ulong pa)
{
	if(pa > (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux", pa);
	return (void*)(pa+KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux pc=%#p", va, getcallerpc(&v));
	return va-KZERO;
}

/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	int i, n;
	Mach *mm;
	Page *pg;
	Proc *p;

	n = 0;
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->mmupdb){
			if(print){
				if(ref[pagenumber(p->mmupdb)])
					iprint("page %#.8lux is proc %d (pid %lud) pdb\n",
						p->mmupdb->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->mmupdb)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) pdb but has other refs!\n",
					p->mmupdb->pa, i, p->pid);
		}
		if(p->kmaptable){
			if(print){
				if(ref[pagenumber(p->kmaptable)])
					iprint("page %#.8lux is proc %d (pid %lud) kmaptable\n",
						p->kmaptable->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(p->kmaptable)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is proc %d (pid %lud) kmaptable but has other refs!\n",
					p->kmaptable->pa, i, p->pid);
		}
		for(pg=p->mmuused; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmuused\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmuused but has other refs!\n",
					pg->pa, i, p->pid);
		}
		for(pg=p->mmufree; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is on proc %d (pid %lud) mmufree\n",
						pg->pa, i, p->pid);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is on proc %d (pid %lud) mmufree but has other refs!\n",
					pg->pa, i, p->pid);
		}
	}
	if(!print)
		iprint("%d pages in proc mmu\n", n);
	n = 0;
	for(i=0; i<conf.nmach; i++){
		mm = MACHP(i);
		for(pg=mm->pdbpool; pg; pg=pg->next){
			if(print){
				if(ref[pagenumber(pg)])
					iprint("page %#.8lux is in cpu%d pdbpool\n",
						pg->pa, i);
				continue;
			}
			if(ref[pagenumber(pg)]++ == 0)
				n++;
			else
				iprint("page %#.8lux is in cpu%d pdbpool but has other refs!\n",
					pg->pa, i);
		}
	}
	if(!print){
		iprint("%d pages in mach pdbpools\n", n);
		for(i=0; i<conf.nmach; i++)
			iprint("cpu%d: %d pdballoc, %d pdbfree\n",
				i, MACHP(i)->pdballoc, MACHP(i)->pdbfree);
	}
}

void
checkfault(ulong, ulong)
{
}

/*
 * Return the number of bytes that can be accessed via KADDR(pa).
 * If pa is not a valid argument to KADDR, return 0.
 */
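/* -KZERO is 4GB-KZERO: the number of bytes mapped at and above KZERO */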
ulong
cankaddr(ulong pa)
{
	if(pa >= -KZERO)
		return 0;
	return -KZERO - pa;
}