/*
 * Memory mappings.  Life was easier when 2G of memory was enough.
 *
 * The kernel memory starts at KZERO, with the text loaded at KZERO+1M
 * (9load sits under 1M during the load).  The memory from KZERO to the
 * top of memory is mapped 1-1 with physical memory, starting at physical
 * address 0.  All kernel memory and data structures (i.e., the entries stored
 * into conf.mem) must sit in this physical range: if KZERO is at 0xF0000000,
 * then the kernel can only have 256MB of memory for itself.
 *
 * The 256M below KZERO comprises three parts.  The lowest 4M is the
 * virtual page table, a virtual address representation of the current
 * page table tree.  The second 4M is used for temporary per-process
 * mappings managed by kmap and kunmap.  The remaining 248M is used
 * for global (shared by all procs and all processors) device memory
 * mappings and managed by vmap and vunmap.  The total amount (256M)
 * could probably be reduced somewhat if desired.  The largest device
 * mapping is that of the video card, and even though modern video cards
 * have embarrassing amounts of memory, the video drivers only use one
 * frame buffer worth (at most 16M).  Each is described in more detail below.
 *
 * The VPT is a 4M frame constructed by inserting the pdb into itself.
 * This short-circuits one level of the page tables, with the result that
 * the contents of second-level page tables can be accessed at VPT.
 * We use the VPT to edit the page tables (see mmu) after inserting them
 * into the page directory.  It is a convenient mechanism for mapping what
 * might be otherwise-inaccessible pages.  The idea was borrowed from
 * the Exokernel.
 *
 * The VPT doesn't solve all our problems, because we still need to
 * prepare page directories before we can install them.  For that, we
 * use tmpmap/tmpunmap, which map a single page at TMPADDR.
 */

#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"io.h"

/*
 * Simple segment descriptors with no translation.
 */
#define	DATASEGM(p)	{ 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define	EXECSEGM(p)	{ 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	EXEC16SEGM(p)	{ 0xFFFF, SEGG|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define	TSSSEGM(b,p)	{ ((b)<<16)|sizeof(Tss),\
			  ((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }

void realmodeintrinst(void);
void _stop32pg(void);

Segdesc gdt[NGDT] =
{
[NULLSEG]	{ 0, 0},		/* null descriptor */
[KDSEG]		DATASEGM(0),		/* kernel data/stack */
[KESEG]		EXECSEGM(0),		/* kernel code */
[UDSEG]		DATASEGM(3),		/* user data/stack */
[UESEG]		EXECSEGM(3),		/* user code */
[TSSSEG]	TSSSEGM(0,0),		/* tss segment */
[KESEG16]	EXEC16SEGM(0),		/* kernel code 16-bit */
};

static int didmmuinit;
static void taskswitch(ulong, ulong);
static void memglobal(void);

#define	vpt ((ulong*)VPT)
#define	VPTX(va)		(((ulong)(va))>>12)
#define	vpd (vpt+VPTX(VPT))
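
/*
 * Worked example of the self-map arithmetic above (illustrative only;
 * the exact numbers depend on the values in mem.h):
 *
 *	vpt[VPTX(va)]	is the level-2 PTE mapping va, i.e. the ulong at
 *			virtual address VPT + (va>>12)*sizeof(ulong);
 *	vpd[PDX(va)]	is the level-1 PDE for va, because vpd points at
 *			the 4K window of the VPT that maps the VPT itself,
 *			and that window is the page directory.
 *
 * Both work only after mmuinit has set pdb[PDX(VPT)] = PADDR(pdb).
 */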

void
mmuinit0(void)
{
	memmove(m->gdt, gdt, sizeof gdt);
}

void
mmuinit(void)
{
	ulong x, *p;
	ushort ptr[3];

	didmmuinit = 1;

	if(0) print("vpt=%#.8ux vpd=%#p kmap=%#.8ux\n",
		VPT, vpd, KMAP);

	memglobal();
	m->pdb[PDX(VPT)] = PADDR(m->pdb)|PTEWRITE|PTEVALID;

	m->tss = malloc(sizeof(Tss));
	if(m->tss == nil)
		panic("mmuinit: no memory");
	memset(m->tss, 0, sizeof(Tss));
	m->tss->iomap = 0xDFFF<<16;

	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that that slows down access to the rest of the
	 * page.  Since the Mach structure is accessed quite often,
	 * it pays off anywhere from a factor of 1.25 to 2 on real
	 * hardware to separate them (the AMDs are more sensitive
	 * than Intels in this regard).  Under VMware it pays off
	 * a factor of about 10 to 100.
	 */
	memmove(m->gdt, gdt, sizeof gdt);
	x = (ulong)m->tss;
	m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
	m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;

	ptr[0] = sizeof(gdt)-1;
	x = (ulong)m->gdt;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lgdt(ptr);

	ptr[0] = sizeof(Segdesc)*256-1;
	x = IDTADDR;
	ptr[1] = x & 0xFFFF;
	ptr[2] = (x>>16) & 0xFFFF;
	lidt(ptr);

	/*
	 * this kills 9load but not 9boot.  9load dies at the taskswitch.
	 * should track down exactly why some day.
	 */
	/* make most kernel text unwritable, except the real-mode interrupt page */
if(0)	for(x = PGROUND((ulong)_stop32pg); x < (ulong)etext; x += BY2PG){
		if(x == ((ulong)realmodeintrinst & ~(BY2PG-1)))
			continue;
		p = mmuwalk(m->pdb, x, 2, 0);
		if(p == nil)
			panic("mmuinit");
		*p &= ~PTEWRITE;
	}

	taskswitch(PADDR(m->pdb), (ulong)m + MACHSIZE);
	ltr(TSSSEL);
}

/*
 * On processors that support it, we set the PTEGLOBAL bit in
 * page table and page directory entries that map kernel memory.
 * Doing this tells the processor not to bother flushing them
 * from the TLB when doing the TLB flush associated with a
 * context switch (write to CR3).  Since kernel memory mappings
 * are never removed, this is safe.  (If we ever remove kernel memory
 * mappings, we can do a full flush by turning off the PGE bit in CR4,
 * writing to CR3, and then turning the PGE bit back on.)
 *
 * See also mmukmap below.
 *
 * Processor support for the PTEGLOBAL bit is enabled in devarch.c.
 */
static void
memglobal(void)
{
	int i, j;
	ulong *pde, *pte;

	/* only need to do this once, on bootstrap processor */
	if(m->machno != 0)
		return;

	if(!m->havepge)
		return;

	pde = m->pdb;
	for(i=PDX(KZERO); i<1024; i++){
		if(pde[i] & PTEVALID){
			pde[i] |= PTEGLOBAL;
			if(!(pde[i] & PTESIZE)){
				pte = KADDR(pde[i]&~(BY2PG-1));
				for(j=0; j<1024; j++)
					if(pte[j] & PTEVALID)
						pte[j] |= PTEGLOBAL;
			}
		}
	}
}
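
/*
 * Illustrative sketch of the full flush described above, for the day
 * kernel mappings do get removed.  Nothing in this file needs it yet;
 * it assumes the usual putcr4 helper exists alongside getcr4, and
 * relies on CR4 bit 7 being PGE:
 *
 *	ulong cr4;
 *
 *	cr4 = getcr4();
 *	putcr4(cr4 & ~0x80);		clear PGE
 *	putcr3(getcr3());		reload CR3: flushes even PTEGLOBAL entries
 *	putcr4(cr4);			restore PGE
 */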

/*
 * Flush all the user-space and device-mapping mmu info
 * for this process, because something has been deleted.
 * It will be paged back in on demand.
 */
void
flushmmu(void)
{
	int s;

	s = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(s);
}

/*
 * Flush a single page mapping from the tlb.
 */
void
flushpg(ulong va)
{
	if(X86FAMILY(m->cpuidax) >= 4)
		invlpg(va);
	else
		putcr3(getcr3());
}

/*
 * Allocate a new page for a page directory.
 * We keep a small cache of pre-initialized
 * page directories in each mach.
 */
static Page*
mmupdballoc(void)
{
	int s;
	Page *page;
	ulong *pdb;

	s = splhi();
	m->pdballoc++;
	if(m->pdbpool == 0){
		spllo();
		page = newpage(0, 0, 0);
		page->va = (ulong)vpd;
		splhi();
		pdb = tmpmap(page);
		memmove(pdb, m->pdb, BY2PG);
		pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;	/* set up VPT */
		tmpunmap(pdb);
	}else{
		page = m->pdbpool;
		m->pdbpool = page->next;
		m->pdbcnt--;
	}
	splx(s);
	return page;
}

static void
mmupdbfree(Proc *proc, Page *p)
{
	if(islo())
		panic("mmupdbfree: islo");
	m->pdbfree++;
	if(m->pdbcnt >= 10){
		p->next = proc->mmufree;
		proc->mmufree = p;
	}else{
		p->next = m->pdbpool;
		m->pdbpool = p;
		m->pdbcnt++;
	}
}

/*
 * A user-space memory segment has been deleted, or the
 * process is exiting.  Clear all the pde entries for user-space
 * memory mappings and device mappings.  Any entries that
 * are needed will be paged back in as necessary.
 */
static void
mmuptefree(Proc* proc)
{
	int s;
	ulong *pdb;
	Page **last, *page;

	if(proc->mmupdb == nil || proc->mmuused == nil)
		return;
	s = splhi();
	pdb = tmpmap(proc->mmupdb);
	last = &proc->mmuused;
	for(page = *last; page; page = page->next){
		pdb[page->daddr] = 0;
		last = &page->next;
	}
	tmpunmap(pdb);
	splx(s);
	*last = proc->mmufree;
	proc->mmufree = proc->mmuused;
	proc->mmuused = 0;
}
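
/*
 * Point the TSS's ring-0 (and 1, 2) stack at the given kernel stack
 * and load pdb into CR3, switching address spaces.  The stack fields
 * matter because the processor reads ss0/esp0 from the TSS on a
 * user-to-kernel transition.
 */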
static void
taskswitch(ulong pdb, ulong stack)
{
	Tss *tss;

	tss = m->tss;
	tss->ss0 = KDSEL;
	tss->esp0 = stack;
	tss->ss1 = KDSEL;
	tss->esp1 = stack;
	tss->ss2 = KDSEL;
	tss->esp2 = stack;
	putcr3(pdb);
}
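
/*
 * Called at context switch: drop stale user mappings if newtlb is set,
 * refresh the Mach pde in the process's pdb, and install either the
 * process's page directory or the processor's prototype m->pdb if the
 * process has none.
 */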
void
mmuswitch(Proc* proc)
{
	ulong *pdb;

	if(proc->newtlb){
		mmuptefree(proc);
		proc->newtlb = 0;
	}

	if(proc->mmupdb){
		pdb = tmpmap(proc->mmupdb);
		pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
		tmpunmap(pdb);
		taskswitch(proc->mmupdb->pa, (ulong)(proc->kstack+KSTACK));
	}else
		taskswitch(PADDR(m->pdb), (ulong)(proc->kstack+KSTACK));
}

/*
 * Release any pages allocated for a page directory base or page-tables
 * for this process:
 *   switch to the prototype pdb for this processor (m->pdb);
 *   call mmuptefree() to place all pages used for page-tables (proc->mmuused)
 *   onto the process' free list (proc->mmufree). This has the side-effect of
 *   cleaning any user entries in the pdb (proc->mmupdb);
 *   if there's a pdb put it in the cache of pre-initialised pdb's
 *   for this processor (m->pdbpool) or on the process' free list;
 *   finally, place any pages freed back into the free pool (palloc).
 * This routine is only called from schedinit() with palloc locked.
 */
void
mmurelease(Proc* proc)
{
	Page *page, *next;
	ulong *pdb;

	if(islo())
		panic("mmurelease: islo");
	taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
	if(proc->kmaptable){
		if(proc->mmupdb == nil)
			panic("mmurelease: no mmupdb");
		if(--proc->kmaptable->ref)
			panic("mmurelease: kmap ref %d", proc->kmaptable->ref);
		if(proc->nkmap)
			panic("mmurelease: nkmap %d", proc->nkmap);
		/*
		 * remove kmaptable from pdb before putting pdb up for reuse.
		 */
		pdb = tmpmap(proc->mmupdb);
		if(PPN(pdb[PDX(KMAP)]) != proc->kmaptable->pa)
			panic("mmurelease: bad kmap pde %#.8lux kmap %#.8lux",
				pdb[PDX(KMAP)], proc->kmaptable->pa);
		pdb[PDX(KMAP)] = 0;
		tmpunmap(pdb);
		/*
		 * move kmaptable to free list.
		 */
		pagechainhead(proc->kmaptable);
		proc->kmaptable = 0;
	}
	if(proc->mmupdb){
		mmuptefree(proc);
		mmupdbfree(proc, proc->mmupdb);
		proc->mmupdb = 0;
	}
	for(page = proc->mmufree; page; page = next){
		next = page->next;
		if(--page->ref)
			panic("mmurelease: page->ref %d", page->ref);
		pagechainhead(page);
	}
	if(proc->mmufree && palloc.r.p)
		wakeup(&palloc.r);
	proc->mmufree = 0;
}

/*
 * Allocate and install pdb for the current process.
 */
static void
upallocpdb(void)
{
	int s;
	ulong *pdb;
	Page *page;

	if(up->mmupdb != nil)
		return;
	page = mmupdballoc();
	s = splhi();
	if(up->mmupdb != nil){
		/*
		 * Perhaps we got an interrupt while
		 * mmupdballoc was sleeping and that
		 * interrupt allocated an mmupdb?
		 * Seems unlikely.
		 */
		mmupdbfree(up, page);
		splx(s);
		return;
	}
	pdb = tmpmap(page);
	pdb[PDX(MACHADDR)] = m->pdb[PDX(MACHADDR)];
	tmpunmap(pdb);
	up->mmupdb = page;
	putcr3(up->mmupdb->pa);
	splx(s);
}

/*
 * Update the mmu in response to a user fault.  pa may have PTEWRITE set.
 */
void
putmmu(ulong va, ulong pa, Page*)
{
	int old, s;
	Page *page;

	if(up->mmupdb == nil)
		upallocpdb();

	/*
	 * We should be able to get through this with interrupts
	 * turned on (if we get interrupted we'll just pick up
	 * where we left off) but we get many faults accessing
	 * vpt[] near the end of this function, and they always happen
	 * after the process has been switched out and then
	 * switched back, usually many times in a row (perhaps
	 * it cannot switch back successfully for some reason).
	 *
	 * In any event, I'm tired of searching for this bug.
	 * Turn off interrupts during putmmu even though
	 * we shouldn't need to.		- rsc
	 */

	s = splhi();
	if(!(vpd[PDX(va)]&PTEVALID)){
		if(up->mmufree == 0){
			spllo();
			page = newpage(0, 0, 0);
			splhi();
		}
		else{
			page = up->mmufree;
			up->mmufree = page->next;
		}
		vpd[PDX(va)] = PPN(page->pa)|PTEUSER|PTEWRITE|PTEVALID;
		/* page is now mapped into the VPT - clear it */
		memset((void*)(VPT+PDX(va)*BY2PG), 0, BY2PG);
		page->daddr = PDX(va);
		page->next = up->mmuused;
		up->mmuused = page;
	}
	old = vpt[VPTX(va)];
	vpt[VPTX(va)] = pa|PTEUSER|PTEVALID;
	if(old&PTEVALID)
		flushpg(va);
	if(getcr3() != up->mmupdb->pa)
		print("bad cr3 %#.8lux %#.8lux\n", getcr3(), up->mmupdb->pa);
	splx(s);
}

/*
 * Double-check the user MMU.
 * Error checking only.
 */
void
checkmmu(ulong va, ulong pa)
{
	if(up->mmupdb == 0)
		return;
	if(!(vpd[PDX(va)]&PTEVALID) || !(vpt[VPTX(va)]&PTEVALID))
		return;
	if(PPN(vpt[VPTX(va)]) != pa)
		print("%ld %s: va=%#08lux pa=%#08lux pte=%#08lux\n",
			up->pid, up->text,
			va, pa, vpt[VPTX(va)]);
}

/*
 * Walk the page-table pointed to by pdb and return a pointer
 * to the entry for virtual address va at the requested level.
 * If the entry is invalid and create isn't requested then bail
 * out early. Otherwise, for the 2nd level walk, allocate a new
 * page-table page and register it in the 1st level.  This is used
 * only to edit kernel mappings, which use pages from kernel memory,
 * so it's okay to use KADDR to look at the tables.
 */
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
	ulong *table;
	void *map;

	table = &pdb[PDX(va)];
	if(!(*table & PTEVALID) && create == 0)
		return 0;

	switch(level){

	default:
		return 0;

	case 1:
		return table;

	case 2:
		if(*table & PTESIZE)
			panic("mmuwalk2: va %luX entry %luX", va, *table);
		if(!(*table & PTEVALID)){
			/*
			 * Have to call low-level allocator from
			 * memory.c if we haven't set up the xalloc
			 * tables yet.
			 */
			if(didmmuinit)
				map = xspanalloc(BY2PG, BY2PG, 0);
			else
				map = rampage();
			if(map == nil)
				panic("mmuwalk xspanalloc failed");
			*table = PADDR(map)|PTEWRITE|PTEVALID;
		}
		table = KADDR(PPN(*table));
		return &table[PTX(va)];
	}
}
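
/*
 * Typical use, as in mmuinit above and pdbmap below (illustrative):
 *
 *	ulong *pte;
 *
 *	pte = mmuwalk(m->pdb, va, 2, 0);	look up only; nil if absent
 *	if(pte != nil)
 *		*pte &= ~PTEWRITE;		e.g. write-protect the page
 *
 * Passing create==1 instead allocates any missing level-2 table.
 */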

/*
 * Device mappings are shared by all procs and processors and
 * live in the virtual range VMAP to VMAP+VMAPSIZE.  The master
 * copy of the mappings is stored in mach0->pdb, and they are
 * paged in from there as necessary by vmapsync during faults.
 */

static Lock vmaplock;

static int findhole(ulong *a, int n, int count);
static ulong vmapalloc(ulong size);
static void pdbunmap(ulong*, ulong, int);

/*
 * Add a device mapping to the vmap range.
 */
void*
vmap(ulong pa, int size)
{
	int osize;
	ulong o, va;

	/*
	 * might be asking for less than a page.
	 */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;

	size = ROUND(size, BY2PG);
	if(pa == 0){
		print("vmap pa=0 pc=%#p\n", getcallerpc(&pa));
		return nil;
	}
	ilock(&vmaplock);
	if((va = vmapalloc(size)) == 0
	|| pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size) < 0){
		iunlock(&vmaplock);
		return 0;
	}
	iunlock(&vmaplock);
	/* avoid trap on local processor
	for(i=0; i<size; i+=4*MB)
		vmapsync(va+i);
	*/
	USED(osize);
//	print("  vmap %#.8lux %d => %#.8lux\n", pa+o, osize, va+o);
	return (void*)(va + o);
}
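
/*
 * Sketch of typical driver use (the address and size are made up):
 *
 *	ulong *regs;
 *
 *	regs = vmap(0xFE000000, 0x1000);	map device registers
 *	if(regs == nil)
 *		error(Enomem);			or however the caller recovers
 *	... read and write regs[] ...
 *	vunmap(regs, 0x1000);			when no longer needed
 *
 * The returned pointer carries the same page offset as pa, so byte
 * offsets within the device's register block work unchanged.
 */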

static int
findhole(ulong *a, int n, int count)
{
	int have, i;

	have = 0;
	for(i=0; i<n; i++){
		if(a[i] == 0)
			have++;
		else
			have = 0;
		if(have >= count)
			return i+1 - have;
	}
	return -1;
}
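
/*
 * Worked example of findhole (illustrative): with
 * a[] = { 5, 0, 0, 7, 0, 0, 0 } and count = 3, the run of three zero
 * entries starts at index 4, so findhole(a, 7, 3) returns 4; with
 * count = 4 it returns -1.  The return value is the index of the
 * first free slot in the hole, not a virtual address.
 */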

/*
 * Look for free space in the vmap.
 */
static ulong
vmapalloc(ulong size)
{
	int i, n, o;
	ulong *vpdb;
	int vpdbsize;

	vpdb = &MACHP(0)->pdb[PDX(VMAP)];
	vpdbsize = VMAPSIZE/(4*MB);

	if(size >= 4*MB){
		n = (size+4*MB-1) / (4*MB);
		if((o = findhole(vpdb, vpdbsize, n)) != -1)
			return VMAP + o*4*MB;
		return 0;
	}
	n = (size+BY2PG-1) / BY2PG;
	for(i=0; i<vpdbsize; i++)
		if((vpdb[i]&PTEVALID) && !(vpdb[i]&PTESIZE))
			if((o = findhole(KADDR(PPN(vpdb[i])), WD2PG, n)) != -1)
				return VMAP + i*4*MB + o*BY2PG;
	if((o = findhole(vpdb, vpdbsize, 1)) != -1)
		return VMAP + o*4*MB;

	/*
	 * could span page directory entries, but not worth the trouble.
	 * not going to be very much contention.
	 */
	return 0;
}

/*
 * Remove a device mapping from the vmap range.
 * Since pdbunmap does not remove page tables, just entries,
 * the call need not be interlocked with vmap.
 */
void
vunmap(void *v, int size)
{
	int i;
	ulong va, o;
	Mach *nm;
	Proc *p;

	/*
	 * might not be aligned
	 */
	va = (ulong)v;
	o = va&(BY2PG-1);
	va -= o;
	size += o;
	size = ROUND(size, BY2PG);

	if(size < 0 || va < VMAP || va+size > VMAP+VMAPSIZE)
		panic("vunmap va=%#.8lux size=%#x pc=%#.8lux",
			va, size, getcallerpc(&v));

	pdbunmap(MACHP(0)->pdb, va, size);

	/*
	 * Flush mapping from all the tlbs and copied pdbs.
	 * This can be (and is) slow, since it is called only rarely.
	 * It is possible for vunmap to be called with up == nil,
	 * e.g. from the reset/init driver routines during system
	 * boot. In that case it suffices to flush the MACH(0) TLB
	 * and return.
	 */
	if(!active.thunderbirdsarego){
		if(MACHP(0)->pdb == 0)
			panic("vunmap: nil m->pdb pc=%#p", getcallerpc(&v));
		if(PADDR(MACHP(0)->pdb) == 0)
			panic("vunmap: nil PADDR(m->pdb)");
		putcr3(PADDR(MACHP(0)->pdb));
		return;
	}
	for(i=0; i<conf.nproc; i++){
		p = proctab(i);
		if(p->state == Dead)
			continue;
		if(p != up)
			p->newtlb = 1;
	}
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			nm->flushmmu = 1;
	}
	flushmmu();
	for(i=0; i<conf.nmach; i++){
		nm = MACHP(i);
		if(nm != m)
			while((active.machs&(1<<nm->machno)) && nm->flushmmu)
				;
	}
}

/*
 * Add kernel mappings for va -> pa for a section of size bytes.
 */
int
pdbmap(ulong *pdb, ulong pa, ulong va, int size)
{
	int pse;
	ulong pgsz, *pte, *table;
	ulong flag, off;

	flag = pa&0xFFF;
	pa &= ~0xFFF;

	if((MACHP(0)->cpuiddx & 0x08) && (getcr4() & 0x10))
		pse = 1;
	else
		pse = 0;

	for(off=0; off<size; off+=pgsz){
		table = &pdb[PDX(va+off)];
		if((*table&PTEVALID) && (*table&PTESIZE))
			panic("vmap: pdb pte valid and big page: "
				"va=%#.8lux pa=%#.8lux pde=%#.8lux",
				va+off, pa+off, *table);

		/*
		 * Check if it can be mapped using a 4MB page:
		 * va, pa aligned and size >= 4MB and processor can do it.
		 */
		if(pse && (pa+off)%(4*MB) == 0 && (va+off)%(4*MB) == 0 &&
		    (size-off) >= 4*MB){
			*table = (pa+off)|flag|PTESIZE|PTEVALID;
			pgsz = 4*MB;
		}else{
			pte = mmuwalk(pdb, va+off, 2, 1);
			if(*pte&PTEVALID)
				panic("vmap: va=%#.8lux pa=%#.8lux pte=%#.8lux",
					va+off, pa+off, *pte);
			*pte = (pa+off)|flag|PTEVALID;
			pgsz = BY2PG;
		}
	}
	return 0;
}
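
/*
 * Callers smuggle the PTE permission bits in the low 12 bits of pa,
 * as vmap does above (illustrative):
 *
 *	pdbmap(MACHP(0)->pdb, pa|PTEUNCACHED|PTEWRITE, va, size);
 *
 * pdbmap splits them back out into flag, so the same bits end up in
 * either the 4MB PDE or the 4K PTE.
 */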

/*
 * Remove mappings.  Must already exist, for sanity.
 * Only used for kernel mappings, so okay to use KADDR.
 */
static void
pdbunmap(ulong *pdb, ulong va, int size)
{
	ulong vae;
	ulong *table;

	vae = va+size;
	while(va < vae){
		table = &pdb[PDX(va)];
		if(!(*table & PTEVALID)){
			panic("vunmap: not mapped");
			/*
			va = (va+4*MB-1) & ~(4*MB-1);
			continue;
			*/
		}
		if(*table & PTESIZE){
			*table = 0;
			/* advance past the whole 4MB pde, even if va was aligned */
			va = (va+4*MB) & ~(4*MB-1);
			continue;
		}
		table = KADDR(PPN(*table));
		if(!(table[PTX(va)] & PTEVALID))
			panic("vunmap: not mapped");
		table[PTX(va)] = 0;
		va += BY2PG;
	}
}

/*
 * Handle a fault by bringing vmap up to date.
 * Only copy pdb entries and they never go away,
 * so no locking needed.
 */
int
vmapsync(ulong va)
{
	ulong entry, *table;

	if(va < VMAP || va >= VMAP+VMAPSIZE)
		return 0;

	entry = MACHP(0)->pdb[PDX(va)];
	if(!(entry&PTEVALID))
		return 0;
	if(!(entry&PTESIZE)){
		/* make sure entry will help the fault */
		table = KADDR(PPN(entry));
		if(!(table[PTX(va)]&PTEVALID))
			return 0;
	}
	vpd[PDX(va)] = entry;
	/*
	 * TLB doesn't cache negative results, so no flush needed.
	 */
	return 1;
}
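
/*
 * vmapsync is meant to be called from the fault handler (fault386 in
 * trap.c, in the usual arrangement) before treating a kernel fault as
 * fatal, roughly:
 *
 *	if(vmapsync(addr))
 *		return;		the missing pde has been copied; retry
 *
 * so a processor that faults on a vmap address simply copies the
 * missing PDE from MACHP(0)->pdb and continues.
 */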


/*
 * KMap is used to map individual pages into virtual memory.
 * It is rare to have more than a few KMaps at a time (in the
 * absence of interrupts, only two at a time are ever used,
 * but interrupts can stack).  The mappings are local to a process,
 * so we can use the same range of virtual address space for
 * all processes without any coordination.
 */
#define kpt (vpt+VPTX(KMAP))
#define NKPT (KMAPSIZE/BY2PG)
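
/*
 * Typical use (an illustrative sketch; p is a Page* obtained elsewhere):
 *
 *	KMap *k;
 *
 *	k = kmap(p);
 *	memset((void*)VA(k), 0, BY2PG);
 *	kunmap(k);
 *
 * In this implementation the KMap* is itself the virtual address of
 * the mapping, so the usual VA() macro need only cast it.
 */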
KMap*
kmap(Page *page)
{
	int i, o, s;

	if(up == nil)
		panic("kmap: up=0 pc=%#.8lux", getcallerpc(&page));
	if(up->mmupdb == nil)
		upallocpdb();
	if(up->nkmap < 0)
		panic("kmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);

	/*
	 * Splhi shouldn't be necessary here, but paranoia reigns.
	 * See comment in putmmu above.
	 */
	s = splhi();
	up->nkmap++;
	if(!(vpd[PDX(KMAP)]&PTEVALID)){
		/* allocate page directory */
		if(KMAPSIZE > BY2XPG)
			panic("bad kmapsize");
		if(up->kmaptable != nil)
			panic("kmaptable");
		spllo();
		up->kmaptable = newpage(0, 0, 0);
		splhi();
		vpd[PDX(KMAP)] = up->kmaptable->pa|PTEWRITE|PTEVALID;
		flushpg((ulong)kpt);
		memset(kpt, 0, BY2PG);
		kpt[0] = page->pa|PTEWRITE|PTEVALID;
		up->lastkmap = 0;
		splx(s);
		return (KMap*)KMAP;
	}
	if(up->kmaptable == nil)
		panic("no kmaptable");
	o = up->lastkmap+1;
	for(i=0; i<NKPT; i++){
		if(kpt[(i+o)%NKPT] == 0){
			o = (i+o)%NKPT;
			kpt[o] = page->pa|PTEWRITE|PTEVALID;
			up->lastkmap = o;
			splx(s);
			return (KMap*)(KMAP+o*BY2PG);
		}
	}
	panic("out of kmap");
	return nil;
}

void
kunmap(KMap *k)
{
	ulong va;

	va = (ulong)k;
	if(up->mmupdb == nil || !(vpd[PDX(KMAP)]&PTEVALID))
		panic("kunmap: no kmaps");
	if(va < KMAP || va >= KMAP+KMAPSIZE)
		panic("kunmap: bad address %#.8lux pc=%#p", va, getcallerpc(&k));
	if(!(vpt[VPTX(va)]&PTEVALID))
		panic("kunmap: not mapped %#.8lux pc=%#p", va, getcallerpc(&k));
	up->nkmap--;
	if(up->nkmap < 0)
		panic("kunmap %lud %s: nkmap=%d", up->pid, up->text, up->nkmap);
	vpt[VPTX(va)] = 0;
	flushpg(va);
}

/*
 * Temporary one-page mapping used to edit page directories.
 *
 * The fasttmp #define controls whether the code optimizes
 * the case where the page is already mapped in the physical
 * memory window.
 */
#define fasttmp 1

void*
tmpmap(Page *p)
{
	ulong i;
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");

	if(fasttmp && p->pa < -KZERO)
		return KADDR(p->pa);

	/*
	 * PDX(TMPADDR) == PDX(MACHADDR), so this
	 * entry is private to the processor and shared
	 * between up->mmupdb (if any) and m->pdb.
	 */
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID)){
		for(i=KZERO; i<=CPU0MACH; i+=BY2PG)
			print("%#p: *%#p=%#p (vpt=%#p index=%#p)\n", i, &vpt[VPTX(i)], vpt[VPTX(i)], vpt, VPTX(i));
		panic("tmpmap: no entry");
	}
	if(PPN(*entry) != PPN(TMPADDR-KZERO))
		panic("tmpmap: already mapped entry=%#.8lux", *entry);
	*entry = p->pa|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
	return (void*)TMPADDR;
}

void
tmpunmap(void *v)
{
	ulong *entry;

	if(islo())
		panic("tmpaddr: islo");
	if(fasttmp && (ulong)v >= KZERO && v != (void*)TMPADDR)
		return;
	if(v != (void*)TMPADDR)
		panic("tmpunmap: bad address");
	entry = &vpt[VPTX(TMPADDR)];
	if(!(*entry&PTEVALID) || PPN(*entry) == PPN(PADDR(TMPADDR)))
		panic("tmpmap: not mapped entry=%#.8lux", *entry);
	*entry = PPN(TMPADDR-KZERO)|PTEWRITE|PTEVALID;
	flushpg(TMPADDR);
}
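
/*
 * Typical use, as in mmupdballoc and mmuswitch above (illustrative):
 *
 *	ulong *pdb;
 *
 *	pdb = tmpmap(page);
 *	pdb[PDX(VPT)] = page->pa|PTEWRITE|PTEVALID;
 *	tmpunmap(pdb);
 *
 * tmpmap returns KADDR(page->pa) when the page is inside the physical
 * window and TMPADDR otherwise; tmpunmap accepts either pointer.
 * Must be called at splhi, since the TMPADDR mapping is per-processor.
 */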

/*
 * These could go back to being macros once the kernel is debugged,
 * but the extra checking is nice to have.
 */
void*
kaddr(ulong pa)
{
	if(pa >= (ulong)-KZERO)
		panic("kaddr: pa=%#.8lux >= -KZERO pc=%#p", pa, getcallerpc(&pa));
	return (void*)(pa | KZERO);
}

ulong
paddr(void *v)
{
	ulong va;

	va = (ulong)v;
	if(va < KZERO)
		panic("paddr: va=%#.8lux < KZERO pc=%#p", va, getcallerpc(&v));
	return va & ~KSEGM;
}

/*
 * More debugging.
 */
void
countpagerefs(ulong *ref, int print)
{
	USED(ref, print);
}

void
checkfault(ulong, ulong)
{
}

/*
 * Return the number of bytes that can be accessed via KADDR(pa).
 * If pa is not a valid argument to KADDR, return 0.
 */
ulong
cankaddr(ulong pa)
{
	if(pa >= -KZERO)
		return 0;
	return -KZERO - pa;
}
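
/*
 * Worked example (illustrative; uses the KZERO=0xF0000000 value from
 * the comment at the top of this file): -KZERO is 0x10000000, i.e.
 * 256MB, so cankaddr(0x00800000) returns 0x0F800000 (the 248MB from
 * 8MB up to 256MB), while cankaddr(0x10000000) and anything above it
 * return 0, since such addresses have no KADDR window.
 */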