xref: /onnv-gate/usr/src/uts/i86pc/vm/kboot_mmu.c (revision 3446:5903aece022d)
1*3446Smrj /*
2*3446Smrj  * CDDL HEADER START
3*3446Smrj  *
4*3446Smrj  * The contents of this file are subject to the terms of the
5*3446Smrj  * Common Development and Distribution License (the "License").
6*3446Smrj  * You may not use this file except in compliance with the License.
7*3446Smrj  *
8*3446Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*3446Smrj  * or http://www.opensolaris.org/os/licensing.
10*3446Smrj  * See the License for the specific language governing permissions
11*3446Smrj  * and limitations under the License.
12*3446Smrj  *
13*3446Smrj  * When distributing Covered Code, include this CDDL HEADER in each
14*3446Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*3446Smrj  * If applicable, add the following below this CDDL HEADER, with the
16*3446Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
17*3446Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
18*3446Smrj  *
19*3446Smrj  * CDDL HEADER END
20*3446Smrj  */
21*3446Smrj 
22*3446Smrj /*
23*3446Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24*3446Smrj  * Use is subject to license terms.
25*3446Smrj  */
26*3446Smrj 
27*3446Smrj #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*3446Smrj 
29*3446Smrj #include <sys/types.h>
30*3446Smrj #include <sys/systm.h>
31*3446Smrj #include <sys/archsystm.h>
32*3446Smrj #include <sys/debug.h>
33*3446Smrj #include <sys/bootconf.h>
34*3446Smrj #include <sys/bootsvcs.h>
35*3446Smrj #include <sys/bootinfo.h>
36*3446Smrj #include <sys/mman.h>
37*3446Smrj #include <sys/cmn_err.h>
38*3446Smrj #include <sys/param.h>
39*3446Smrj #include <sys/machparam.h>
40*3446Smrj #include <sys/machsystm.h>
41*3446Smrj #include <sys/promif.h>
42*3446Smrj #include <sys/kobj.h>
43*3446Smrj #include <vm/kboot_mmu.h>
44*3446Smrj #include <vm/hat_pte.h>
45*3446Smrj #include <vm/hat_i86.h>
46*3446Smrj #include <vm/seg_kmem.h>
47*3446Smrj 
#if 0
/*
 * Joe's debug printing
 *
 * DBG(x) prints the expression text of x and its value in hex.  The
 * expansion deliberately has no trailing semicolon: every caller writes
 * "DBG(x);", so the macro behaves as a single statement and remains safe
 * inside an unbraced if/else.
 */
#define	DBG(x)    \
	bop_printf(NULL, "boot_mmu.c: %s is %" PRIx64 "\n", #x, (uint64_t)(x))
#else
#define	DBG(x)	/* naught */
#endif
57*3446Smrj 
58*3446Smrj /*
59*3446Smrj  * Page table and memory stuff.
60*3446Smrj  */
61*3446Smrj static caddr_t window;
62*3446Smrj static caddr_t pte_to_window;
63*3446Smrj 
64*3446Smrj /*
65*3446Smrj  * this are needed by mmu_init()
66*3446Smrj  */
67*3446Smrj int kbm_nx_support = 0;		/* NX bit in PTEs is in use */
68*3446Smrj int kbm_pae_support = 0;	/* PAE is 64 bit Page table entries */
69*3446Smrj int kbm_pge_support = 0;	/* PGE is Page table global bit enabled */
70*3446Smrj int kbm_largepage_support = 0;
71*3446Smrj uint_t kbm_nucleus_size = 0;
72*3446Smrj 
73*3446Smrj #define	BOOT_SHIFT(l)	(shift_amt[l])
74*3446Smrj #define	BOOT_SZ(l)	((size_t)1 << BOOT_SHIFT(l))
75*3446Smrj #define	BOOT_OFFSET(l)	(BOOT_SZ(l) - 1)
76*3446Smrj #define	BOOT_MASK(l)	(~BOOT_OFFSET(l))
77*3446Smrj 
78*3446Smrj /*
79*3446Smrj  * Initialize memory management parameters for boot time page table management
80*3446Smrj  */
81*3446Smrj void
82*3446Smrj kbm_init(struct xboot_info *bi)
83*3446Smrj {
84*3446Smrj 	/*
85*3446Smrj 	 * configure mmu information
86*3446Smrj 	 */
87*3446Smrj 	kbm_nucleus_size = (uintptr_t)bi->bi_kseg_size;
88*3446Smrj 	kbm_largepage_support = bi->bi_use_largepage;
89*3446Smrj 	kbm_nx_support = bi->bi_use_nx;
90*3446Smrj 	kbm_pae_support = bi->bi_use_pae;
91*3446Smrj 	kbm_pge_support = bi->bi_use_pge;
92*3446Smrj 	window = bi->bi_pt_window;
93*3446Smrj 	DBG(window);
94*3446Smrj 	pte_to_window = bi->bi_pte_to_pt_window;
95*3446Smrj 	DBG(pte_to_window);
96*3446Smrj 	if (kbm_pae_support) {
97*3446Smrj 		shift_amt = shift_amt_pae;
98*3446Smrj 		ptes_per_table = 512;
99*3446Smrj 		pte_size = 8;
100*3446Smrj 		lpagesize = TWO_MEG;
101*3446Smrj #ifdef __amd64
102*3446Smrj 		top_level = 3;
103*3446Smrj #else
104*3446Smrj 		top_level = 2;
105*3446Smrj #endif
106*3446Smrj 	} else {
107*3446Smrj 		shift_amt = shift_amt_nopae;
108*3446Smrj 		ptes_per_table = 1024;
109*3446Smrj 		pte_size = 4;
110*3446Smrj 		lpagesize = FOUR_MEG;
111*3446Smrj 		top_level = 1;
112*3446Smrj 	}
113*3446Smrj 
114*3446Smrj 	top_page_table = bi->bi_top_page_table;
115*3446Smrj 	DBG(top_page_table);
116*3446Smrj }
117*3446Smrj 
118*3446Smrj /*
119*3446Smrj  * Change the addressible page table window to point at a given page
120*3446Smrj  */
121*3446Smrj /*ARGSUSED*/
122*3446Smrj void *
123*3446Smrj kbm_remap_window(paddr_t physaddr, int writeable)
124*3446Smrj {
125*3446Smrj 	uint_t pt_bits = PT_NOCONSIST | PT_VALID | PT_WRITABLE;
126*3446Smrj 
127*3446Smrj 	DBG(physaddr);
128*3446Smrj 
129*3446Smrj 	if (kbm_pae_support)
130*3446Smrj 		*((x86pte_t *)pte_to_window) = physaddr | pt_bits;
131*3446Smrj 	else
132*3446Smrj 		*((x86pte32_t *)pte_to_window) = physaddr | pt_bits;
133*3446Smrj 	mmu_tlbflush_entry(window);
134*3446Smrj 	DBG(window);
135*3446Smrj 	return (window);
136*3446Smrj }
137*3446Smrj 
138*3446Smrj /*
139*3446Smrj  * Add a mapping for the physical page at the given virtual address.
140*3446Smrj  */
141*3446Smrj void
142*3446Smrj kbm_map(uintptr_t va, paddr_t pa, uint_t level, uint_t is_kernel)
143*3446Smrj {
144*3446Smrj 	x86pte_t *ptep;
145*3446Smrj 	paddr_t pte_physaddr;
146*3446Smrj 	x86pte_t pteval;
147*3446Smrj 
148*3446Smrj 	if (khat_running)
149*3446Smrj 		panic("kbm_map() called too late");
150*3446Smrj 
151*3446Smrj 	pteval = pa_to_ma(pa) | PT_NOCONSIST | PT_VALID | PT_WRITABLE;
152*3446Smrj 	if (level == 1)
153*3446Smrj 		pteval |= PT_PAGESIZE;
154*3446Smrj 	if (kbm_pge_support && is_kernel)
155*3446Smrj 		pteval |= PT_GLOBAL;
156*3446Smrj 
157*3446Smrj 	/*
158*3446Smrj 	 * Find the pte that will map this address. This creates any
159*3446Smrj 	 * missing intermediate level page tables.
160*3446Smrj 	 */
161*3446Smrj 	ptep = find_pte(va, &pte_physaddr, level, 0);
162*3446Smrj 	if (ptep == NULL)
163*3446Smrj 		bop_panic("kbm_map: find_pte returned NULL");
164*3446Smrj 
165*3446Smrj 	if (kbm_pae_support)
166*3446Smrj 		*ptep = pteval;
167*3446Smrj 	else
168*3446Smrj 		*((x86pte32_t *)ptep) = pteval;
169*3446Smrj 	mmu_tlbflush_entry((caddr_t)va);
170*3446Smrj }
171*3446Smrj 
172*3446Smrj /*
173*3446Smrj  * Probe the boot time page tables to find the first mapping
174*3446Smrj  * including va (or higher) and return non-zero if one is found.
175*3446Smrj  * va is updated to the starting address and len to the pagesize.
176*3446Smrj  * pp will be set to point to the 1st page_t of the mapped page(s).
177*3446Smrj  *
178*3446Smrj  * Note that if va is in the middle of a large page, the returned va
179*3446Smrj  * will be less than what was asked for.
180*3446Smrj  */
181*3446Smrj int
182*3446Smrj kbm_probe(uintptr_t *va, size_t *len, pfn_t *pfn, uint_t *prot)
183*3446Smrj {
184*3446Smrj 	uintptr_t	probe_va;
185*3446Smrj 	x86pte_t	*ptep;
186*3446Smrj 	paddr_t		pte_physaddr;
187*3446Smrj 	x86pte_t	pte_val;
188*3446Smrj 	level_t		l;
189*3446Smrj 
190*3446Smrj 	if (khat_running)
191*3446Smrj 		panic("kbm_probe() called too late");
192*3446Smrj 	*len = 0;
193*3446Smrj 	*pfn = PFN_INVALID;
194*3446Smrj 	*prot = 0;
195*3446Smrj 	probe_va = *va;
196*3446Smrj restart_new_va:
197*3446Smrj 	l = top_level;
198*3446Smrj 	for (;;) {
199*3446Smrj 		if (IN_VA_HOLE(probe_va))
200*3446Smrj 			probe_va = mmu.hole_end;
201*3446Smrj 
202*3446Smrj 		if (IN_HYPERVISOR_VA(probe_va))
203*3446Smrj 			return (0);
204*3446Smrj 
205*3446Smrj 		/*
206*3446Smrj 		 * If we don't have a valid PTP/PTE at this level
207*3446Smrj 		 * then we can bump VA by this level's pagesize and try again.
208*3446Smrj 		 * When the probe_va wraps around, we are done.
209*3446Smrj 		 */
210*3446Smrj 		ptep = find_pte(probe_va, &pte_physaddr, l, 1);
211*3446Smrj 		if (ptep == NULL)
212*3446Smrj 			bop_panic("kbm_probe: find_pte returned NULL");
213*3446Smrj 		if (kbm_pae_support)
214*3446Smrj 			pte_val = *ptep;
215*3446Smrj 		else
216*3446Smrj 			pte_val = *((x86pte32_t *)ptep);
217*3446Smrj 		if (!PTE_ISVALID(pte_val)) {
218*3446Smrj 			probe_va = (probe_va & BOOT_MASK(l)) + BOOT_SZ(l);
219*3446Smrj 			if (probe_va <= *va)
220*3446Smrj 				return (0);
221*3446Smrj 			goto restart_new_va;
222*3446Smrj 		}
223*3446Smrj 
224*3446Smrj 		/*
225*3446Smrj 		 * If this entry is a pointer to a lower level page table
226*3446Smrj 		 * go down to it.
227*3446Smrj 		 */
228*3446Smrj 		if (!PTE_ISPAGE(pte_val, l)) {
229*3446Smrj 			ASSERT(l > 0);
230*3446Smrj 			--l;
231*3446Smrj 			continue;
232*3446Smrj 		}
233*3446Smrj 
234*3446Smrj 		/*
235*3446Smrj 		 * We found a boot level page table entry
236*3446Smrj 		 */
237*3446Smrj 		*len = BOOT_SZ(l);
238*3446Smrj 		*va = probe_va & ~(*len - 1);
239*3446Smrj 		*pfn = PTE2PFN(pte_val, l);
240*3446Smrj 
241*3446Smrj 
242*3446Smrj 		*prot = PROT_READ | PROT_EXEC;
243*3446Smrj 		if (PTE_GET(pte_val, PT_WRITABLE))
244*3446Smrj 			*prot |= PROT_WRITE;
245*3446Smrj 
246*3446Smrj 		/*
247*3446Smrj 		 * pt_nx is cleared if processor doesn't support NX bit
248*3446Smrj 		 */
249*3446Smrj 		if (PTE_GET(pte_val, mmu.pt_nx))
250*3446Smrj 			*prot &= ~PROT_EXEC;
251*3446Smrj 
252*3446Smrj 		return (1);
253*3446Smrj 	}
254*3446Smrj }
255*3446Smrj 
256*3446Smrj 
257*3446Smrj /*
258*3446Smrj  * Destroy a boot loader page table 4K mapping.
259*3446Smrj  */
260*3446Smrj void
261*3446Smrj kbm_unmap(uintptr_t va)
262*3446Smrj {
263*3446Smrj 	if (khat_running)
264*3446Smrj 		panic("kbm_unmap() called too late");
265*3446Smrj 	else {
266*3446Smrj 		x86pte_t *ptep;
267*3446Smrj 		level_t	level = 0;
268*3446Smrj 		uint_t  probe_only = 1;
269*3446Smrj 
270*3446Smrj 		ptep = find_pte(va, NULL, level, probe_only);
271*3446Smrj 		if (ptep == NULL)
272*3446Smrj 			return;
273*3446Smrj 
274*3446Smrj 		if (kbm_pae_support)
275*3446Smrj 			*ptep = 0;
276*3446Smrj 		else
277*3446Smrj 			*((x86pte32_t *)ptep) = 0;
278*3446Smrj 		mmu_tlbflush_entry((caddr_t)va);
279*3446Smrj 	}
280*3446Smrj }
281*3446Smrj 
282*3446Smrj 
283*3446Smrj /*
284*3446Smrj  * Change a boot loader page table 4K mapping.
285*3446Smrj  * Returns the pfn of the old mapping.
286*3446Smrj  */
287*3446Smrj pfn_t
288*3446Smrj kbm_remap(uintptr_t va, pfn_t pfn)
289*3446Smrj {
290*3446Smrj 	x86pte_t *ptep;
291*3446Smrj 	level_t	level = 0;
292*3446Smrj 	uint_t  probe_only = 1;
293*3446Smrj 	x86pte_t pte_val = pa_to_ma(pfn_to_pa(pfn)) | PT_WRITABLE |
294*3446Smrj 	    PT_NOCONSIST | PT_VALID;
295*3446Smrj 	x86pte_t old_pte;
296*3446Smrj 
297*3446Smrj 	if (khat_running)
298*3446Smrj 		panic("kbm_remap() called too late");
299*3446Smrj 	ptep = find_pte(va, NULL, level, probe_only);
300*3446Smrj 	if (ptep == NULL)
301*3446Smrj 		bop_panic("kbm_remap: find_pte returned NULL");
302*3446Smrj 
303*3446Smrj 	if (kbm_pae_support)
304*3446Smrj 		old_pte = *ptep;
305*3446Smrj 	else
306*3446Smrj 		old_pte = *((x86pte32_t *)ptep);
307*3446Smrj 
308*3446Smrj 	if (kbm_pae_support)
309*3446Smrj 		*((x86pte_t *)ptep) = pte_val;
310*3446Smrj 	else
311*3446Smrj 		*((x86pte32_t *)ptep) = pte_val;
312*3446Smrj 	mmu_tlbflush_entry((caddr_t)va);
313*3446Smrj 
314*3446Smrj 	if (!(old_pte & PT_VALID) || ma_to_pa(old_pte) == -1)
315*3446Smrj 		return (PFN_INVALID);
316*3446Smrj 	return (mmu_btop(ma_to_pa(old_pte)));
317*3446Smrj }
318*3446Smrj 
319*3446Smrj 
320*3446Smrj /*
321*3446Smrj  * Change a boot loader page table 4K mapping to read only.
322*3446Smrj  */
323*3446Smrj void
324*3446Smrj kbm_read_only(uintptr_t va, paddr_t pa)
325*3446Smrj {
326*3446Smrj 	x86pte_t pte_val = pa_to_ma(pa) |
327*3446Smrj 	    PT_NOCONSIST | PT_REF | PT_MOD | PT_VALID;
328*3446Smrj 	x86pte_t *ptep;
329*3446Smrj 	level_t	level = 0;
330*3446Smrj 
331*3446Smrj 	ptep = find_pte(va, NULL, level, 0);
332*3446Smrj 	if (ptep == NULL)
333*3446Smrj 		bop_panic("kbm_read_only: find_pte returned NULL");
334*3446Smrj 
335*3446Smrj 	if (kbm_pae_support)
336*3446Smrj 		*ptep = pte_val;
337*3446Smrj 	else
338*3446Smrj 		*((x86pte32_t *)ptep) = pte_val;
339*3446Smrj 	mmu_tlbflush_entry((caddr_t)va);
340*3446Smrj }
341*3446Smrj 
342*3446Smrj /*
343*3446Smrj  * interfaces for kernel debugger to access physical memory
344*3446Smrj  */
345*3446Smrj static x86pte_t save_pte;
346*3446Smrj 
347*3446Smrj void *
348*3446Smrj kbm_push(paddr_t pa)
349*3446Smrj {
350*3446Smrj 	static int first_time = 1;
351*3446Smrj 
352*3446Smrj 	if (first_time) {
353*3446Smrj 		first_time = 0;
354*3446Smrj 		return (window);
355*3446Smrj 	}
356*3446Smrj 
357*3446Smrj 	if (kbm_pae_support)
358*3446Smrj 		save_pte = *((x86pte_t *)pte_to_window);
359*3446Smrj 	else
360*3446Smrj 		save_pte = *((x86pte32_t *)pte_to_window);
361*3446Smrj 	return (kbm_remap_window(pa, 0));
362*3446Smrj }
363*3446Smrj 
364*3446Smrj void
365*3446Smrj kbm_pop(void)
366*3446Smrj {
367*3446Smrj 	if (kbm_pae_support)
368*3446Smrj 		*((x86pte_t *)pte_to_window) = save_pte;
369*3446Smrj 	else
370*3446Smrj 		*((x86pte32_t *)pte_to_window) = save_pte;
371*3446Smrj 	mmu_tlbflush_entry(window);
372*3446Smrj }
373*3446Smrj 
374*3446Smrj x86pte_t
375*3446Smrj get_pteval(paddr_t table, uint_t index)
376*3446Smrj {
377*3446Smrj 	void *table_ptr = kbm_remap_window(table, 0);
378*3446Smrj 
379*3446Smrj 	if (kbm_pae_support)
380*3446Smrj 		return (((x86pte_t *)table_ptr)[index]);
381*3446Smrj 	return (((x86pte32_t *)table_ptr)[index]);
382*3446Smrj }
383*3446Smrj 
384*3446Smrj void
385*3446Smrj set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
386*3446Smrj {
387*3446Smrj 	void *table_ptr = kbm_remap_window(table, 0);
388*3446Smrj 	if (kbm_pae_support)
389*3446Smrj 		((x86pte_t *)table_ptr)[index] = pteval;
390*3446Smrj 	else
391*3446Smrj 		((x86pte32_t *)table_ptr)[index] = pteval;
392*3446Smrj 	if (level == top_level && level == 2)
393*3446Smrj 		reload_cr3();
394*3446Smrj }
395*3446Smrj 
396*3446Smrj paddr_t
397*3446Smrj make_ptable(x86pte_t *pteval, uint_t level)
398*3446Smrj {
399*3446Smrj 	paddr_t new_table;
400*3446Smrj 	void *table_ptr;
401*3446Smrj 
402*3446Smrj 	new_table = do_bop_phys_alloc(MMU_PAGESIZE, MMU_PAGESIZE);
403*3446Smrj 	table_ptr = kbm_remap_window(new_table, 1);
404*3446Smrj 	bzero(table_ptr, MMU_PAGESIZE);
405*3446Smrj 
406*3446Smrj 	if (level == top_level && level == 2)
407*3446Smrj 		*pteval = pa_to_ma(new_table) | PT_VALID;
408*3446Smrj 	else
409*3446Smrj 		*pteval = pa_to_ma(new_table) |
410*3446Smrj 		    PT_VALID | PT_REF | PT_USER | PT_WRITABLE;
411*3446Smrj 
412*3446Smrj 	return (new_table);
413*3446Smrj }
414*3446Smrj 
415*3446Smrj x86pte_t *
416*3446Smrj map_pte(paddr_t table, uint_t index)
417*3446Smrj {
418*3446Smrj 	void *table_ptr = kbm_remap_window(table, 0);
419*3446Smrj 	return ((x86pte_t *)((caddr_t)table_ptr + index * pte_size));
420*3446Smrj }
421