xref: /onnv-gate/usr/src/uts/i86pc/vm/i86_mmu.c (revision 11474:857f9db4ef05)
13446Smrj /*
23446Smrj  * CDDL HEADER START
33446Smrj  *
43446Smrj  * The contents of this file are subject to the terms of the
53446Smrj  * Common Development and Distribution License (the "License").
63446Smrj  * You may not use this file except in compliance with the License.
73446Smrj  *
83446Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93446Smrj  * or http://www.opensolaris.org/os/licensing.
103446Smrj  * See the License for the specific language governing permissions
113446Smrj  * and limitations under the License.
123446Smrj  *
133446Smrj  * When distributing Covered Code, include this CDDL HEADER in each
143446Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153446Smrj  * If applicable, add the following below this CDDL HEADER, with the
163446Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
173446Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
183446Smrj  *
193446Smrj  * CDDL HEADER END
203446Smrj  */
213446Smrj /*
22*11474SJonathan.Adams@Sun.COM  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
233446Smrj  * Use is subject to license terms.
243446Smrj  */
253446Smrj 
263446Smrj #include <sys/t_lock.h>
273446Smrj #include <sys/memlist.h>
283446Smrj #include <sys/cpuvar.h>
293446Smrj #include <sys/vmem.h>
303446Smrj #include <sys/mman.h>
313446Smrj #include <sys/vm.h>
323446Smrj #include <sys/kmem.h>
333446Smrj #include <sys/cmn_err.h>
343446Smrj #include <sys/debug.h>
353446Smrj #include <sys/vm_machparam.h>
363446Smrj #include <sys/tss.h>
373446Smrj #include <sys/vnode.h>
383446Smrj #include <vm/hat.h>
393446Smrj #include <vm/anon.h>
403446Smrj #include <vm/as.h>
413446Smrj #include <vm/page.h>
423446Smrj #include <vm/seg.h>
433446Smrj #include <vm/seg_kmem.h>
443446Smrj #include <vm/seg_map.h>
453446Smrj #include <vm/hat_i86.h>
463446Smrj #include <sys/promif.h>
473446Smrj #include <sys/x86_archext.h>
483446Smrj #include <sys/systm.h>
493446Smrj #include <sys/archsystm.h>
503446Smrj #include <sys/sunddi.h>
513446Smrj #include <sys/ddidmareq.h>
523446Smrj #include <sys/controlregs.h>
533446Smrj #include <sys/reboot.h>
543446Smrj #include <sys/kdi.h>
553446Smrj #include <sys/bootconf.h>
563446Smrj #include <sys/bootsvcs.h>
573446Smrj #include <sys/bootinfo.h>
583446Smrj #include <vm/kboot_mmu.h>
593446Smrj 
605084Sjohnlev #ifdef __xpv
615084Sjohnlev #include <sys/hypervisor.h>
625084Sjohnlev #endif
635084Sjohnlev 
643446Smrj caddr_t
i86devmap(pfn_t pf,pgcnt_t pgcnt,uint_t prot)653446Smrj i86devmap(pfn_t pf, pgcnt_t pgcnt, uint_t prot)
663446Smrj {
673446Smrj 	caddr_t addr;
683446Smrj 	caddr_t addr1;
693446Smrj 	page_t *pp;
703446Smrj 
713446Smrj 	addr1 = addr = vmem_alloc(heap_arena, mmu_ptob(pgcnt), VM_SLEEP);
723446Smrj 
733446Smrj 	for (; pgcnt != 0; addr += MMU_PAGESIZE, ++pf, --pgcnt) {
743446Smrj 		pp = page_numtopp_nolock(pf);
753446Smrj 		if (pp == NULL) {
763446Smrj 			hat_devload(kas.a_hat, addr, MMU_PAGESIZE, pf,
773446Smrj 			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
783446Smrj 		} else {
793446Smrj 			hat_memload(kas.a_hat, addr, pp,
803446Smrj 			    prot | HAT_NOSYNC, HAT_LOAD_LOCK);
813446Smrj 		}
823446Smrj 	}
833446Smrj 
843446Smrj 	return (addr1);
853446Smrj }
863446Smrj 
873446Smrj /*
883446Smrj  * This routine is like page_numtopp, but accepts only free pages, which
893446Smrj  * it allocates (unfrees) and returns with the exclusive lock held.
903446Smrj  * It is used by machdep.c/dma_init() to find contiguous free pages.
913446Smrj  *
923446Smrj  * XXX this and some others should probably be in vm_machdep.c
933446Smrj  */
943446Smrj page_t *
page_numtopp_alloc(pfn_t pfnum)953446Smrj page_numtopp_alloc(pfn_t pfnum)
963446Smrj {
973446Smrj 	page_t *pp;
983446Smrj 
993446Smrj retry:
1003446Smrj 	pp = page_numtopp_nolock(pfnum);
1013446Smrj 	if (pp == NULL) {
1023446Smrj 		return (NULL);
1033446Smrj 	}
1043446Smrj 
1053446Smrj 	if (!page_trylock(pp, SE_EXCL)) {
1063446Smrj 		return (NULL);
1073446Smrj 	}
1083446Smrj 
1093446Smrj 	if (page_pptonum(pp) != pfnum) {
1103446Smrj 		page_unlock(pp);
1113446Smrj 		goto retry;
1123446Smrj 	}
1133446Smrj 
1143446Smrj 	if (!PP_ISFREE(pp)) {
1153446Smrj 		page_unlock(pp);
1163446Smrj 		return (NULL);
1173446Smrj 	}
1183446Smrj 	if (pp->p_szc) {
1193446Smrj 		page_demote_free_pages(pp);
1203446Smrj 		page_unlock(pp);
1213446Smrj 		goto retry;
1223446Smrj 	}
1233446Smrj 
1243446Smrj 	/* If associated with a vnode, destroy mappings */
1253446Smrj 
1263446Smrj 	if (pp->p_vnode) {
1273446Smrj 
1283446Smrj 		page_destroy_free(pp);
1293446Smrj 
1303446Smrj 		if (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_NO_RECLAIM)) {
1313446Smrj 			return (NULL);
1323446Smrj 		}
1333446Smrj 
1343446Smrj 		if (page_pptonum(pp) != pfnum) {
1353446Smrj 			page_unlock(pp);
1363446Smrj 			goto retry;
1373446Smrj 		}
1383446Smrj 	}
1393446Smrj 
1405159Sjohnlev 	if (!PP_ISFREE(pp)) {
1413446Smrj 		page_unlock(pp);
1423446Smrj 		return (NULL);
1433446Smrj 	}
1443446Smrj 
1455159Sjohnlev 	if (!page_reclaim(pp, (kmutex_t *)NULL))
1465159Sjohnlev 		return (NULL);
1475159Sjohnlev 
1483446Smrj 	return (pp);
1493446Smrj }
1503446Smrj 
1513446Smrj /*
1523446Smrj  * Flag is not set early in boot. Once it is set we are no longer
1533446Smrj  * using boot's page tables.
1543446Smrj  */
1553446Smrj uint_t khat_running = 0;
1563446Smrj 
1573446Smrj /*
1583446Smrj  * This procedure is callable only while the boot loader is in charge of the
1593446Smrj  * MMU. It assumes that PA == VA for page table pointers.  It doesn't live in
1603446Smrj  * kboot_mmu.c since it's used from common code.
1613446Smrj  */
1623446Smrj pfn_t
va_to_pfn(void * vaddr)1633446Smrj va_to_pfn(void *vaddr)
1643446Smrj {
1653446Smrj 	uintptr_t	des_va = ALIGN2PAGE(vaddr);
1663446Smrj 	uintptr_t	va = des_va;
1673446Smrj 	size_t		len;
1683446Smrj 	uint_t		prot;
1693446Smrj 	pfn_t		pfn;
1703446Smrj 
1713446Smrj 	if (khat_running)
1723446Smrj 		panic("va_to_pfn(): called too late\n");
1733446Smrj 
1743446Smrj 	if (kbm_probe(&va, &len, &pfn, &prot) == 0)
1753446Smrj 		return (PFN_INVALID);
1763446Smrj 	if (va > des_va)
1773446Smrj 		return (PFN_INVALID);
1783446Smrj 	if (va < des_va)
1793446Smrj 		pfn += mmu_btop(des_va - va);
1803446Smrj 	return (pfn);
1813446Smrj }
1823446Smrj 
1833446Smrj /*
1843446Smrj  * Initialize a special area in the kernel that always holds some PTEs for
1853446Smrj  * faster performance. This always holds segmap's PTEs.
1863446Smrj  * In the 32 bit kernel this maps the kernel heap too.
1873446Smrj  */
1883446Smrj void
hat_kmap_init(uintptr_t base,size_t len)1893446Smrj hat_kmap_init(uintptr_t base, size_t len)
1903446Smrj {
1913446Smrj 	uintptr_t map_addr;	/* base rounded down to large page size */
1923446Smrj 	uintptr_t map_eaddr;	/* base + len rounded up */
1933446Smrj 	size_t map_len;
1943446Smrj 	caddr_t ptes;		/* mapping area in kernel for kmap ptes */
1953446Smrj 	size_t window_size;	/* size of mapping area for ptes */
1963446Smrj 	ulong_t htable_cnt;	/* # of page tables to cover map_len */
1973446Smrj 	ulong_t i;
1983446Smrj 	htable_t *ht;
1993446Smrj 	uintptr_t va;
2003446Smrj 
2013446Smrj 	/*
2023446Smrj 	 * We have to map in an area that matches an entire page table.
2035084Sjohnlev 	 * The PTEs are large page aligned to avoid spurious pagefaults
2045084Sjohnlev 	 * on the hypervisor.
2053446Smrj 	 */
2063446Smrj 	map_addr = base & LEVEL_MASK(1);
2073446Smrj 	map_eaddr = (base + len + LEVEL_SIZE(1) - 1) & LEVEL_MASK(1);
2083446Smrj 	map_len = map_eaddr - map_addr;
2093446Smrj 	window_size = mmu_btop(map_len) * mmu.pte_size;
2103446Smrj 	window_size = (window_size + LEVEL_SIZE(1)) & LEVEL_MASK(1);
2113446Smrj 	htable_cnt = map_len >> LEVEL_SHIFT(1);
2123446Smrj 
2133446Smrj 	/*
2143446Smrj 	 * allocate vmem for the kmap_ptes
2153446Smrj 	 */
2163446Smrj 	ptes = vmem_xalloc(heap_arena, window_size, LEVEL_SIZE(1), 0,
2173446Smrj 	    0, NULL, NULL, VM_SLEEP);
2183446Smrj 	mmu.kmap_htables =
2193446Smrj 	    kmem_alloc(htable_cnt * sizeof (htable_t *), KM_SLEEP);
2203446Smrj 
2213446Smrj 	/*
2223446Smrj 	 * Map the page tables that cover kmap into the allocated range.
2233446Smrj 	 * Note we don't ever htable_release() the kmap page tables - they
2243446Smrj 	 * can't ever be stolen, freed, etc.
2253446Smrj 	 */
2263446Smrj 	for (va = map_addr, i = 0; i < htable_cnt; va += LEVEL_SIZE(1), ++i) {
2273446Smrj 		ht = htable_create(kas.a_hat, va, 0, NULL);
2283446Smrj 		if (ht == NULL)
2293446Smrj 			panic("hat_kmap_init: ht == NULL");
2303446Smrj 		mmu.kmap_htables[i] = ht;
2313446Smrj 
2323446Smrj 		hat_devload(kas.a_hat, ptes + i * MMU_PAGESIZE,
2333446Smrj 		    MMU_PAGESIZE, ht->ht_pfn,
2345084Sjohnlev #ifdef __xpv
2355084Sjohnlev 		    PROT_READ | HAT_NOSYNC | HAT_UNORDERED_OK,
2365084Sjohnlev #else
2373446Smrj 		    PROT_READ | PROT_WRITE | HAT_NOSYNC | HAT_UNORDERED_OK,
2385084Sjohnlev #endif
2393446Smrj 		    HAT_LOAD | HAT_LOAD_NOCONSIST);
2403446Smrj 	}
2413446Smrj 
2423446Smrj 	/*
2433446Smrj 	 * set information in mmu to activate handling of kmap
2443446Smrj 	 */
2453446Smrj 	mmu.kmap_addr = map_addr;
2463446Smrj 	mmu.kmap_eaddr = map_eaddr;
2473446Smrj 	mmu.kmap_ptes = (x86pte_t *)ptes;
2483446Smrj }
2493446Smrj 
2503446Smrj extern caddr_t	kpm_vbase;
2513446Smrj extern size_t	kpm_size;
2523446Smrj 
2535084Sjohnlev #ifdef __xpv
2545084Sjohnlev /*
2555084Sjohnlev  * Create the initial segkpm mappings for the hypervisor. To avoid having
2565084Sjohnlev  * to deal with page tables being read only, we make all mappings
2575084Sjohnlev  * read only at first.
2585084Sjohnlev  */
2595084Sjohnlev static void
xen_kpm_create(paddr_t paddr,level_t lvl)2605084Sjohnlev xen_kpm_create(paddr_t paddr, level_t lvl)
2615084Sjohnlev {
2625084Sjohnlev 	ulong_t pg_off;
2635084Sjohnlev 
2645084Sjohnlev 	for (pg_off = 0; pg_off < LEVEL_SIZE(lvl); pg_off += MMU_PAGESIZE) {
2655084Sjohnlev 		kbm_map((uintptr_t)kpm_vbase + paddr, (paddr_t)0, 0, 1);
2665084Sjohnlev 		kbm_read_only((uintptr_t)kpm_vbase + paddr + pg_off,
2675084Sjohnlev 		    paddr + pg_off);
2685084Sjohnlev 	}
2695084Sjohnlev }
2705084Sjohnlev 
2715084Sjohnlev /*
2725084Sjohnlev  * Try to make all kpm mappings writable. Failures are ok, as those
2735084Sjohnlev  * are just pagetable, GDT, etc. pages.
2745084Sjohnlev  */
2755084Sjohnlev static void
xen_kpm_finish_init(void)2765084Sjohnlev xen_kpm_finish_init(void)
2775084Sjohnlev {
2785084Sjohnlev 	pfn_t gdtpfn = mmu_btop(CPU->cpu_m.mcpu_gdtpa);
2795084Sjohnlev 	pfn_t pfn;
2805084Sjohnlev 	page_t *pp;
2815084Sjohnlev 
2825084Sjohnlev 	for (pfn = 0; pfn < mfn_count; ++pfn) {
2835084Sjohnlev 		/*
2845084Sjohnlev 		 * skip gdt
2855084Sjohnlev 		 */
2865084Sjohnlev 		if (pfn == gdtpfn)
2875084Sjohnlev 			continue;
2885084Sjohnlev 
2895084Sjohnlev 		/*
2905084Sjohnlev 		 * p_index is a hint that this is a pagetable
2915084Sjohnlev 		 */
2925084Sjohnlev 		pp = page_numtopp_nolock(pfn);
2935084Sjohnlev 		if (pp && pp->p_index) {
2945084Sjohnlev 			pp->p_index = 0;
2955084Sjohnlev 			continue;
2965084Sjohnlev 		}
2975084Sjohnlev 		(void) xen_kpm_page(pfn, PT_VALID | PT_WRITABLE);
2985084Sjohnlev 	}
2995084Sjohnlev }
3005084Sjohnlev #endif
3015084Sjohnlev 
3023446Smrj /*
3033446Smrj  * Routine to pre-allocate data structures for hat_kern_setup(). It computes
3043446Smrj  * how many pagetables it needs by walking the boot loader's page tables.
3053446Smrj  */
3063446Smrj /*ARGSUSED*/
3073446Smrj void
hat_kern_alloc(caddr_t segmap_base,size_t segmap_size,caddr_t ekernelheap)3083446Smrj hat_kern_alloc(
3093446Smrj 	caddr_t	segmap_base,
3103446Smrj 	size_t	segmap_size,
3113446Smrj 	caddr_t	ekernelheap)
3123446Smrj {
3133446Smrj 	uintptr_t	last_va = (uintptr_t)-1;	/* catch 1st time */
3143446Smrj 	uintptr_t	va = 0;
3153446Smrj 	size_t		size;
3163446Smrj 	pfn_t		pfn;
3173446Smrj 	uint_t		prot;
3183446Smrj 	uint_t		table_cnt = 1;
3193446Smrj 	uint_t		mapping_cnt;
3203446Smrj 	level_t		start_level;
3213446Smrj 	level_t		l;
3223446Smrj 	struct memlist	*pmem;
3233446Smrj 	level_t		lpagel = mmu.max_page_level;
3243446Smrj 	uint64_t	paddr;
3253446Smrj 	int64_t		psize;
3265084Sjohnlev 	int		nwindows;
3273446Smrj 
3283446Smrj 	if (kpm_size > 0) {
3293446Smrj 		/*
3305084Sjohnlev 		 * Create the kpm page tables.  When running on the
3315084Sjohnlev 		 * hypervisor these are made read/only at first.
3325084Sjohnlev 		 * Later we'll add write permission where possible.
3333446Smrj 		 */
334*11474SJonathan.Adams@Sun.COM 		for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
335*11474SJonathan.Adams@Sun.COM 			paddr = pmem->ml_address;
336*11474SJonathan.Adams@Sun.COM 			psize = pmem->ml_size;
3373446Smrj 			while (psize >= MMU_PAGESIZE) {
3385349Skchow 				/* find the largest page size */
3395349Skchow 				for (l = lpagel; l > 0; l--) {
3405349Skchow 					if ((paddr & LEVEL_OFFSET(l)) == 0 &&
3415349Skchow 					    psize > LEVEL_SIZE(l))
3425349Skchow 						break;
3435349Skchow 				}
3445349Skchow 
3455084Sjohnlev #if defined(__xpv)
3465084Sjohnlev 				/*
3475084Sjohnlev 				 * Create read/only mappings to avoid
3485084Sjohnlev 				 * conflicting with pagetable usage
3495084Sjohnlev 				 */
3505084Sjohnlev 				xen_kpm_create(paddr, l);
3515084Sjohnlev #else
3523446Smrj 				kbm_map((uintptr_t)kpm_vbase + paddr, paddr,
3533446Smrj 				    l, 1);
3545084Sjohnlev #endif
3553446Smrj 				paddr += LEVEL_SIZE(l);
3563446Smrj 				psize -= LEVEL_SIZE(l);
3573446Smrj 			}
3583446Smrj 		}
3595084Sjohnlev 	}
3605084Sjohnlev 
3615084Sjohnlev 	/*
3625084Sjohnlev 	 * If this machine doesn't have a kpm segment, we need to allocate
3635084Sjohnlev 	 * a small number of 'windows' which can be used to map pagetables.
3645084Sjohnlev 	 */
3655084Sjohnlev 	nwindows = (kpm_size == 0) ? 2 * NCPU : 0;
3665084Sjohnlev 
3675084Sjohnlev #if defined(__xpv)
3685084Sjohnlev 	/*
3695084Sjohnlev 	 * On a hypervisor, these windows are also used by the xpv_panic
3705084Sjohnlev 	 * code, where we need one window for each level of the pagetable
3715084Sjohnlev 	 * hierarchy.
3725084Sjohnlev 	 */
3735084Sjohnlev 	nwindows = MAX(nwindows, mmu.max_level);
3745084Sjohnlev #endif
3755084Sjohnlev 
3765084Sjohnlev 	if (nwindows != 0) {
3773446Smrj 		/*
3783446Smrj 		 * Create the page windows and 1 page of VA in
3793446Smrj 		 * which we map the PTEs of those windows.
3803446Smrj 		 */
3815084Sjohnlev 		mmu.pwin_base = vmem_xalloc(heap_arena, nwindows * MMU_PAGESIZE,
3823446Smrj 		    LEVEL_SIZE(1), 0, 0, NULL, NULL, VM_SLEEP);
3835084Sjohnlev 		ASSERT(nwindows <= MMU_PAGESIZE / mmu.pte_size);
3843446Smrj 		mmu.pwin_pte_va = vmem_xalloc(heap_arena, MMU_PAGESIZE,
3853446Smrj 		    MMU_PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
3863446Smrj 
3873446Smrj 		/*
3883446Smrj 		 * Find/Create the page table window mappings.
3893446Smrj 		 */
3903446Smrj 		paddr = 0;
3913446Smrj 		(void) find_pte((uintptr_t)mmu.pwin_base, &paddr, 0, 0);
3923446Smrj 		ASSERT(paddr != 0);
3933446Smrj 		ASSERT((paddr & MMU_PAGEOFFSET) == 0);
3943446Smrj 		mmu.pwin_pte_pa = paddr;
3955084Sjohnlev #ifdef __xpv
3965084Sjohnlev 		(void) find_pte((uintptr_t)mmu.pwin_pte_va, NULL, 0, 0);
3975084Sjohnlev 		kbm_read_only((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa);
3985084Sjohnlev #else
3993446Smrj 		kbm_map((uintptr_t)mmu.pwin_pte_va, mmu.pwin_pte_pa, 0, 1);
4005084Sjohnlev #endif
4013446Smrj 	}
4023446Smrj 
4033446Smrj 	/*
4043446Smrj 	 * Walk the boot loader's page tables and figure out
4053446Smrj 	 * how many tables and page mappings there will be.
4063446Smrj 	 */
4073446Smrj 	while (kbm_probe(&va, &size, &pfn, &prot) != 0) {
4083446Smrj 		/*
4093446Smrj 		 * At each level, if the last_va falls into a new htable,
4103446Smrj 		 * increment table_cnt. We can stop at the 1st level where
4113446Smrj 		 * they are in the same htable.
4123446Smrj 		 */
4135349Skchow 		start_level = 0;
4145349Skchow 		while (start_level <= mmu.max_page_level) {
4155349Skchow 			if (size == LEVEL_SIZE(start_level))
4165349Skchow 				break;
4175349Skchow 			start_level++;
4185349Skchow 		}
4193446Smrj 
4203446Smrj 		for (l = start_level; l < mmu.max_level; ++l) {
4213446Smrj 			if (va >> LEVEL_SHIFT(l + 1) ==
4223446Smrj 			    last_va >> LEVEL_SHIFT(l + 1))
4233446Smrj 				break;
4243446Smrj 			++table_cnt;
4253446Smrj 		}
4263446Smrj 		last_va = va;
4275349Skchow 		l = (start_level == 0) ? 1 : start_level;
4285349Skchow 		va = (va & LEVEL_MASK(l)) + LEVEL_SIZE(l);
4293446Smrj 	}
4303446Smrj 
4313446Smrj 	/*
4323446Smrj 	 * Besides the boot loader mappings, we're going to fill in
4333446Smrj 	 * the entire top level page table for the kernel. Make sure there's
4343446Smrj 	 * enough reserve for that too.
4353446Smrj 	 */
4363446Smrj 	table_cnt += mmu.top_level_count - ((kernelbase >>
4373446Smrj 	    LEVEL_SHIFT(mmu.max_level)) & (mmu.top_level_count - 1));
4383446Smrj 
4393446Smrj #if defined(__i386)
4403446Smrj 	/*
4413446Smrj 	 * The 32 bit PAE hat allocates tables one level below the top when
4423446Smrj 	 * kernelbase isn't 1 Gig aligned. We'll just be sloppy and allocate
4433446Smrj 	 * a bunch more to the reserve. Any unused will be returned later.
4443446Smrj 	 * Note we've already counted these mappings, just not the extra
4453446Smrj 	 * pagetables.
4463446Smrj 	 */
4473446Smrj 	if (mmu.pae_hat != 0 && (kernelbase & LEVEL_OFFSET(mmu.max_level)) != 0)
4483446Smrj 		table_cnt += mmu.ptes_per_table -
4493446Smrj 		    ((kernelbase & LEVEL_OFFSET(mmu.max_level)) >>
4503446Smrj 		    LEVEL_SHIFT(mmu.max_level - 1));
4513446Smrj #endif
4523446Smrj 
4533446Smrj 	/*
4543446Smrj 	 * Add 1/4 more into table_cnt for extra slop.  The unused
4553446Smrj 	 * slop is freed back when we htable_adjust_reserve() later.
4563446Smrj 	 */
4573446Smrj 	table_cnt += table_cnt >> 2;
4583446Smrj 
4593446Smrj 	/*
4603446Smrj 	 * We only need mapping entries (hments) for shared pages.
4613446Smrj 	 * This should be far, far fewer than the total possible,
4623446Smrj 	 * We'll allocate enough for 1/16 of all possible PTEs.
4633446Smrj 	 */
4643446Smrj 	mapping_cnt = (table_cnt * mmu.ptes_per_table) >> 4;
4653446Smrj 
4663446Smrj 	/*
4673446Smrj 	 * Now create the initial htable/hment reserves
4683446Smrj 	 */
4693446Smrj 	htable_initial_reserve(table_cnt);
4703446Smrj 	hment_reserve(mapping_cnt);
4713446Smrj 	x86pte_cpu_init(CPU);
4723446Smrj }
4733446Smrj 
4743446Smrj 
4753446Smrj /*
4763446Smrj  * This routine handles the work of creating the kernel's initial mappings
4773446Smrj  * by deciphering the mappings in the page tables created by the boot program.
4783446Smrj  *
4793446Smrj  * We maintain large page mappings, but only to a level 1 pagesize.
4803446Smrj  * The boot loader can only add new mappings once this function starts.
4813446Smrj  * In particular it can not change the pagesize used for any existing
4823446Smrj  * mappings or this code breaks!
4833446Smrj  */
4843446Smrj 
4853446Smrj void
hat_kern_setup(void)4863446Smrj hat_kern_setup(void)
4873446Smrj {
4883446Smrj 	/*
4893446Smrj 	 * Attach htables to the existing pagetables
4903446Smrj 	 */
4915084Sjohnlev 	/* BEGIN CSTYLED */
4923446Smrj 	htable_attach(kas.a_hat, 0, mmu.max_level, NULL,
4935084Sjohnlev #ifdef __xpv
4945084Sjohnlev 	    mmu_btop(xen_info->pt_base - ONE_GIG));
4955084Sjohnlev #else
4963446Smrj 	    mmu_btop(getcr3()));
4975084Sjohnlev #endif
4985084Sjohnlev 	/* END CSTYLED */
4993446Smrj 
5005084Sjohnlev #if defined(__i386) && !defined(__xpv)
5015460Sjosephb 	CPU->cpu_tss->tss_cr3 = dftss0->tss_cr3 = getcr3();
5023446Smrj #endif /* __i386 */
5033446Smrj 
5045084Sjohnlev #if defined(__xpv) && defined(__amd64)
5055084Sjohnlev 	/*
5065084Sjohnlev 	 * Try to make the kpm mappings r/w. Failures here are OK, as
5075084Sjohnlev 	 * it's probably just a pagetable
5085084Sjohnlev 	 */
5095084Sjohnlev 	xen_kpm_finish_init();
5105084Sjohnlev #endif
5115084Sjohnlev 
5123446Smrj 	/*
5133446Smrj 	 * The kernel HAT is now officially open for business.
5143446Smrj 	 */
5153446Smrj 	khat_running = 1;
5163446Smrj 
5173446Smrj 	CPUSET_ATOMIC_ADD(kas.a_hat->hat_cpus, CPU->cpu_id);
5183446Smrj 	CPU->cpu_current_hat = kas.a_hat;
5193446Smrj }
520