xref: /onnv-gate/usr/src/uts/i86pc/vm/htable.h (revision 5084:7d838c5c0eed)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53446Smrj  * Common Development and Distribution License (the "License").
63446Smrj  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223446Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate #ifndef	_VM_HTABLE_H
270Sstevel@tonic-gate #define	_VM_HTABLE_H
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
300Sstevel@tonic-gate 
310Sstevel@tonic-gate #ifdef	__cplusplus
320Sstevel@tonic-gate extern "C" {
330Sstevel@tonic-gate #endif
340Sstevel@tonic-gate 
350Sstevel@tonic-gate #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL)
360Sstevel@tonic-gate #include <asm/htable.h>
370Sstevel@tonic-gate #endif
380Sstevel@tonic-gate 
390Sstevel@tonic-gate extern void atomic_andb(uint8_t *addr, uint8_t value);
400Sstevel@tonic-gate extern void atomic_orb(uint8_t *addr, uint8_t value);
410Sstevel@tonic-gate extern void atomic_inc16(uint16_t *addr);
420Sstevel@tonic-gate extern void atomic_dec16(uint16_t *addr);
430Sstevel@tonic-gate extern void mmu_tlbflush_entry(caddr_t addr);
440Sstevel@tonic-gate 
450Sstevel@tonic-gate /*
460Sstevel@tonic-gate  * Each hardware page table has an htable_t describing it.
470Sstevel@tonic-gate  *
480Sstevel@tonic-gate  * We use a reference counter mechanism to detect when we can free an htable.
490Sstevel@tonic-gate  * In the implmentation the reference count is split into 2 separate counters:
500Sstevel@tonic-gate  *
510Sstevel@tonic-gate  *	ht_busy is a traditional reference count of uses of the htable pointer
520Sstevel@tonic-gate  *
530Sstevel@tonic-gate  *	ht_valid_cnt is a count of how references are implied by valid PTE/PTP
540Sstevel@tonic-gate  *	         entries in the pagetable
550Sstevel@tonic-gate  *
560Sstevel@tonic-gate  * ht_busy is only incremented by htable_lookup() or htable_create()
570Sstevel@tonic-gate  * while holding the appropriate hash_table mutex. While installing a new
580Sstevel@tonic-gate  * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have
590Sstevel@tonic-gate  * done an htable_lookup() or htable_create() but not the htable_release yet.
600Sstevel@tonic-gate  *
610Sstevel@tonic-gate  * htable_release(), while holding the mutex, can know that if
620Sstevel@tonic-gate  * busy == 1 and valid_cnt == 0, the htable can be free'd.
630Sstevel@tonic-gate  *
640Sstevel@tonic-gate  * The fields have been ordered to make htable_lookup() fast. Hence,
650Sstevel@tonic-gate  * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together.
660Sstevel@tonic-gate  */
670Sstevel@tonic-gate struct htable {
680Sstevel@tonic-gate 	struct htable	*ht_next;	/* forward link for hash table */
690Sstevel@tonic-gate 	struct hat	*ht_hat;	/* hat this mapping comes from */
700Sstevel@tonic-gate 	uintptr_t	ht_vaddr;	/* virt addr at start of this table */
713446Smrj 	int8_t		ht_level;	/* page table level: 0=4K, 1=2M, ... */
723446Smrj 	uint8_t		ht_flags;	/* see below */
730Sstevel@tonic-gate 	int16_t		ht_busy;	/* implements locking protocol */
740Sstevel@tonic-gate 	int16_t		ht_valid_cnt;	/* # of valid entries in this table */
750Sstevel@tonic-gate 	uint32_t	ht_lock_cnt;	/* # of locked entries in this table */
760Sstevel@tonic-gate 					/* never used for kernel hat */
770Sstevel@tonic-gate 	pfn_t		ht_pfn;		/* pfn of page of the pagetable */
780Sstevel@tonic-gate 	struct htable	*ht_prev;	/* backward link for hash table */
790Sstevel@tonic-gate 	struct htable	*ht_parent;	/* htable that points to this htable */
800Sstevel@tonic-gate 	struct htable	*ht_shares;	/* for HTABLE_SHARED_PFN only */
810Sstevel@tonic-gate };
820Sstevel@tonic-gate typedef struct htable htable_t;
830Sstevel@tonic-gate 
/*
 * Flag values for the htable ht_flags field (a uint8_t bit mask):
 *
 * HTABLE_VLP - this is the top level htable of a VLP HAT.
 *
 * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another
 *	htable. Used by hat_share() for ISM.
 */
#define	HTABLE_VLP		(0x01)
#define	HTABLE_SHARED_PFN	(0x02)
940Sstevel@tonic-gate 
/*
 * The htable hash table hashing function.  The 28 is so that high
 * order bits are included in the hash index to skew the wrap
 * around of addresses. Even though the hash buckets are stored per
 * hat we include the value of the hat pointer in the hash function so
 * that the secondary hash for the htable mutex winds up being different in
 * every address space.
 *
 * "hat" and "va" are each expanded twice, so the arguments must not have
 * side effects.  The result is masked with (hat_num_hash - 1).
 * NOTE(review): masking presumably relies on hat_num_hash being a power of
 * two - confirm against the code that sets it.
 */
#define	HTABLE_HASH(hat, va, lvl)					\
	((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) +		\
	((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1))
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate /*
1080Sstevel@tonic-gate  * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info.
1090Sstevel@tonic-gate  */
1100Sstevel@tonic-gate struct hat_cpu_info {
1110Sstevel@tonic-gate 	kmutex_t hci_mutex;		/* mutex to ensure sequential usage */
1120Sstevel@tonic-gate #if defined(__amd64)
1130Sstevel@tonic-gate 	pfn_t	hci_vlp_pfn;		/* pfn of hci_vlp_l3ptes */
1140Sstevel@tonic-gate 	x86pte_t *hci_vlp_l3ptes;	/* VLP Level==3 pagetable (top) */
1150Sstevel@tonic-gate 	x86pte_t *hci_vlp_l2ptes;	/* VLP Level==2 pagetable */
1160Sstevel@tonic-gate #endif	/* __amd64 */
1170Sstevel@tonic-gate };
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate 
/*
 * HTABLE_LAST_PAGE(ht): compute the last page aligned VA mapped by an htable.
 *
 * NEXT_ENTRY_VA(va, l): given a va and a level, compute the virtual address
 * of the start of the next page at that level.
 *
 * XX64 - The check for the VA hole needs to be better generalized.
 */
1280Sstevel@tonic-gate #if defined(__amd64)
/*
 * Number of entries in the pagetable described by "ht": 4 when the htable
 * is flagged HTABLE_VLP, 512 otherwise.
 */
#define	HTABLE_NUM_PTES(ht)	(((ht)->ht_flags & HTABLE_VLP) ? 4 : 512)

/*
 * Last page aligned VA mapped by "ht".  A table at mmu.max_level yields the
 * highest page aligned address (0 - MMU_PAGESIZE, wrapping); any other table
 * yields ht_vaddr plus the span of all its entries, less one page.
 */
#define	HTABLE_LAST_PAGE(ht)						\
	((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\
	((ht)->ht_vaddr - MMU_PAGESIZE +				\
	((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))))

/*
 * Start of the level "l" entry following the one that contains "va".  If
 * that address equals mmu.hole_start, mmu.hole_end is returned instead so
 * the VA hole is skipped.  "va" is parenthesized so that expression
 * arguments (e.g. "a | b") are masked as a whole; it is expanded twice, so
 * it must not have side effects.
 */
#define	NEXT_ENTRY_VA(va, l)	\
	(((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ?	\
	mmu.hole_end : ((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l))
1390Sstevel@tonic-gate 
1400Sstevel@tonic-gate #elif defined(__i386)
1410Sstevel@tonic-gate 
/*
 * Number of entries in the pagetable described by "ht": 1024 when
 * mmu.pae_hat is clear; otherwise 4 for a level 2 table and 512 for the rest.
 */
#define	HTABLE_NUM_PTES(ht)	\
	(!mmu.pae_hat ? 1024 : ((ht)->ht_level == 2 ? 4 : 512))

/*
 * Last page aligned VA mapped by "ht": ht_vaddr plus the span of all its
 * entries, less one page.
 */
#define	HTABLE_LAST_PAGE(ht)	((ht)->ht_vaddr - MMU_PAGESIZE + \
	((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))

/*
 * Start of the level "l" entry following the one that contains "va".
 * "va" is parenthesized so that expression arguments (e.g. "a | b") are
 * masked as a whole.
 */
#define	NEXT_ENTRY_VA(va, l) (((va) & LEVEL_MASK(l)) + LEVEL_SIZE(l))
1490Sstevel@tonic-gate 
1500Sstevel@tonic-gate #endif
1510Sstevel@tonic-gate 
1520Sstevel@tonic-gate #if defined(_KERNEL)
1530Sstevel@tonic-gate 
/*
 * Initialization function called from hat_init().
 */
extern void htable_init(void);
1580Sstevel@tonic-gate 
1590Sstevel@tonic-gate /*
1600Sstevel@tonic-gate  * Functions to lookup, or "lookup and create", the htable corresponding
1610Sstevel@tonic-gate  * to the virtual address "vaddr"  in the "hat" at the given "level" of
1620Sstevel@tonic-gate  * page tables. htable_lookup() may return NULL if no such entry exists.
1630Sstevel@tonic-gate  *
1640Sstevel@tonic-gate  * On return the given htable is marked busy (a shared lock) - this prevents
1650Sstevel@tonic-gate  * the htable from being stolen or freed) until htable_release() is called.
1660Sstevel@tonic-gate  *
1670Sstevel@tonic-gate  * If kalloc_flag is set on an htable_create() we can't call kmem allocation
1680Sstevel@tonic-gate  * routines for this htable, since it's for the kernel hat itself.
1690Sstevel@tonic-gate  *
1700Sstevel@tonic-gate  * htable_acquire() is used when an htable pointer has been extracted from
1710Sstevel@tonic-gate  * an hment and we need to get a reference to the htable.
1720Sstevel@tonic-gate  */
1730Sstevel@tonic-gate extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level);
1740Sstevel@tonic-gate extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level,
1750Sstevel@tonic-gate 	htable_t *shared);
1760Sstevel@tonic-gate extern void htable_acquire(htable_t *);
1770Sstevel@tonic-gate 
1780Sstevel@tonic-gate extern void htable_release(htable_t *ht);
1793446Smrj extern void htable_destroy(htable_t *ht);
1800Sstevel@tonic-gate 
/*
 * Frees all remaining htables for a hat. Called after the hat is no
 * longer in use by any thread.
 */
extern void htable_purge_hat(struct hat *hat);
1860Sstevel@tonic-gate 
1870Sstevel@tonic-gate /*
1880Sstevel@tonic-gate  * Find the htable, page table entry index, and PTE of the given virtual
1890Sstevel@tonic-gate  * address.  If not found returns NULL. When found, returns the htable_t *,
1900Sstevel@tonic-gate  * sets entry, and has a hold on the htable.
1910Sstevel@tonic-gate  */
1920Sstevel@tonic-gate extern htable_t *htable_getpte(struct hat *, uintptr_t, uint_t *, x86pte_t *,
1930Sstevel@tonic-gate 	level_t);
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate /*
1960Sstevel@tonic-gate  * Similar to hat_getpte(), except that this only succeeds if a valid
1970Sstevel@tonic-gate  * page mapping is present.
1980Sstevel@tonic-gate  */
1990Sstevel@tonic-gate extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry);
2000Sstevel@tonic-gate 
/*
 * Called to allocate initial (htable_initial_reserve) or additional
 * (htable_reserve) htables for the reserve.
 */
extern void htable_initial_reserve(uint_t);
extern void htable_reserve(uint_t);
2060Sstevel@tonic-gate 
/*
 * Used to readjust the htable reserve after the reserve list has been used.
 * Also called after boot to release leftover boot reserves.
 */
extern void htable_adjust_reserve(void);
2120Sstevel@tonic-gate 
/*
 * Returns the number of bytes mapped by all the htables in a given hat.
 */
extern size_t htable_mapped(struct hat *);
217*5084Sjohnlev 
218*5084Sjohnlev 
219*5084Sjohnlev /*
2203446Smrj  * Attach initial pagetables as htables
2213446Smrj  */
2223446Smrj extern void htable_attach(struct hat *, uintptr_t, level_t, struct htable *,
2233446Smrj     pfn_t);
2243446Smrj 
2253446Smrj /*
2260Sstevel@tonic-gate  * Routine to find the next populated htable at or above a given virtual
2270Sstevel@tonic-gate  * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate
2280Sstevel@tonic-gate  * that it should search the entire address space.  Similar to
2290Sstevel@tonic-gate  * hat_getpte(), but used for walking through address ranges. It can be
2300Sstevel@tonic-gate  * used like this:
2310Sstevel@tonic-gate  *
2320Sstevel@tonic-gate  *	va = ...
2330Sstevel@tonic-gate  *	ht = NULL;
2340Sstevel@tonic-gate  *	while (va < end_va) {
2350Sstevel@tonic-gate  *		pte = htable_walk(hat, &ht, &va, end_va);
2360Sstevel@tonic-gate  *		if (!pte)
2370Sstevel@tonic-gate  *			break;
2380Sstevel@tonic-gate  *
2390Sstevel@tonic-gate  *		... code to operate on page at va ...
2400Sstevel@tonic-gate  *
2410Sstevel@tonic-gate  *		va += LEVEL_SIZE(ht->ht_level);
2420Sstevel@tonic-gate  *	}
2430Sstevel@tonic-gate  *	if (ht)
2440Sstevel@tonic-gate  *		htable_release(ht);
2450Sstevel@tonic-gate  *
2460Sstevel@tonic-gate  */
2470Sstevel@tonic-gate extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va,
2480Sstevel@tonic-gate 	uintptr_t eaddr);
2490Sstevel@tonic-gate 
/* "eaddr" value for htable_walk() meaning: search the entire address space */
#define	HTABLE_WALK_TO_END ((uintptr_t)-1)
2510Sstevel@tonic-gate 
2520Sstevel@tonic-gate /*
2530Sstevel@tonic-gate  * Utilities convert between virtual addresses and page table entry indeces.
2540Sstevel@tonic-gate  */
2550Sstevel@tonic-gate extern uint_t htable_va2entry(uintptr_t va, htable_t *ht);
2560Sstevel@tonic-gate extern uintptr_t htable_e2va(htable_t *ht, uint_t entry);
2570Sstevel@tonic-gate 
2580Sstevel@tonic-gate /*
2590Sstevel@tonic-gate  * Interfaces that provide access to page table entries via the htable.
2600Sstevel@tonic-gate  *
2610Sstevel@tonic-gate  * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic.
2620Sstevel@tonic-gate  */
2633446Smrj extern void	x86pte_cpu_init(cpu_t *);
2643446Smrj extern void	x86pte_cpu_fini(cpu_t *);
2650Sstevel@tonic-gate 
2660Sstevel@tonic-gate extern x86pte_t	x86pte_get(htable_t *, uint_t entry);
2670Sstevel@tonic-gate 
/*
 * x86pte_set() returns LPAGE_ERROR if it's asked to overwrite a page table
 * link with a large page mapping.  The value is the negation of
 * (x86pte_t)1, i.e. all bits set for an unsigned x86pte_t.
 */
#define	LPAGE_ERROR (-(x86pte_t)1)
2730Sstevel@tonic-gate extern x86pte_t	x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *);
2740Sstevel@tonic-gate 
2753446Smrj extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry,
2763446Smrj 	x86pte_t old, x86pte_t *ptr);
2770Sstevel@tonic-gate 
2780Sstevel@tonic-gate extern x86pte_t x86pte_update(htable_t *ht, uint_t entry,
2790Sstevel@tonic-gate 	x86pte_t old, x86pte_t new);
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate extern void	x86pte_copy(htable_t *src, htable_t *dest, uint_t entry,
2820Sstevel@tonic-gate 	uint_t cnt);
2830Sstevel@tonic-gate 
2843446Smrj /*
2853446Smrj  * access to a pagetable knowing only the pfn
2863446Smrj  */
2873446Smrj extern x86pte_t *x86pte_mapin(pfn_t, uint_t, htable_t *);
2883446Smrj extern void x86pte_mapout(void);
2890Sstevel@tonic-gate 
/*
 * These are actually inlines for "lock; incw", "lock; decw", etc. instructions.
 *
 * HTABLE_INC()/HTABLE_DEC() take the 16-bit counter itself (an lvalue such
 * as ht->ht_busy); HTABLE_LOCK_INC()/HTABLE_LOCK_DEC() take the htable
 * pointer and adjust its ht_lock_cnt by one.
 */
#define	HTABLE_INC(x)	atomic_inc16((uint16_t *)&(x))
#define	HTABLE_DEC(x)	atomic_dec16((uint16_t *)&(x))
#define	HTABLE_LOCK_INC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, 1)
#define	HTABLE_LOCK_DEC(ht)	atomic_add_32(&(ht)->ht_lock_cnt, -1)
2970Sstevel@tonic-gate 
298*5084Sjohnlev #ifdef __xpv
299*5084Sjohnlev extern void xen_flush_va(caddr_t va);
300*5084Sjohnlev extern void xen_gflush_va(caddr_t va, cpuset_t);
301*5084Sjohnlev extern void xen_flush_tlb(void);
302*5084Sjohnlev extern void xen_gflush_tlb(cpuset_t);
303*5084Sjohnlev extern void xen_pin(pfn_t, level_t);
304*5084Sjohnlev extern void xen_unpin(pfn_t);
305*5084Sjohnlev extern int xen_kpm_page(pfn_t, uint_t);
306*5084Sjohnlev 
/*
 * The hypervisor maps all page tables into our address space read-only.
 * Under normal circumstances, the hypervisor then handles all updates to
 * the page tables underneath the covers for us.  However, when we are
 * trying to dump core after a hypervisor panic, the hypervisor is no
 * longer available to do these updates.  To work around the protection
 * problem, we simply disable write-protect checking for the duration of a
 * pagetable update operation.
 *
 * XPV_ALLOW_PAGETABLE_UPDATES() clears CR0_WP and
 * XPV_DISALLOW_PAGETABLE_UPDATES() sets it again; in both cases the value
 * written to %cr0 is masked to its low 32 bits.  The bodies are wrapped in
 * do { } while (0) so each use is a single statement that is safe in an
 * unbraced if/else; every use must be followed by a semicolon.
 *
 * NOTE(review): the two macros test IN_XPV_PANIC() differently (truth value
 * vs. "> 0").  Both tests are kept exactly as they were - confirm against
 * the definition of IN_XPV_PANIC() whether they are equivalent.
 */
#define	XPV_ALLOW_PAGETABLE_UPDATES()					\
	do {								\
		if (IN_XPV_PANIC())					\
			setcr0((getcr0() & ~CR0_WP) & 0xffffffff);	\
	} while (0)
#define	XPV_DISALLOW_PAGETABLE_UPDATES()				\
	do {								\
		if (IN_XPV_PANIC() > 0)					\
			setcr0((getcr0() | CR0_WP) & 0xffffffff);	\
	} while (0)
326*5084Sjohnlev 
327*5084Sjohnlev #else /* __xpv */
328*5084Sjohnlev 
/* Without __xpv both macros expand to nothing. */
#define	XPV_ALLOW_PAGETABLE_UPDATES()
#define	XPV_DISALLOW_PAGETABLE_UPDATES()
331*5084Sjohnlev 
332*5084Sjohnlev #endif
333*5084Sjohnlev 
3340Sstevel@tonic-gate #endif	/* _KERNEL */
3350Sstevel@tonic-gate 
3360Sstevel@tonic-gate 
3370Sstevel@tonic-gate #ifdef	__cplusplus
3380Sstevel@tonic-gate }
3390Sstevel@tonic-gate #endif
3400Sstevel@tonic-gate 
3410Sstevel@tonic-gate #endif	/* _VM_HTABLE_H */
342