10Sstevel@tonic-gate /* 20Sstevel@tonic-gate * CDDL HEADER START 30Sstevel@tonic-gate * 40Sstevel@tonic-gate * The contents of this file are subject to the terms of the 53446Smrj * Common Development and Distribution License (the "License"). 63446Smrj * You may not use this file except in compliance with the License. 70Sstevel@tonic-gate * 80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 100Sstevel@tonic-gate * See the License for the specific language governing permissions 110Sstevel@tonic-gate * and limitations under the License. 120Sstevel@tonic-gate * 130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 180Sstevel@tonic-gate * 190Sstevel@tonic-gate * CDDL HEADER END 200Sstevel@tonic-gate */ 210Sstevel@tonic-gate /* 223446Smrj * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 230Sstevel@tonic-gate * Use is subject to license terms. 240Sstevel@tonic-gate */ 250Sstevel@tonic-gate 260Sstevel@tonic-gate #ifndef _VM_HTABLE_H 270Sstevel@tonic-gate #define _VM_HTABLE_H 280Sstevel@tonic-gate 290Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 300Sstevel@tonic-gate 310Sstevel@tonic-gate #ifdef __cplusplus 320Sstevel@tonic-gate extern "C" { 330Sstevel@tonic-gate #endif 340Sstevel@tonic-gate 350Sstevel@tonic-gate #if defined(__GNUC__) && defined(_ASM_INLINES) && defined(_KERNEL) 360Sstevel@tonic-gate #include <asm/htable.h> 370Sstevel@tonic-gate #endif 380Sstevel@tonic-gate 390Sstevel@tonic-gate extern void atomic_andb(uint8_t *addr, uint8_t value); 400Sstevel@tonic-gate extern void atomic_orb(uint8_t *addr, uint8_t value); 410Sstevel@tonic-gate extern void atomic_inc16(uint16_t *addr); 420Sstevel@tonic-gate extern void atomic_dec16(uint16_t *addr); 430Sstevel@tonic-gate extern void mmu_tlbflush_entry(caddr_t addr); 440Sstevel@tonic-gate 450Sstevel@tonic-gate /* 460Sstevel@tonic-gate * Each hardware page table has an htable_t describing it. 470Sstevel@tonic-gate * 480Sstevel@tonic-gate * We use a reference counter mechanism to detect when we can free an htable. 490Sstevel@tonic-gate * In the implmentation the reference count is split into 2 separate counters: 500Sstevel@tonic-gate * 510Sstevel@tonic-gate * ht_busy is a traditional reference count of uses of the htable pointer 520Sstevel@tonic-gate * 530Sstevel@tonic-gate * ht_valid_cnt is a count of how references are implied by valid PTE/PTP 540Sstevel@tonic-gate * entries in the pagetable 550Sstevel@tonic-gate * 560Sstevel@tonic-gate * ht_busy is only incremented by htable_lookup() or htable_create() 570Sstevel@tonic-gate * while holding the appropriate hash_table mutex. While installing a new 580Sstevel@tonic-gate * valid PTE or PTP, in order to increment ht_valid_cnt a thread must have 590Sstevel@tonic-gate * done an htable_lookup() or htable_create() but not the htable_release yet. 600Sstevel@tonic-gate * 610Sstevel@tonic-gate * htable_release(), while holding the mutex, can know that if 620Sstevel@tonic-gate * busy == 1 and valid_cnt == 0, the htable can be free'd. 630Sstevel@tonic-gate * 640Sstevel@tonic-gate * The fields have been ordered to make htable_lookup() fast. Hence, 650Sstevel@tonic-gate * ht_hat, ht_vaddr, ht_level and ht_next need to be clustered together. 660Sstevel@tonic-gate */ 670Sstevel@tonic-gate struct htable { 680Sstevel@tonic-gate struct htable *ht_next; /* forward link for hash table */ 690Sstevel@tonic-gate struct hat *ht_hat; /* hat this mapping comes from */ 700Sstevel@tonic-gate uintptr_t ht_vaddr; /* virt addr at start of this table */ 713446Smrj int8_t ht_level; /* page table level: 0=4K, 1=2M, ... */ 723446Smrj uint8_t ht_flags; /* see below */ 730Sstevel@tonic-gate int16_t ht_busy; /* implements locking protocol */ 740Sstevel@tonic-gate int16_t ht_valid_cnt; /* # of valid entries in this table */ 750Sstevel@tonic-gate uint32_t ht_lock_cnt; /* # of locked entries in this table */ 760Sstevel@tonic-gate /* never used for kernel hat */ 770Sstevel@tonic-gate pfn_t ht_pfn; /* pfn of page of the pagetable */ 780Sstevel@tonic-gate struct htable *ht_prev; /* backward link for hash table */ 790Sstevel@tonic-gate struct htable *ht_parent; /* htable that points to this htable */ 800Sstevel@tonic-gate struct htable *ht_shares; /* for HTABLE_SHARED_PFN only */ 810Sstevel@tonic-gate }; 820Sstevel@tonic-gate typedef struct htable htable_t; 830Sstevel@tonic-gate 840Sstevel@tonic-gate /* 850Sstevel@tonic-gate * Flags values for htable ht_flags field: 860Sstevel@tonic-gate * 870Sstevel@tonic-gate * HTABLE_VLP - this is the top level htable of a VLP HAT. 880Sstevel@tonic-gate * 893446Smrj * HTABLE_SHARED_PFN - this htable had its PFN assigned from sharing another 900Sstevel@tonic-gate * htable. Used by hat_share() for ISM. 910Sstevel@tonic-gate */ 923446Smrj #define HTABLE_VLP (0x01) 933446Smrj #define HTABLE_SHARED_PFN (0x02) 940Sstevel@tonic-gate 950Sstevel@tonic-gate /* 960Sstevel@tonic-gate * The htable hash table hashing function. The 28 is so that high 970Sstevel@tonic-gate * order bits are include in the hash index to skew the wrap 982293Sjosephb * around of addresses. Even though the hash buckets are stored per 992293Sjosephb * hat we include the value of hat pointer in the hash function so 1002293Sjosephb * that the secondary hash for the htable mutex winds up begin different in 1012293Sjosephb * every address space. 1020Sstevel@tonic-gate */ 1030Sstevel@tonic-gate #define HTABLE_HASH(hat, va, lvl) \ 1042293Sjosephb ((((va) >> LEVEL_SHIFT(1)) + ((va) >> 28) + (lvl) + \ 1052293Sjosephb ((uintptr_t)(hat) >> 4)) & ((hat)->hat_num_hash - 1)) 1060Sstevel@tonic-gate 1070Sstevel@tonic-gate /* 1080Sstevel@tonic-gate * Each CPU gets a unique hat_cpu_info structure in cpu_hat_info. 1090Sstevel@tonic-gate */ 1100Sstevel@tonic-gate struct hat_cpu_info { 1110Sstevel@tonic-gate kmutex_t hci_mutex; /* mutex to ensure sequential usage */ 1120Sstevel@tonic-gate #if defined(__amd64) 1130Sstevel@tonic-gate pfn_t hci_vlp_pfn; /* pfn of hci_vlp_l3ptes */ 1140Sstevel@tonic-gate x86pte_t *hci_vlp_l3ptes; /* VLP Level==3 pagetable (top) */ 1150Sstevel@tonic-gate x86pte_t *hci_vlp_l2ptes; /* VLP Level==2 pagetable */ 1160Sstevel@tonic-gate #endif /* __amd64 */ 1170Sstevel@tonic-gate }; 1180Sstevel@tonic-gate 1190Sstevel@tonic-gate 1200Sstevel@tonic-gate /* 1210Sstevel@tonic-gate * Compute the last page aligned VA mapped by an htable. 1220Sstevel@tonic-gate * 1230Sstevel@tonic-gate * Given a va and a level, compute the virtual address of the start of the 1240Sstevel@tonic-gate * next page at that level. 1250Sstevel@tonic-gate * 1260Sstevel@tonic-gate * XX64 - The check for the VA hole needs to be better generalized. 1270Sstevel@tonic-gate */ 1280Sstevel@tonic-gate #if defined(__amd64) 129*5084Sjohnlev #define HTABLE_NUM_PTES(ht) (((ht)->ht_flags & HTABLE_VLP) ? 4 : 512) 1300Sstevel@tonic-gate 1310Sstevel@tonic-gate #define HTABLE_LAST_PAGE(ht) \ 1320Sstevel@tonic-gate ((ht)->ht_level == mmu.max_level ? ((uintptr_t)0UL - MMU_PAGESIZE) :\ 1330Sstevel@tonic-gate ((ht)->ht_vaddr - MMU_PAGESIZE + \ 1343446Smrj ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level)))) 1350Sstevel@tonic-gate 1360Sstevel@tonic-gate #define NEXT_ENTRY_VA(va, l) \ 1370Sstevel@tonic-gate ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l) == mmu.hole_start ? \ 1380Sstevel@tonic-gate mmu.hole_end : (va & LEVEL_MASK(l)) + LEVEL_SIZE(l)) 1390Sstevel@tonic-gate 1400Sstevel@tonic-gate #elif defined(__i386) 1410Sstevel@tonic-gate 142*5084Sjohnlev #define HTABLE_NUM_PTES(ht) \ 143*5084Sjohnlev (!mmu.pae_hat ? 1024 : ((ht)->ht_level == 2 ? 4 : 512)) 1443446Smrj 1450Sstevel@tonic-gate #define HTABLE_LAST_PAGE(ht) ((ht)->ht_vaddr - MMU_PAGESIZE + \ 1463446Smrj ((uintptr_t)HTABLE_NUM_PTES(ht) << LEVEL_SHIFT((ht)->ht_level))) 1470Sstevel@tonic-gate 1480Sstevel@tonic-gate #define NEXT_ENTRY_VA(va, l) ((va & LEVEL_MASK(l)) + LEVEL_SIZE(l)) 1490Sstevel@tonic-gate 1500Sstevel@tonic-gate #endif 1510Sstevel@tonic-gate 1520Sstevel@tonic-gate #if defined(_KERNEL) 1530Sstevel@tonic-gate 1540Sstevel@tonic-gate /* 1550Sstevel@tonic-gate * initialization function called from hat_init() 1560Sstevel@tonic-gate */ 1570Sstevel@tonic-gate extern void htable_init(void); 1580Sstevel@tonic-gate 1590Sstevel@tonic-gate /* 1600Sstevel@tonic-gate * Functions to lookup, or "lookup and create", the htable corresponding 1610Sstevel@tonic-gate * to the virtual address "vaddr" in the "hat" at the given "level" of 1620Sstevel@tonic-gate * page tables. htable_lookup() may return NULL if no such entry exists. 1630Sstevel@tonic-gate * 1640Sstevel@tonic-gate * On return the given htable is marked busy (a shared lock) - this prevents 1650Sstevel@tonic-gate * the htable from being stolen or freed) until htable_release() is called. 1660Sstevel@tonic-gate * 1670Sstevel@tonic-gate * If kalloc_flag is set on an htable_create() we can't call kmem allocation 1680Sstevel@tonic-gate * routines for this htable, since it's for the kernel hat itself. 1690Sstevel@tonic-gate * 1700Sstevel@tonic-gate * htable_acquire() is used when an htable pointer has been extracted from 1710Sstevel@tonic-gate * an hment and we need to get a reference to the htable. 1720Sstevel@tonic-gate */ 1730Sstevel@tonic-gate extern htable_t *htable_lookup(struct hat *hat, uintptr_t vaddr, level_t level); 1740Sstevel@tonic-gate extern htable_t *htable_create(struct hat *hat, uintptr_t vaddr, level_t level, 1750Sstevel@tonic-gate htable_t *shared); 1760Sstevel@tonic-gate extern void htable_acquire(htable_t *); 1770Sstevel@tonic-gate 1780Sstevel@tonic-gate extern void htable_release(htable_t *ht); 1793446Smrj extern void htable_destroy(htable_t *ht); 1800Sstevel@tonic-gate 1810Sstevel@tonic-gate /* 1820Sstevel@tonic-gate * Code to free all remaining htables for a hat. Called after the hat is no 1830Sstevel@tonic-gate * longer in use by any thread. 1840Sstevel@tonic-gate */ 1850Sstevel@tonic-gate extern void htable_purge_hat(struct hat *hat); 1860Sstevel@tonic-gate 1870Sstevel@tonic-gate /* 1880Sstevel@tonic-gate * Find the htable, page table entry index, and PTE of the given virtual 1890Sstevel@tonic-gate * address. If not found returns NULL. When found, returns the htable_t *, 1900Sstevel@tonic-gate * sets entry, and has a hold on the htable. 1910Sstevel@tonic-gate */ 1920Sstevel@tonic-gate extern htable_t *htable_getpte(struct hat *, uintptr_t, uint_t *, x86pte_t *, 1930Sstevel@tonic-gate level_t); 1940Sstevel@tonic-gate 1950Sstevel@tonic-gate /* 1960Sstevel@tonic-gate * Similar to hat_getpte(), except that this only succeeds if a valid 1970Sstevel@tonic-gate * page mapping is present. 1980Sstevel@tonic-gate */ 1990Sstevel@tonic-gate extern htable_t *htable_getpage(struct hat *hat, uintptr_t va, uint_t *entry); 2000Sstevel@tonic-gate 2010Sstevel@tonic-gate /* 2020Sstevel@tonic-gate * Called to allocate initial/additional htables for reserve. 2030Sstevel@tonic-gate */ 2040Sstevel@tonic-gate extern void htable_initial_reserve(uint_t); 2050Sstevel@tonic-gate extern void htable_reserve(uint_t); 2060Sstevel@tonic-gate 2070Sstevel@tonic-gate /* 2080Sstevel@tonic-gate * Used to readjust the htable reserve after the reserve list has been used. 2090Sstevel@tonic-gate * Also called after boot to release left over boot reserves. 2100Sstevel@tonic-gate */ 2110Sstevel@tonic-gate extern void htable_adjust_reserve(void); 2120Sstevel@tonic-gate 2130Sstevel@tonic-gate /* 214*5084Sjohnlev * return number of bytes mapped by all the htables in a given hat 215*5084Sjohnlev */ 216*5084Sjohnlev extern size_t htable_mapped(struct hat *); 217*5084Sjohnlev 218*5084Sjohnlev 219*5084Sjohnlev /* 2203446Smrj * Attach initial pagetables as htables 2213446Smrj */ 2223446Smrj extern void htable_attach(struct hat *, uintptr_t, level_t, struct htable *, 2233446Smrj pfn_t); 2243446Smrj 2253446Smrj /* 2260Sstevel@tonic-gate * Routine to find the next populated htable at or above a given virtual 2270Sstevel@tonic-gate * address. Can specify an upper limit, or HTABLE_WALK_TO_END to indicate 2280Sstevel@tonic-gate * that it should search the entire address space. Similar to 2290Sstevel@tonic-gate * hat_getpte(), but used for walking through address ranges. It can be 2300Sstevel@tonic-gate * used like this: 2310Sstevel@tonic-gate * 2320Sstevel@tonic-gate * va = ... 2330Sstevel@tonic-gate * ht = NULL; 2340Sstevel@tonic-gate * while (va < end_va) { 2350Sstevel@tonic-gate * pte = htable_walk(hat, &ht, &va, end_va); 2360Sstevel@tonic-gate * if (!pte) 2370Sstevel@tonic-gate * break; 2380Sstevel@tonic-gate * 2390Sstevel@tonic-gate * ... code to operate on page at va ... 2400Sstevel@tonic-gate * 2410Sstevel@tonic-gate * va += LEVEL_SIZE(ht->ht_level); 2420Sstevel@tonic-gate * } 2430Sstevel@tonic-gate * if (ht) 2440Sstevel@tonic-gate * htable_release(ht); 2450Sstevel@tonic-gate * 2460Sstevel@tonic-gate */ 2470Sstevel@tonic-gate extern x86pte_t htable_walk(struct hat *hat, htable_t **ht, uintptr_t *va, 2480Sstevel@tonic-gate uintptr_t eaddr); 2490Sstevel@tonic-gate 2500Sstevel@tonic-gate #define HTABLE_WALK_TO_END ((uintptr_t)-1) 2510Sstevel@tonic-gate 2520Sstevel@tonic-gate /* 2530Sstevel@tonic-gate * Utilities convert between virtual addresses and page table entry indeces. 2540Sstevel@tonic-gate */ 2550Sstevel@tonic-gate extern uint_t htable_va2entry(uintptr_t va, htable_t *ht); 2560Sstevel@tonic-gate extern uintptr_t htable_e2va(htable_t *ht, uint_t entry); 2570Sstevel@tonic-gate 2580Sstevel@tonic-gate /* 2590Sstevel@tonic-gate * Interfaces that provide access to page table entries via the htable. 2600Sstevel@tonic-gate * 2610Sstevel@tonic-gate * Note that all accesses except x86pte_copy() and x86pte_zero() are atomic. 2620Sstevel@tonic-gate */ 2633446Smrj extern void x86pte_cpu_init(cpu_t *); 2643446Smrj extern void x86pte_cpu_fini(cpu_t *); 2650Sstevel@tonic-gate 2660Sstevel@tonic-gate extern x86pte_t x86pte_get(htable_t *, uint_t entry); 2670Sstevel@tonic-gate 2683446Smrj /* 2693446Smrj * x86pte_set returns LPAGE_ERROR if it's asked to overwrite a page table 2703446Smrj * link with a large page mapping. 2713446Smrj */ 2723446Smrj #define LPAGE_ERROR (-(x86pte_t)1) 2730Sstevel@tonic-gate extern x86pte_t x86pte_set(htable_t *, uint_t entry, x86pte_t new, void *); 2740Sstevel@tonic-gate 2753446Smrj extern x86pte_t x86pte_inval(htable_t *ht, uint_t entry, 2763446Smrj x86pte_t old, x86pte_t *ptr); 2770Sstevel@tonic-gate 2780Sstevel@tonic-gate extern x86pte_t x86pte_update(htable_t *ht, uint_t entry, 2790Sstevel@tonic-gate x86pte_t old, x86pte_t new); 2800Sstevel@tonic-gate 2810Sstevel@tonic-gate extern void x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, 2820Sstevel@tonic-gate uint_t cnt); 2830Sstevel@tonic-gate 2843446Smrj /* 2853446Smrj * access to a pagetable knowing only the pfn 2863446Smrj */ 2873446Smrj extern x86pte_t *x86pte_mapin(pfn_t, uint_t, htable_t *); 2883446Smrj extern void x86pte_mapout(void); 2890Sstevel@tonic-gate 2900Sstevel@tonic-gate /* 2910Sstevel@tonic-gate * these are actually inlines for "lock; incw", "lock; decw", etc. instructions. 2920Sstevel@tonic-gate */ 2930Sstevel@tonic-gate #define HTABLE_INC(x) atomic_inc16((uint16_t *)&x) 2940Sstevel@tonic-gate #define HTABLE_DEC(x) atomic_dec16((uint16_t *)&x) 2950Sstevel@tonic-gate #define HTABLE_LOCK_INC(ht) atomic_add_32(&(ht)->ht_lock_cnt, 1) 2960Sstevel@tonic-gate #define HTABLE_LOCK_DEC(ht) atomic_add_32(&(ht)->ht_lock_cnt, -1) 2970Sstevel@tonic-gate 298*5084Sjohnlev #ifdef __xpv 299*5084Sjohnlev extern void xen_flush_va(caddr_t va); 300*5084Sjohnlev extern void xen_gflush_va(caddr_t va, cpuset_t); 301*5084Sjohnlev extern void xen_flush_tlb(void); 302*5084Sjohnlev extern void xen_gflush_tlb(cpuset_t); 303*5084Sjohnlev extern void xen_pin(pfn_t, level_t); 304*5084Sjohnlev extern void xen_unpin(pfn_t); 305*5084Sjohnlev extern int xen_kpm_page(pfn_t, uint_t); 306*5084Sjohnlev 307*5084Sjohnlev /* 308*5084Sjohnlev * The hypervisor maps all page tables into our address space read-only. 309*5084Sjohnlev * Under normal circumstances, the hypervisor then handles all updates to 310*5084Sjohnlev * the page tables underneath the covers for us. However, when we are 311*5084Sjohnlev * trying to dump core after a hypervisor panic, the hypervisor is no 312*5084Sjohnlev * longer available to do these updates. To work around the protection 313*5084Sjohnlev * problem, we simply disable write-protect checking for the duration of a 314*5084Sjohnlev * pagetable update operation. 315*5084Sjohnlev */ 316*5084Sjohnlev #define XPV_ALLOW_PAGETABLE_UPDATES() \ 317*5084Sjohnlev { \ 318*5084Sjohnlev if (IN_XPV_PANIC()) \ 319*5084Sjohnlev setcr0((getcr0() & ~CR0_WP) & 0xffffffff); \ 320*5084Sjohnlev } 321*5084Sjohnlev #define XPV_DISALLOW_PAGETABLE_UPDATES() \ 322*5084Sjohnlev { \ 323*5084Sjohnlev if (IN_XPV_PANIC() > 0) \ 324*5084Sjohnlev setcr0((getcr0() | CR0_WP) & 0xffffffff); \ 325*5084Sjohnlev } 326*5084Sjohnlev 327*5084Sjohnlev #else /* __xpv */ 328*5084Sjohnlev 329*5084Sjohnlev #define XPV_ALLOW_PAGETABLE_UPDATES() 330*5084Sjohnlev #define XPV_DISALLOW_PAGETABLE_UPDATES() 331*5084Sjohnlev 332*5084Sjohnlev #endif 333*5084Sjohnlev 3340Sstevel@tonic-gate #endif /* _KERNEL */ 3350Sstevel@tonic-gate 3360Sstevel@tonic-gate 3370Sstevel@tonic-gate #ifdef __cplusplus 3380Sstevel@tonic-gate } 3390Sstevel@tonic-gate #endif 3400Sstevel@tonic-gate 3410Sstevel@tonic-gate #endif /* _VM_HTABLE_H */ 342