/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/mman.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/disp.h>
#include <sys/vmem.h>
#include <sys/vmsystm.h>
#include <sys/promif.h>
#include <sys/var.h>
#include <sys/x86_archext.h>
#include <sys/bootconf.h>
#include <sys/dumphdr.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/hat.h>
#include <vm/hat_i86.h>
#include <sys/cmn_err.h>

kmem_cache_t *htable_cache;
extern cpuset_t khat_cpuset;

/*
 * The variable htable_reserve_amount, rather than HTABLE_RESERVE_AMOUNT,
 * is used in order to facilitate testing of the htable_steal() code.
 * By resetting htable_reserve_amount to a lower value, we can force
 * stealing to occur.  The reserve amount is a guess to get us through boot.
 */
#define	HTABLE_RESERVE_AMOUNT	(200)
uint_t htable_reserve_amount = HTABLE_RESERVE_AMOUNT;
kmutex_t htable_reserve_mutex;
uint_t htable_reserve_cnt;
htable_t *htable_reserve_pool;

/*
 * This variable exists so that the number of htable_steal() passes can be
 * tuned via /etc/system.
 */
uint_t htable_steal_passes = 10;

/*
 * mutex stuff for access to htable hash
 * (NUM_HTABLE_MUTEX must be a power of two for HTABLE_MUTEX_HASH to work)
 */
#define	NUM_HTABLE_MUTEX 128
kmutex_t htable_mutex[NUM_HTABLE_MUTEX];
#define	HTABLE_MUTEX_HASH(h)	((h) & (NUM_HTABLE_MUTEX - 1))

#define	HTABLE_ENTER(h)	mutex_enter(&htable_mutex[HTABLE_MUTEX_HASH(h)]);
#define	HTABLE_EXIT(h)	mutex_exit(&htable_mutex[HTABLE_MUTEX_HASH(h)]);

/*
 * forward declarations
 */
static void link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr);
static void unlink_ptp(htable_t *higher, htable_t *old, uintptr_t vaddr);
static void htable_free(htable_t *ht);
static x86pte_t *x86pte_access_pagetable(htable_t *ht);
static void x86pte_release_pagetable(htable_t *ht);
static x86pte_t x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old,
	x86pte_t new);

/*
 * Address used for kernel page tables. See ptable_alloc() below.
 */
uintptr_t ptable_va = 0;
size_t	ptable_sz = 2 * MMU_PAGESIZE;

/*
 * A counter to track if we are stealing or reaping htables. When non-zero
 * htable_free() will directly free htables (either to the reserve or kmem)
 * instead of putting them in a hat's htable cache.
 */
uint32_t htable_dont_cache = 0;

/*
 * Track the number of active pagetables, so we can know how many to reap
 */
static uint32_t active_ptables = 0;

/*
 * Allocate a memory page for a hardware page table.
 *
 * The pages allocated for page tables are currently gotten in a hacked up
 * way. It works for now, but really needs to be fixed up a bit.
 *
 * During boot: The boot loader controls physical memory allocation via
 * boot_alloc(). To avoid conflict with vmem, we just do boot_alloc()s with
 * addresses less than kernelbase. These addresses are ignored when we take
 * over mappings from the boot loader.
 *
 * Post-boot: we currently use page_create_va() on the kvp with fake offsets,
 * segments and virt address. This is pretty bogus, but was copied from the
 * old hat_i86.c code. A better approach would be to have a custom
 * page_get_physical() interface that can specify either mnode random or
 * mnode local and takes a page from whatever color has the MOST available -
 * this would have a minimal impact on page coloring.
 *
 * For now the htable pointer in ht is only used to compute a unique vnode
 * offset for the page.
 */
static void
ptable_alloc(htable_t *ht)
{
	pfn_t pfn;
	page_t *pp;
	u_offset_t offset;
	static struct seg tmpseg;
	static int first_time = 1;

	/*
	 * Allocating the associated hardware page table is very different
	 * before boot has finished.  We get a physical page from boot
	 * w/o eating up any kernel address space.
	 */
	ht->ht_pfn = PFN_INVALID;
	HATSTAT_INC(hs_ptable_allocs);
	atomic_add_32(&active_ptables, 1);

	if (use_boot_reserve) {
		ASSERT(ptable_va != 0);

		/*
		 * Allocate, then demap the ptable_va, so that we're
		 * sure there exist page table entries for the addresses
		 */
		if (first_time) {
			first_time = 0;
			if ((uintptr_t)BOP_ALLOC(bootops, (caddr_t)ptable_va,
			    ptable_sz, BO_NO_ALIGN) != ptable_va)
				panic("BOP_ALLOC failed");

			hat_boot_demap(ptable_va);
			hat_boot_demap(ptable_va + MMU_PAGESIZE);
		}

		pfn = ((uintptr_t)BOP_EALLOC(bootops, 0, MMU_PAGESIZE,
		    BO_NO_ALIGN, BOPF_X86_ALLOC_PHYS)) >> MMU_PAGESHIFT;
		if (page_resv(1, KM_NOSLEEP) == 0)
			panic("page_resv() failed in ptable alloc");

		pp = page_numtopp_nolock(pfn);
		ASSERT(pp != NULL);
		if (pp->p_szc != 0)
			page_boot_demote(pp);
		pp = page_numtopp(pfn, SE_EXCL);
		ASSERT(pp != NULL);

	} else {
		/*
		 * Post boot get a page for the table.
		 *
		 * The first check is to see if there is memory in
		 * the system. If we drop to throttlefree, then fail
		 * the ptable_alloc() and let the stealing code kick in.
		 * Note that we have to do this test here, since the test in
		 * page_create_throttle() would let the NOSLEEP allocation
		 * go through and deplete the page reserves.
		 */
		if (freemem <= throttlefree + 1)
			return;

		/*
		 * This code is temporary, so don't review too critically.
		 * I'm awaiting a new phys page allocator from Kit -- Joe
		 *
		 * We need to assign an offset for the page to call
		 * page_create_va. To avoid conflicts with other pages,
		 * we get creative with the offset.
		 * for 32 bits, we pick an offset > 4Gig
		 * for 64 bits, pick an offset somewhere in the VA hole.
		 */
		offset = (uintptr_t)ht - kernelbase;
		offset <<= MMU_PAGESHIFT;
#if defined(__amd64)
		offset += mmu.hole_start;	/* something in VA hole */
#else
		offset += 1ULL << 40;	/* something > 4 Gig */
#endif

		if (page_resv(1, KM_NOSLEEP) == 0)
			return;

#ifdef DEBUG
		pp = page_exists(&kvp, offset);
		if (pp != NULL)
			panic("ptable already exists %p", pp);
#endif
		pp = page_create_va(&kvp, offset, MMU_PAGESIZE,
		    PG_EXCL | PG_NORELOC, &tmpseg,
		    (void *)((uintptr_t)ht << MMU_PAGESHIFT));
		if (pp == NULL)
			return;
		page_io_unlock(pp);
		page_hashout(pp, NULL);
		pfn = pp->p_pagenum;
	}
	page_downgrade(pp);
	ASSERT(PAGE_SHARED(pp));

	if (pfn == PFN_INVALID)
		panic("ptable_alloc(): Invalid PFN!!");
	ht->ht_pfn = pfn;
}

/*
 * Free an htable's associated page table page. See the comments
 * for ptable_alloc().
 */
static void
ptable_free(htable_t *ht)
{
	pfn_t pfn = ht->ht_pfn;
	page_t *pp;

	/*
	 * need to destroy the page used for the pagetable
	 */
	ASSERT(pfn != PFN_INVALID);
	HATSTAT_INC(hs_ptable_frees);
	atomic_add_32(&active_ptables, -1);
	pp = page_numtopp_nolock(pfn);
	if (pp == NULL)
		panic("ptable_free(): no page for pfn!");
	ASSERT(PAGE_SHARED(pp));
	ASSERT(pfn == pp->p_pagenum);

	/*
	 * Get an exclusive lock, might have to wait for a kmem reader.
	 */
	if (!page_tryupgrade(pp)) {
		page_unlock(pp);
		/*
		 * RFE: we could change this to not loop forever
		 * George Cameron had some idea on how to do that.
		 * For now looping works - it's just like sfmmu.
		 */
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
	}
	page_free(pp, 1);
	page_unresv(1);
	ht->ht_pfn = PFN_INVALID;
}

/*
 * Put one htable on the reserve list.
 */
static void
htable_put_reserve(htable_t *ht)
{
	ht->ht_hat = NULL;		/* no longer tied to a hat */
	ASSERT(ht->ht_pfn == PFN_INVALID);
	HATSTAT_INC(hs_htable_rputs);
	mutex_enter(&htable_reserve_mutex);
	ht->ht_next = htable_reserve_pool;
	htable_reserve_pool = ht;
	++htable_reserve_cnt;
	mutex_exit(&htable_reserve_mutex);
}

/*
 * Take one htable from the reserve.
 */
static htable_t *
htable_get_reserve(void)
{
	htable_t *ht = NULL;

	mutex_enter(&htable_reserve_mutex);
	if (htable_reserve_cnt != 0) {
		ht = htable_reserve_pool;
		ASSERT(ht != NULL);
		ASSERT(ht->ht_pfn == PFN_INVALID);
		htable_reserve_pool = ht->ht_next;
		--htable_reserve_cnt;
		HATSTAT_INC(hs_htable_rgets);
	}
	mutex_exit(&htable_reserve_mutex);
	return (ht);
}
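
/*
 * Note on the reserve pool: htable_put_reserve()/htable_get_reserve() keep a
 * stock of htable_t structures with no pagetable page attached (ht_pfn is
 * PFN_INVALID). htable_alloc() draws from this pool when allocating for
 * hat_reserves_thread or during panic and donates fresh kmem allocations to
 * it; htable_free() also refills it while it is below htable_reserve_amount,
 * and htable_adjust_reserve() trims any excess.
 */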

/*
 * Allocate initial htables with page tables and put them on the kernel hat's
 * cache list.
 */
void
htable_initial_reserve(uint_t count)
{
	htable_t *ht;
	hat_t *hat = kas.a_hat;

	count += HTABLE_RESERVE_AMOUNT;
	while (count > 0) {
		ht = kmem_cache_alloc(htable_cache, KM_NOSLEEP);
		ASSERT(ht != NULL);

		ASSERT(use_boot_reserve);
		ht->ht_hat = kas.a_hat;	/* so htable_free() works */
		ht->ht_flags = 0;	/* so x86pte_zero works */
		ptable_alloc(ht);
		if (ht->ht_pfn == PFN_INVALID)
			panic("ptable_alloc() failed");

		x86pte_zero(ht, 0, mmu.ptes_per_table);

		ht->ht_next = hat->hat_ht_cached;
		hat->hat_ht_cached = ht;
		--count;
	}
}

/*
 * Readjust the reserves after a thread finishes using them.
 *
 * The first time this is called post boot, we'll also clear out the
 * extra boot htables that were put in the kernel hat's cache list.
 */
void
htable_adjust_reserve()
{
	static int first_time = 1;
	htable_t *ht;

	ASSERT(curthread != hat_reserves_thread);

	/*
	 * The first time this is called after we can steal, we free up the
	 * kernel's cached htable list. It has lots of extra htable/page
	 * tables that were allocated for boot up.
	 */
	if (first_time) {
		first_time = 0;
		while ((ht = kas.a_hat->hat_ht_cached) != NULL) {
			kas.a_hat->hat_ht_cached = ht->ht_next;
			ASSERT(ht->ht_hat == kas.a_hat);
			ptable_free(ht);
			htable_put_reserve(ht);
		}
		return;
	}

	/*
	 * Free any excess htables in the reserve list
	 */
	while (htable_reserve_cnt > htable_reserve_amount) {
		ht = htable_get_reserve();
		if (ht == NULL)
			return;
		ASSERT(ht->ht_pfn == PFN_INVALID);
		kmem_cache_free(htable_cache, ht);
	}
}


/*
 * This routine steals htables from user processes for htable_alloc() or
 * for htable_reap().
 */
static htable_t *
htable_steal(uint_t cnt)
{
	hat_t		*hat = kas.a_hat;	/* list starts with khat */
	htable_t	*list = NULL;
	htable_t	*ht;
	htable_t	*higher;
	uint_t		h;
	uint_t		e;
	uintptr_t	va;
	x86pte_t	pte;
	uint_t		stolen = 0;
	uint_t		pass;
	uint_t		threshhold;

	/*
	 * Limit htable_steal_passes to something reasonable
	 */
	if (htable_steal_passes == 0)
		htable_steal_passes = 1;
	if (htable_steal_passes > mmu.ptes_per_table)
		htable_steal_passes = mmu.ptes_per_table;

	/*
	 * Loop through all hats. The 1st pass takes cached htables that
	 * aren't in use. The later passes steal by removing mappings, too.
	 */
	atomic_add_32(&htable_dont_cache, 1);
	for (pass = 1; pass <= htable_steal_passes && stolen < cnt; ++pass) {
		threshhold = pass / htable_steal_passes;
		hat = kas.a_hat->hat_next;
		for (;;) {

			/*
			 * move to next hat
			 */
			mutex_enter(&hat_list_lock);
			hat->hat_flags &= ~HAT_VICTIM;
			cv_broadcast(&hat_list_cv);
			do {
				hat = hat->hat_prev;
			} while (hat->hat_flags & HAT_VICTIM);
			if (stolen == cnt || hat == kas.a_hat->hat_next) {
				mutex_exit(&hat_list_lock);
				break;
			}
			hat->hat_flags |= HAT_VICTIM;
			mutex_exit(&hat_list_lock);

			/*
			 * Take any htables from the hat's cached "free" list.
			 */
			hat_enter(hat);
			while ((ht = hat->hat_ht_cached) != NULL &&
			    stolen < cnt) {
				hat->hat_ht_cached = ht->ht_next;
				ht->ht_next = list;
				list = ht;
				++stolen;
			}
			hat_exit(hat);

			/*
			 * Don't steal on first pass.
			 */
			if (pass == 1 || stolen == cnt)
				continue;

			/*
			 * search the active htables for one to steal
			 */
			for (h = 0; h < hat->hat_num_hash && stolen < cnt;
			    ++h) {
				higher = NULL;
				HTABLE_ENTER(h);
				for (ht = hat->hat_ht_hash[h]; ht;
				    ht = ht->ht_next) {

					/*
					 * Can we rule out reaping?
					 */
					if (ht->ht_busy != 0 ||
					    (ht->ht_flags & HTABLE_SHARED_PFN) ||
					    ht->ht_level == TOP_LEVEL(hat) ||
					    (ht->ht_level >=
					    mmu.max_page_level &&
					    ht->ht_valid_cnt > 0) ||
					    ht->ht_valid_cnt < threshhold ||
					    ht->ht_lock_cnt != 0)
						continue;

					/*
					 * Increment busy so the htable can't
					 * disappear. We drop the htable mutex
					 * to avoid deadlocks with
					 * hat_pageunload() and the hment mutex
					 * while we call hat_pte_unmap()
					 */
					++ht->ht_busy;
					HTABLE_EXIT(h);

					/*
					 * Try stealing.
					 * - unload and invalidate all PTEs
					 */
					for (e = 0, va = ht->ht_vaddr;
					    e < ht->ht_num_ptes &&
					    ht->ht_valid_cnt > 0 &&
					    ht->ht_busy == 1 &&
					    ht->ht_lock_cnt == 0;
					    ++e, va += MMU_PAGESIZE) {
						pte = x86pte_get(ht, e);
						if (!PTE_ISVALID(pte))
							continue;
						hat_pte_unmap(ht, e,
						    HAT_UNLOAD, pte, NULL);
					}

					/*
					 * Reacquire htable lock. If we didn't
					 * remove all mappings in the table,
					 * or another thread added a new mapping
					 * behind us, give up on this table.
					 */
					HTABLE_ENTER(h);
					if (ht->ht_busy != 1 ||
					    ht->ht_valid_cnt != 0 ||
					    ht->ht_lock_cnt != 0) {
						--ht->ht_busy;
						continue;
					}

					/*
					 * Steal it and unlink the page table.
					 */
					higher = ht->ht_parent;
					unlink_ptp(higher, ht, ht->ht_vaddr);

					/*
					 * remove from the hash list
					 */
					if (ht->ht_next)
						ht->ht_next->ht_prev =
						    ht->ht_prev;

					if (ht->ht_prev) {
						ht->ht_prev->ht_next =
						    ht->ht_next;
					} else {
						ASSERT(hat->hat_ht_hash[h] ==
						    ht);
						hat->hat_ht_hash[h] =
						    ht->ht_next;
					}

					/*
					 * Break to outer loop to release the
					 * higher (ht_parent) pagetable. This
					 * spreads out the pain caused by
					 * pagefaults.
					 */
					ht->ht_next = list;
					list = ht;
					++stolen;

					/*
					 * If this is the last steal, then move
					 * the hat list head, so that we start
					 * here next time.
					 */
					if (stolen == cnt) {
						mutex_enter(&hat_list_lock);
						kas.a_hat->hat_next = hat;
						mutex_exit(&hat_list_lock);
					}
					break;
				}
				HTABLE_EXIT(h);
				if (higher != NULL)
					htable_release(higher);
			}
		}
	}
	atomic_add_32(&htable_dont_cache, -1);
	return (list);
}
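
/*
 * Note: htables on the list returned by htable_steal() are no longer visible
 * to any hat; they came either from a hat's cached free list or were just
 * unlinked from their hash bucket and parent pagetable above. Each still
 * owns its pagetable page (ht_pfn), which is why htable_alloc() can reuse a
 * stolen htable directly and must ptable_free() it when only a bare htable
 * was wanted.
 */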

/*
 * This is invoked from kmem when the system is low on memory.  We try
 * to free hments, htables, and ptables to improve the memory situation.
 */
/*ARGSUSED*/
static void
htable_reap(void *handle)
{
	uint_t		reap_cnt;
	htable_t	*list;
	htable_t	*ht;

	HATSTAT_INC(hs_reap_attempts);
	if (!can_steal_post_boot)
		return;

	/*
	 * Try to reap 5% of the page tables bounded by a maximum of
	 * 5% of physmem and a minimum of 10.
	 */
	reap_cnt = MAX(MIN(physmem / 20, active_ptables / 20), 10);

	/*
	 * Let htable_steal() do the work, we just call htable_free()
	 */
	list = htable_steal(reap_cnt);
	while ((ht = list) != NULL) {
		list = ht->ht_next;
		HATSTAT_INC(hs_reaped);
		htable_free(ht);
	}

	/*
	 * Free up excess reserves
	 */
	htable_adjust_reserve();
	hment_adjust_reserve();
}

/*
 * allocate an htable, stealing one or using the reserve if necessary
 */
static htable_t *
htable_alloc(
	hat_t		*hat,
	uintptr_t	vaddr,
	level_t		level,
	htable_t	*shared)
{
	htable_t	*ht = NULL;
	uint_t		is_vlp;
	uint_t		is_bare = 0;
	uint_t		need_to_zero = 1;
	int		kmflags = (can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP);

	if (level < 0 || level > TOP_LEVEL(hat))
		panic("htable_alloc(): level %d out of range\n", level);

	is_vlp = (hat->hat_flags & HAT_VLP) && level == VLP_LEVEL;
	if (is_vlp || shared != NULL)
		is_bare = 1;

	/*
	 * First reuse a cached htable from the hat_ht_cached field, this
	 * avoids unnecessary trips through kmem/page allocators. This is also
	 * what happens during use_boot_reserve.
	 */
	if (hat->hat_ht_cached != NULL && !is_bare) {
		hat_enter(hat);
		ht = hat->hat_ht_cached;
		if (ht != NULL) {
			hat->hat_ht_cached = ht->ht_next;
			need_to_zero = 0;
			/* XX64 ASSERT() they're all zero somehow */
			ASSERT(ht->ht_pfn != PFN_INVALID);
		}
		hat_exit(hat);
	}

	if (ht == NULL) {
		ASSERT(!use_boot_reserve);
		/*
		 * When allocating for hat_memload_arena, we use the reserve.
		 * Also use reserves if we are in a panic().
		 */
		if (curthread == hat_reserves_thread || panicstr != NULL) {
			ASSERT(panicstr != NULL || !is_bare);
			ASSERT(panicstr != NULL ||
			    curthread == hat_reserves_thread);
			ht = htable_get_reserve();
		} else {
			/*
			 * Donate successful htable allocations to the reserve.
			 */
			for (;;) {
				ASSERT(curthread != hat_reserves_thread);
				ht = kmem_cache_alloc(htable_cache, kmflags);
				if (ht == NULL)
					break;
				ht->ht_pfn = PFN_INVALID;
				if (curthread == hat_reserves_thread ||
				    panicstr != NULL ||
				    htable_reserve_cnt >= htable_reserve_amount)
					break;
				htable_put_reserve(ht);
			}
		}

		/*
		 * allocate a page for the hardware page table if needed
		 */
		if (ht != NULL && !is_bare) {
			ptable_alloc(ht);
			if (ht->ht_pfn == PFN_INVALID) {
				kmem_cache_free(htable_cache, ht);
				ht = NULL;
			}
		}
	}

	/*
	 * if allocations failed resort to stealing
	 */
	if (ht == NULL && can_steal_post_boot) {
		ht = htable_steal(1);
		HATSTAT_INC(hs_steals);

		/*
		 * if we had to steal for a bare htable, release the
		 * page for the pagetable
		 */
		if (ht != NULL && is_bare)
			ptable_free(ht);
	}

	/*
	 * All attempts to allocate or steal failed...
	 */
	if (ht == NULL)
		panic("htable_alloc(): couldn't steal\n");

	/*
	 * Shared page tables have all entries locked and entries may not
	 * be added or deleted.
	 */
	ht->ht_flags = 0;
	if (shared != NULL) {
		ASSERT(level == 0);
		ASSERT(shared->ht_valid_cnt > 0);
		ht->ht_flags |= HTABLE_SHARED_PFN;
		ht->ht_pfn = shared->ht_pfn;
		ht->ht_lock_cnt = 0;
		ht->ht_valid_cnt = 0;		/* updated in hat_share() */
		ht->ht_shares = shared;
		need_to_zero = 0;
	} else {
		ht->ht_shares = NULL;
		ht->ht_lock_cnt = 0;
		ht->ht_valid_cnt = 0;
	}

	/*
	 * setup flags, etc. for VLP htables
	 */
	if (is_vlp) {
		ht->ht_flags |= HTABLE_VLP;
		ht->ht_num_ptes = VLP_NUM_PTES;
		ASSERT(ht->ht_pfn == PFN_INVALID);
		need_to_zero = 0;
	} else if (level == mmu.max_level) {
		ht->ht_num_ptes = mmu.top_level_count;
	} else {
		ht->ht_num_ptes = mmu.ptes_per_table;
	}

	/*
	 * fill in the htable
	 */
	ht->ht_hat = hat;
	ht->ht_parent = NULL;
	ht->ht_vaddr = vaddr;
	ht->ht_level = level;
	ht->ht_busy = 1;
	ht->ht_next = NULL;
	ht->ht_prev = NULL;

	/*
	 * Zero out any freshly allocated page table
	 */
	if (need_to_zero)
		x86pte_zero(ht, 0, mmu.ptes_per_table);
	return (ht);
}

/*
 * Free up an htable, either to a hat's cached list, the reserves or
 * back to kmem.
 */
static void
htable_free(htable_t *ht)
{
	hat_t	*hat = ht->ht_hat;

	/*
	 * If the process isn't exiting, cache the free htable in the hat
	 * structure. We always do this for the boot reserve. We don't
	 * do this if the hat is exiting or we are stealing/reaping htables.
	 */
	if (hat != NULL &&
	    !(ht->ht_flags & HTABLE_SHARED_PFN) &&
	    (use_boot_reserve ||
	    (!(hat->hat_flags & HAT_FREEING) && !htable_dont_cache))) {
		ASSERT((ht->ht_flags & HTABLE_VLP) == 0);
		ASSERT(ht->ht_pfn != PFN_INVALID);
		hat_enter(hat);
		ht->ht_next = hat->hat_ht_cached;
		hat->hat_ht_cached = ht;
		hat_exit(hat);
		return;
	}

	/*
	 * If we have a hardware page table, free it.
	 * We don't free page tables whose page is shared from someone else.
	 */
	if (ht->ht_flags & HTABLE_SHARED_PFN) {
		ASSERT(ht->ht_pfn != PFN_INVALID);
		ht->ht_pfn = PFN_INVALID;
	} else if (!(ht->ht_flags & HTABLE_VLP)) {
		ptable_free(ht);
	}

	/*
	 * If we are the thread using the reserves, put free htables
	 * into reserves.
	 */
	if (curthread == hat_reserves_thread ||
	    htable_reserve_cnt < htable_reserve_amount)
		htable_put_reserve(ht);
	else
		kmem_cache_free(htable_cache, ht);
}


/*
 * This is called when a hat is being destroyed or swapped out. We reap all
 * the remaining htables in the hat cache. If destroying, all left over
 * htables are also destroyed.
 *
 * We also don't need to invalidate any of the PTPs nor do any demapping.
 */
void
htable_purge_hat(hat_t *hat)
{
	htable_t	*ht;
	int		h;

	/*
	 * Purge the htable cache if just reaping.
	 */
	if (!(hat->hat_flags & HAT_FREEING)) {
		atomic_add_32(&htable_dont_cache, 1);
		for (;;) {
			hat_enter(hat);
			ht = hat->hat_ht_cached;
			if (ht == NULL) {
				hat_exit(hat);
				break;
			}
			hat->hat_ht_cached = ht->ht_next;
			hat_exit(hat);
			htable_free(ht);
		}
		atomic_add_32(&htable_dont_cache, -1);
		return;
	}

	/*
	 * if freeing, no locking is needed
	 */
	while ((ht = hat->hat_ht_cached) != NULL) {
		hat->hat_ht_cached = ht->ht_next;
		htable_free(ht);
	}

	/*
	 * walk thru the htable hash table and free all the htables in it.
	 */
	for (h = 0; h < hat->hat_num_hash; ++h) {
		while ((ht = hat->hat_ht_hash[h]) != NULL) {
			if (ht->ht_next)
				ht->ht_next->ht_prev = ht->ht_prev;

			if (ht->ht_prev) {
				ht->ht_prev->ht_next = ht->ht_next;
			} else {
				ASSERT(hat->hat_ht_hash[h] == ht);
				hat->hat_ht_hash[h] = ht->ht_next;
			}
			htable_free(ht);
		}
	}
}
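
/*
 * Note: in the "just reaping" case above, htable_dont_cache is raised around
 * the loop so that htable_free() sends each htable to the reserve or back to
 * kmem instead of putting it straight back on this hat's cache list. In the
 * HAT_FREEING case htable_free() already declines to cache.
 */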

/*
 * Unlink an entry for a table at vaddr and level out of the existing table
 * one level higher. We are always holding the HASH_ENTER() when doing this.
 */
static void
unlink_ptp(htable_t *higher, htable_t *old, uintptr_t vaddr)
{
	uint_t entry = htable_va2entry(vaddr, higher);
	x86pte_t expect = MAKEPTP(old->ht_pfn, old->ht_level);
	x86pte_t found;

	ASSERT(higher->ht_busy > 0);
	ASSERT(higher->ht_valid_cnt > 0);
	ASSERT(old->ht_valid_cnt == 0);
	found = x86pte_cas(higher, entry, expect, 0);
	if (found != expect)
		panic("Bad PTP found=" FMT_PTE ", expected=" FMT_PTE,
		    found, expect);
	HTABLE_DEC(higher->ht_valid_cnt);
}

/*
 * Link an entry for a new table at vaddr and level into the existing table
 * one level higher. We are always holding the HASH_ENTER() when doing this.
 */
static void
link_ptp(htable_t *higher, htable_t *new, uintptr_t vaddr)
{
	uint_t entry = htable_va2entry(vaddr, higher);
	x86pte_t newptp = MAKEPTP(new->ht_pfn, new->ht_level);
	x86pte_t found;

	ASSERT(higher->ht_busy > 0);

	ASSERT(new->ht_level != mmu.max_level);

	HTABLE_INC(higher->ht_valid_cnt);

	found = x86pte_cas(higher, entry, 0, newptp);
	if (found != 0)
		panic("HAT: ptp not 0, found=" FMT_PTE, found);
}

/*
 * Release of an htable.
 *
 * During process exit, some empty page tables are not unlinked - hat_free_end()
 * cleans them up. Upper level pagetables (mmu.max_page_level and higher) are
 * only released during hat_free_end() or by htable_steal(). We always
 * release SHARED page tables.
 */
void
htable_release(htable_t *ht)
{
	uint_t		hashval;
	htable_t	*shared;
	htable_t	*higher;
	hat_t		*hat;
	uintptr_t	va;
	level_t		level;

	while (ht != NULL) {
		shared = NULL;
		for (;;) {
			hat = ht->ht_hat;
			va = ht->ht_vaddr;
			level = ht->ht_level;
			hashval = HTABLE_HASH(hat, va, level);

			/*
			 * The common case is that this isn't the last use of
			 * an htable so we don't want to free the htable.
			 */
			HTABLE_ENTER(hashval);
			ASSERT(ht->ht_lock_cnt == 0 || ht->ht_valid_cnt > 0);
			ASSERT(ht->ht_valid_cnt >= 0);
			ASSERT(ht->ht_busy > 0);
			if (ht->ht_valid_cnt > 0)
				break;
			if (ht->ht_busy > 1)
				break;

			/*
			 * we always release empty shared htables
			 */
			if (!(ht->ht_flags & HTABLE_SHARED_PFN)) {

				/*
				 * don't release if in address space tear down
				 */
				if (hat->hat_flags & HAT_FREEING)
					break;

				/*
				 * At and above max_page_level, free if it's for
				 * a boot-time kernel mapping below kernelbase.
				 */
				if (level >= mmu.max_page_level &&
				    (hat != kas.a_hat || va >= kernelbase))
					break;
			}

			/*
			 * remember if we destroy an htable that shares its PFN
			 * from elsewhere
			 */
			if (ht->ht_flags & HTABLE_SHARED_PFN) {
				ASSERT(ht->ht_level == 0);
				ASSERT(shared == NULL);
				shared = ht->ht_shares;
				HATSTAT_INC(hs_htable_unshared);
			}

			/*
			 * Handle release of a table and freeing the htable_t.
			 * Unlink it from the table higher (ie. ht_parent).
			 */
			ASSERT(ht->ht_lock_cnt == 0);
			higher = ht->ht_parent;
			ASSERT(higher != NULL);

			/*
			 * Unlink the pagetable.
			 */
			unlink_ptp(higher, ht, va);

			/*
			 * When any top level VLP page table entry changes, we
			 * must issue a reload of cr3 on all processors.
			 */
			if ((hat->hat_flags & HAT_VLP) &&
			    level == VLP_LEVEL - 1)
				hat_demap(hat, DEMAP_ALL_ADDR);

			/*
			 * remove this htable from its hash list
			 */
			if (ht->ht_next)
				ht->ht_next->ht_prev = ht->ht_prev;

			if (ht->ht_prev) {
				ht->ht_prev->ht_next = ht->ht_next;
			} else {
				ASSERT(hat->hat_ht_hash[hashval] == ht);
				hat->hat_ht_hash[hashval] = ht->ht_next;
			}
			HTABLE_EXIT(hashval);
			htable_free(ht);
			ht = higher;
		}

		ASSERT(ht->ht_busy >= 1);
		--ht->ht_busy;
		HTABLE_EXIT(hashval);

		/*
		 * If we released a shared htable, do a release on the htable
		 * from which it shared
		 */
		ht = shared;
	}
}

/*
 * Find the htable for the pagetable at the given level for the given address.
 * If found acquires a hold that eventually needs to be htable_release()d
 */
htable_t *
htable_lookup(hat_t *hat, uintptr_t vaddr, level_t level)
{
	uintptr_t	base;
	uint_t		hashval;
	htable_t	*ht = NULL;

	ASSERT(level >= 0);
	ASSERT(level <= TOP_LEVEL(hat));

	if (level == TOP_LEVEL(hat))
		base = 0;
	else
		base = vaddr & LEVEL_MASK(level + 1);

	hashval = HTABLE_HASH(hat, base, level);
	HTABLE_ENTER(hashval);
	for (ht = hat->hat_ht_hash[hashval]; ht; ht = ht->ht_next) {
		if (ht->ht_hat == hat &&
		    ht->ht_vaddr == base &&
		    ht->ht_level == level)
			break;
	}
	if (ht)
		++ht->ht_busy;

	HTABLE_EXIT(hashval);
	return (ht);
}
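
/*
 * Illustrative sketch of the lookup/hold protocol (the entry accessors here
 * are examples, not a fixed recipe):
 *
 *	ht = htable_lookup(hat, vaddr, level);
 *	if (ht != NULL) {
 *		entry = htable_va2entry(vaddr, ht);
 *		pte = x86pte_get(ht, entry);
 *		...
 *		htable_release(ht);
 *	}
 *
 * The hold (ht_busy) keeps the htable from being freed or chosen as a
 * victim by htable_steal() while the caller is using it.
 */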

/*
 * Acquires a hold on a known htable (from a locked hment entry).
 */
void
htable_acquire(htable_t *ht)
{
	hat_t		*hat = ht->ht_hat;
	level_t		level = ht->ht_level;
	uintptr_t	base = ht->ht_vaddr;
	uint_t		hashval = HTABLE_HASH(hat, base, level);

	HTABLE_ENTER(hashval);
#ifdef DEBUG
	/*
	 * make sure the htable is there
	 */
	{
		htable_t	*h;

		for (h = hat->hat_ht_hash[hashval];
		    h && h != ht;
		    h = h->ht_next)
			;
		ASSERT(h == ht);
	}
#endif /* DEBUG */
	++ht->ht_busy;
	HTABLE_EXIT(hashval);
}

/*
 * Find the htable for the pagetable at the given level for the given address.
 * If found acquires a hold that eventually needs to be htable_release()d
 * If not found the table is created.
 *
 * Since we can't hold a hash table mutex during allocation, we have to
 * drop it and redo the search on a create. Then we may have to free the newly
 * allocated htable if another thread raced in and created it ahead of us.
 */
htable_t *
htable_create(
	hat_t		*hat,
	uintptr_t	vaddr,
	level_t		level,
	htable_t	*shared)
{
	uint_t		h;
	level_t		l;
	uintptr_t	base;
	htable_t	*ht;
	htable_t	*higher = NULL;
	htable_t	*new = NULL;

	if (level < 0 || level > TOP_LEVEL(hat))
		panic("htable_create(): level %d out of range\n", level);

	/*
	 * Create the page tables in top down order.
	 */
	for (l = TOP_LEVEL(hat); l >= level; --l) {
		new = NULL;
		if (l == TOP_LEVEL(hat))
			base = 0;
		else
			base = vaddr & LEVEL_MASK(l + 1);

		h = HTABLE_HASH(hat, base, l);
try_again:
		/*
		 * look up the htable at this level
		 */
		HTABLE_ENTER(h);
		if (l == TOP_LEVEL(hat)) {
			ht = hat->hat_htable;
		} else {
			for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
				ASSERT(ht->ht_hat == hat);
				if (ht->ht_vaddr == base &&
				    ht->ht_level == l)
					break;
			}
		}

		/*
		 * if we found the htable, increment its busy cnt
		 * and if we had allocated a new htable, free it.
		 */
		if (ht != NULL) {
			/*
			 * If we find a pre-existing shared table, it must
			 * share from the same place.
			 */
			if (l == level && shared && ht->ht_shares &&
			    ht->ht_shares != shared) {
				panic("htable shared from wrong place "
				    "found htable=%p shared=%p", ht, shared);
			}
			++ht->ht_busy;
			HTABLE_EXIT(h);
			if (new)
				htable_free(new);
			if (higher != NULL)
				htable_release(higher);
			higher = ht;

			/*
			 * if we didn't find it on the first search
			 * allocate a new one and search again
			 */
		} else if (new == NULL) {
			HTABLE_EXIT(h);
			new = htable_alloc(hat, base, l,
			    l == level ? shared : NULL);
			goto try_again;

			/*
			 * 2nd search and still not there, use "new" table
			 * Link new table into higher, when not at top level.
			 */
		} else {
			ht = new;
			if (higher != NULL) {
				link_ptp(higher, ht, base);
				ht->ht_parent = higher;

				/*
				 * When any top level VLP page table changes,
				 * we must reload cr3 on all processors.
				 */
#ifdef __i386
				if (mmu.pae_hat &&
#else /* !__i386 */
				if ((hat->hat_flags & HAT_VLP) &&
#endif /* __i386 */
				    l == VLP_LEVEL - 1)
					hat_demap(hat, DEMAP_ALL_ADDR);
			}
			ht->ht_next = hat->hat_ht_hash[h];
			ASSERT(ht->ht_prev == NULL);
			if (hat->hat_ht_hash[h])
				hat->hat_ht_hash[h]->ht_prev = ht;
			hat->hat_ht_hash[h] = ht;
			HTABLE_EXIT(h);

			/*
			 * Note we don't do htable_release(higher).
			 * That happens recursively when "new" is removed by
			 * htable_release() or htable_steal().
			 */
			higher = ht;

			/*
			 * If we just created a new shared page table we
			 * increment the shared htable's busy count, so that
			 * it can't be the victim of a steal even if it's empty.
			 */
			if (l == level && shared) {
				(void) htable_lookup(shared->ht_hat,
				    shared->ht_vaddr, shared->ht_level);
				HATSTAT_INC(hs_htable_shared);
			}
		}
	}

	return (ht);
}
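
/*
 * Illustrative use of htable_create(): a mapping path typically materializes
 * the level 0 pagetable covering an address, installs its PTE, and drops the
 * hold when done (the accessor calls here are examples, not a fixed recipe):
 *
 *	ht = htable_create(hat, va, 0, NULL);
 *	entry = htable_va2entry(va, ht);
 *	... install the new PTE at (ht, entry) ...
 *	htable_release(ht);
 *
 * Intermediate tables created along the way are not released here; that
 * happens recursively when this htable is later removed by htable_release()
 * or htable_steal(), as noted above.
 */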
1265*0Sstevel@tonic-gate */ 1266*0Sstevel@tonic-gate static x86pte_t 1267*0Sstevel@tonic-gate htable_scan(htable_t *ht, uintptr_t *vap, uintptr_t eaddr) 1268*0Sstevel@tonic-gate { 1269*0Sstevel@tonic-gate uint_t e; 1270*0Sstevel@tonic-gate x86pte_t found_pte = (x86pte_t)0; 1271*0Sstevel@tonic-gate char *pte_ptr; 1272*0Sstevel@tonic-gate char *end_pte_ptr; 1273*0Sstevel@tonic-gate int l = ht->ht_level; 1274*0Sstevel@tonic-gate uintptr_t va = *vap & LEVEL_MASK(l); 1275*0Sstevel@tonic-gate size_t pgsize = LEVEL_SIZE(l); 1276*0Sstevel@tonic-gate 1277*0Sstevel@tonic-gate ASSERT(va >= ht->ht_vaddr); 1278*0Sstevel@tonic-gate ASSERT(va <= HTABLE_LAST_PAGE(ht)); 1279*0Sstevel@tonic-gate 1280*0Sstevel@tonic-gate /* 1281*0Sstevel@tonic-gate * Compute the starting index and ending virtual address 1282*0Sstevel@tonic-gate */ 1283*0Sstevel@tonic-gate e = htable_va2entry(va, ht); 1284*0Sstevel@tonic-gate 1285*0Sstevel@tonic-gate /* 1286*0Sstevel@tonic-gate * The following page table scan code knows that the valid 1287*0Sstevel@tonic-gate * bit of a PTE is in the lowest byte AND that x86 is little endian!! 1288*0Sstevel@tonic-gate */ 1289*0Sstevel@tonic-gate pte_ptr = (char *)x86pte_access_pagetable(ht); 1290*0Sstevel@tonic-gate end_pte_ptr = pte_ptr + (ht->ht_num_ptes << mmu.pte_size_shift); 1291*0Sstevel@tonic-gate pte_ptr += e << mmu.pte_size_shift; 1292*0Sstevel@tonic-gate while (*pte_ptr == 0) { 1293*0Sstevel@tonic-gate va += pgsize; 1294*0Sstevel@tonic-gate if (va >= eaddr) 1295*0Sstevel@tonic-gate break; 1296*0Sstevel@tonic-gate pte_ptr += mmu.pte_size; 1297*0Sstevel@tonic-gate ASSERT(pte_ptr <= end_pte_ptr); 1298*0Sstevel@tonic-gate if (pte_ptr == end_pte_ptr) 1299*0Sstevel@tonic-gate break; 1300*0Sstevel@tonic-gate } 1301*0Sstevel@tonic-gate 1302*0Sstevel@tonic-gate /* 1303*0Sstevel@tonic-gate * if we found a valid PTE, load the entire PTE 1304*0Sstevel@tonic-gate */ 1305*0Sstevel@tonic-gate if (va < eaddr && pte_ptr != end_pte_ptr) { 1306*0Sstevel@tonic-gate if (mmu.pae_hat) { 1307*0Sstevel@tonic-gate found_pte = *(x86pte_t *)pte_ptr; 1308*0Sstevel@tonic-gate #if defined(__i386) 1309*0Sstevel@tonic-gate /* 1310*0Sstevel@tonic-gate * 64 bit reads on 32 bit x86 are not atomic 1311*0Sstevel@tonic-gate */ 1312*0Sstevel@tonic-gate while (found_pte != *(volatile x86pte_t *)pte_ptr) 1313*0Sstevel@tonic-gate found_pte = *(volatile x86pte_t *)pte_ptr; 1314*0Sstevel@tonic-gate #endif 1315*0Sstevel@tonic-gate } else { 1316*0Sstevel@tonic-gate found_pte = *(x86pte32_t *)pte_ptr; 1317*0Sstevel@tonic-gate } 1318*0Sstevel@tonic-gate } 1319*0Sstevel@tonic-gate x86pte_release_pagetable(ht); 1320*0Sstevel@tonic-gate 1321*0Sstevel@tonic-gate #if defined(__amd64) 1322*0Sstevel@tonic-gate /* 1323*0Sstevel@tonic-gate * deal with VA hole on amd64 1324*0Sstevel@tonic-gate */ 1325*0Sstevel@tonic-gate if (l == mmu.max_level && va >= mmu.hole_start && va <= mmu.hole_end) 1326*0Sstevel@tonic-gate va = mmu.hole_end + va - mmu.hole_start; 1327*0Sstevel@tonic-gate #endif /* __amd64 */ 1328*0Sstevel@tonic-gate 1329*0Sstevel@tonic-gate *vap = va; 1330*0Sstevel@tonic-gate return (found_pte); 1331*0Sstevel@tonic-gate } 1332*0Sstevel@tonic-gate 1333*0Sstevel@tonic-gate /* 1334*0Sstevel@tonic-gate * Find the address and htable for the first populated translation at or 1335*0Sstevel@tonic-gate * above the given virtual address. The caller may also specify an upper 1336*0Sstevel@tonic-gate * limit to the address range to search. 
Uses level information to quickly 1337*0Sstevel@tonic-gate * skip unpopulated sections of virtual address spaces. 1338*0Sstevel@tonic-gate * 1339*0Sstevel@tonic-gate * If not found returns NULL. When found, returns the htable and virt addr 1340*0Sstevel@tonic-gate * and has a hold on the htable. 1341*0Sstevel@tonic-gate */ 1342*0Sstevel@tonic-gate x86pte_t 1343*0Sstevel@tonic-gate htable_walk( 1344*0Sstevel@tonic-gate struct hat *hat, 1345*0Sstevel@tonic-gate htable_t **htp, 1346*0Sstevel@tonic-gate uintptr_t *vaddr, 1347*0Sstevel@tonic-gate uintptr_t eaddr) 1348*0Sstevel@tonic-gate { 1349*0Sstevel@tonic-gate uintptr_t va = *vaddr; 1350*0Sstevel@tonic-gate htable_t *ht; 1351*0Sstevel@tonic-gate htable_t *prev = *htp; 1352*0Sstevel@tonic-gate level_t l; 1353*0Sstevel@tonic-gate level_t max_mapped_level; 1354*0Sstevel@tonic-gate x86pte_t pte; 1355*0Sstevel@tonic-gate 1356*0Sstevel@tonic-gate ASSERT(eaddr > va); 1357*0Sstevel@tonic-gate 1358*0Sstevel@tonic-gate /* 1359*0Sstevel@tonic-gate * If this is a user address, then we know we need not look beyond 1360*0Sstevel@tonic-gate * kernelbase. 1361*0Sstevel@tonic-gate */ 1362*0Sstevel@tonic-gate ASSERT(hat == kas.a_hat || eaddr <= kernelbase || 1363*0Sstevel@tonic-gate eaddr == HTABLE_WALK_TO_END); 1364*0Sstevel@tonic-gate if (hat != kas.a_hat && eaddr == HTABLE_WALK_TO_END) 1365*0Sstevel@tonic-gate eaddr = kernelbase; 1366*0Sstevel@tonic-gate 1367*0Sstevel@tonic-gate /* 1368*0Sstevel@tonic-gate * If we're coming in with a previous page table, search it first 1369*0Sstevel@tonic-gate * without doing an htable_lookup(), this should be frequent. 1370*0Sstevel@tonic-gate */ 1371*0Sstevel@tonic-gate if (prev) { 1372*0Sstevel@tonic-gate ASSERT(prev->ht_busy > 0); 1373*0Sstevel@tonic-gate ASSERT(prev->ht_vaddr <= va); 1374*0Sstevel@tonic-gate l = prev->ht_level; 1375*0Sstevel@tonic-gate if (va <= HTABLE_LAST_PAGE(prev)) { 1376*0Sstevel@tonic-gate pte = htable_scan(prev, &va, eaddr); 1377*0Sstevel@tonic-gate 1378*0Sstevel@tonic-gate if (PTE_ISPAGE(pte, l)) { 1379*0Sstevel@tonic-gate *vaddr = va; 1380*0Sstevel@tonic-gate *htp = prev; 1381*0Sstevel@tonic-gate return (pte); 1382*0Sstevel@tonic-gate } 1383*0Sstevel@tonic-gate } 1384*0Sstevel@tonic-gate 1385*0Sstevel@tonic-gate /* 1386*0Sstevel@tonic-gate * We found nothing in the htable provided by the caller, 1387*0Sstevel@tonic-gate * so fall through and do the full search 1388*0Sstevel@tonic-gate */ 1389*0Sstevel@tonic-gate htable_release(prev); 1390*0Sstevel@tonic-gate } 1391*0Sstevel@tonic-gate 1392*0Sstevel@tonic-gate /* 1393*0Sstevel@tonic-gate * Find the level of the largest pagesize used by this HAT. 1394*0Sstevel@tonic-gate */ 1395*0Sstevel@tonic-gate max_mapped_level = 0; 1396*0Sstevel@tonic-gate for (l = 1; l <= mmu.max_page_level; ++l) 1397*0Sstevel@tonic-gate if (hat->hat_pages_mapped[l] != 0) 1398*0Sstevel@tonic-gate max_mapped_level = l; 1399*0Sstevel@tonic-gate 1400*0Sstevel@tonic-gate while (va < eaddr && va >= *vaddr) { 1401*0Sstevel@tonic-gate ASSERT(!IN_VA_HOLE(va)); 1402*0Sstevel@tonic-gate 1403*0Sstevel@tonic-gate /* 1404*0Sstevel@tonic-gate * Find lowest table with any entry for given address. 
1405*0Sstevel@tonic-gate */ 1406*0Sstevel@tonic-gate for (l = 0; l <= TOP_LEVEL(hat); ++l) { 1407*0Sstevel@tonic-gate ht = htable_lookup(hat, va, l); 1408*0Sstevel@tonic-gate if (ht != NULL) { 1409*0Sstevel@tonic-gate pte = htable_scan(ht, &va, eaddr); 1410*0Sstevel@tonic-gate if (PTE_ISPAGE(pte, l)) { 1411*0Sstevel@tonic-gate *vaddr = va; 1412*0Sstevel@tonic-gate *htp = ht; 1413*0Sstevel@tonic-gate return (pte); 1414*0Sstevel@tonic-gate } 1415*0Sstevel@tonic-gate htable_release(ht); 1416*0Sstevel@tonic-gate break; 1417*0Sstevel@tonic-gate } 1418*0Sstevel@tonic-gate 1419*0Sstevel@tonic-gate /* 1420*0Sstevel@tonic-gate * The ht is never NULL at the top level since 1421*0Sstevel@tonic-gate * the top level htable is created in hat_alloc(). 1422*0Sstevel@tonic-gate */ 1423*0Sstevel@tonic-gate ASSERT(l < TOP_LEVEL(hat)); 1424*0Sstevel@tonic-gate 1425*0Sstevel@tonic-gate /* 1426*0Sstevel@tonic-gate * No htable covers the address. If there is no 1427*0Sstevel@tonic-gate * larger page size that could cover it, we 1428*0Sstevel@tonic-gate * skip to the start of the next page table. 1429*0Sstevel@tonic-gate */ 1430*0Sstevel@tonic-gate if (l >= max_mapped_level) { 1431*0Sstevel@tonic-gate va = NEXT_ENTRY_VA(va, l + 1); 1432*0Sstevel@tonic-gate break; 1433*0Sstevel@tonic-gate } 1434*0Sstevel@tonic-gate } 1435*0Sstevel@tonic-gate } 1436*0Sstevel@tonic-gate 1437*0Sstevel@tonic-gate *vaddr = 0; 1438*0Sstevel@tonic-gate *htp = NULL; 1439*0Sstevel@tonic-gate return (0); 1440*0Sstevel@tonic-gate } 1441*0Sstevel@tonic-gate 1442*0Sstevel@tonic-gate /* 1443*0Sstevel@tonic-gate * Find the htable and page table entry index of the given virtual address 1444*0Sstevel@tonic-gate * with pagesize at or below given level. 1445*0Sstevel@tonic-gate * If not found returns NULL. When found, returns the htable, sets 1446*0Sstevel@tonic-gate * entry, and has a hold on the htable. 1447*0Sstevel@tonic-gate */ 1448*0Sstevel@tonic-gate htable_t * 1449*0Sstevel@tonic-gate htable_getpte( 1450*0Sstevel@tonic-gate struct hat *hat, 1451*0Sstevel@tonic-gate uintptr_t vaddr, 1452*0Sstevel@tonic-gate uint_t *entry, 1453*0Sstevel@tonic-gate x86pte_t *pte, 1454*0Sstevel@tonic-gate level_t level) 1455*0Sstevel@tonic-gate { 1456*0Sstevel@tonic-gate htable_t *ht; 1457*0Sstevel@tonic-gate level_t l; 1458*0Sstevel@tonic-gate uint_t e; 1459*0Sstevel@tonic-gate 1460*0Sstevel@tonic-gate ASSERT(level <= mmu.max_page_level); 1461*0Sstevel@tonic-gate 1462*0Sstevel@tonic-gate for (l = 0; l <= level; ++l) { 1463*0Sstevel@tonic-gate ht = htable_lookup(hat, vaddr, l); 1464*0Sstevel@tonic-gate if (ht == NULL) 1465*0Sstevel@tonic-gate continue; 1466*0Sstevel@tonic-gate e = htable_va2entry(vaddr, ht); 1467*0Sstevel@tonic-gate if (entry != NULL) 1468*0Sstevel@tonic-gate *entry = e; 1469*0Sstevel@tonic-gate if (pte != NULL) 1470*0Sstevel@tonic-gate *pte = x86pte_get(ht, e); 1471*0Sstevel@tonic-gate return (ht); 1472*0Sstevel@tonic-gate } 1473*0Sstevel@tonic-gate return (NULL); 1474*0Sstevel@tonic-gate } 1475*0Sstevel@tonic-gate 1476*0Sstevel@tonic-gate /* 1477*0Sstevel@tonic-gate * Find the htable and page table entry index of the given virtual address. 1478*0Sstevel@tonic-gate * There must be a valid page mapped at the given address. 1479*0Sstevel@tonic-gate * If not found returns NULL. When found, returns the htable, sets 1480*0Sstevel@tonic-gate * entry, and has a hold on the htable. 
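 * Note that a PTE which points to a lower level pagetable, rather than
 * to a page, does not satisfy this lookup; see the PTE_ISPAGE() check
 * below.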
1481*0Sstevel@tonic-gate */ 1482*0Sstevel@tonic-gate htable_t * 1483*0Sstevel@tonic-gate htable_getpage(struct hat *hat, uintptr_t vaddr, uint_t *entry) 1484*0Sstevel@tonic-gate { 1485*0Sstevel@tonic-gate htable_t *ht; 1486*0Sstevel@tonic-gate uint_t e; 1487*0Sstevel@tonic-gate x86pte_t pte; 1488*0Sstevel@tonic-gate 1489*0Sstevel@tonic-gate ht = htable_getpte(hat, vaddr, &e, &pte, mmu.max_page_level); 1490*0Sstevel@tonic-gate if (ht == NULL) 1491*0Sstevel@tonic-gate return (NULL); 1492*0Sstevel@tonic-gate 1493*0Sstevel@tonic-gate if (entry) 1494*0Sstevel@tonic-gate *entry = e; 1495*0Sstevel@tonic-gate 1496*0Sstevel@tonic-gate if (PTE_ISPAGE(pte, ht->ht_level)) 1497*0Sstevel@tonic-gate return (ht); 1498*0Sstevel@tonic-gate htable_release(ht); 1499*0Sstevel@tonic-gate return (NULL); 1500*0Sstevel@tonic-gate } 1501*0Sstevel@tonic-gate 1502*0Sstevel@tonic-gate 1503*0Sstevel@tonic-gate void 1504*0Sstevel@tonic-gate htable_init() 1505*0Sstevel@tonic-gate { 1506*0Sstevel@tonic-gate /* 1507*0Sstevel@tonic-gate * To save on kernel VA usage, we avoid debug information in 32 bit 1508*0Sstevel@tonic-gate * kernels. 1509*0Sstevel@tonic-gate */ 1510*0Sstevel@tonic-gate #if defined(__amd64) 1511*0Sstevel@tonic-gate int kmem_flags = KMC_NOHASH; 1512*0Sstevel@tonic-gate #elif defined(__i386) 1513*0Sstevel@tonic-gate int kmem_flags = KMC_NOHASH | KMC_NODEBUG; 1514*0Sstevel@tonic-gate #endif 1515*0Sstevel@tonic-gate 1516*0Sstevel@tonic-gate /* 1517*0Sstevel@tonic-gate * initialize kmem caches 1518*0Sstevel@tonic-gate */ 1519*0Sstevel@tonic-gate htable_cache = kmem_cache_create("htable_t", 1520*0Sstevel@tonic-gate sizeof (htable_t), 0, NULL, NULL, 1521*0Sstevel@tonic-gate htable_reap, NULL, hat_memload_arena, kmem_flags); 1522*0Sstevel@tonic-gate } 1523*0Sstevel@tonic-gate 1524*0Sstevel@tonic-gate /* 1525*0Sstevel@tonic-gate * get the pte index for the virtual address in the given htable's pagetable 1526*0Sstevel@tonic-gate */ 1527*0Sstevel@tonic-gate uint_t 1528*0Sstevel@tonic-gate htable_va2entry(uintptr_t va, htable_t *ht) 1529*0Sstevel@tonic-gate { 1530*0Sstevel@tonic-gate level_t l = ht->ht_level; 1531*0Sstevel@tonic-gate 1532*0Sstevel@tonic-gate ASSERT(va >= ht->ht_vaddr); 1533*0Sstevel@tonic-gate ASSERT(va <= HTABLE_LAST_PAGE(ht)); 1534*0Sstevel@tonic-gate return ((va >> LEVEL_SHIFT(l)) & (ht->ht_num_ptes - 1)); 1535*0Sstevel@tonic-gate } 1536*0Sstevel@tonic-gate 1537*0Sstevel@tonic-gate /* 1538*0Sstevel@tonic-gate * Given an htable and the index of a pte in it, return the virtual address 1539*0Sstevel@tonic-gate * of the page. 
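 * For example, entry 5 of a level 0 htable maps the page at
 * ht_vaddr + 5 * MMU_PAGESIZE, since LEVEL_SHIFT(0) is the base page shift.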
1540*0Sstevel@tonic-gate  */
1541*0Sstevel@tonic-gate uintptr_t
1542*0Sstevel@tonic-gate htable_e2va(htable_t *ht, uint_t entry)
1543*0Sstevel@tonic-gate {
1544*0Sstevel@tonic-gate         level_t l = ht->ht_level;
1545*0Sstevel@tonic-gate         uintptr_t va;
1546*0Sstevel@tonic-gate 
1547*0Sstevel@tonic-gate         ASSERT(entry < ht->ht_num_ptes);
1548*0Sstevel@tonic-gate         va = ht->ht_vaddr + ((uintptr_t)entry << LEVEL_SHIFT(l));
1549*0Sstevel@tonic-gate 
1550*0Sstevel@tonic-gate         /*
1551*0Sstevel@tonic-gate          * Need to skip over any VA hole in top level table
1552*0Sstevel@tonic-gate          */
1553*0Sstevel@tonic-gate #if defined(__amd64)
1554*0Sstevel@tonic-gate         if (ht->ht_level == mmu.max_level && va >= mmu.hole_start)
1555*0Sstevel@tonic-gate                 va += ((mmu.hole_end - mmu.hole_start) + 1);
1556*0Sstevel@tonic-gate #endif
1557*0Sstevel@tonic-gate 
1558*0Sstevel@tonic-gate         return (va);
1559*0Sstevel@tonic-gate }
1560*0Sstevel@tonic-gate 
1561*0Sstevel@tonic-gate /*
1562*0Sstevel@tonic-gate  * The code uses compare and swap instructions to read/write PTEs to
1563*0Sstevel@tonic-gate  * avoid atomicity problems, since PTEs can be 8 bytes on 32 bit systems.
1564*0Sstevel@tonic-gate  * Again this can be optimized on 64 bit systems, since aligned load/store
1565*0Sstevel@tonic-gate  * will naturally be atomic.
1566*0Sstevel@tonic-gate  *
1567*0Sstevel@tonic-gate  * The combination of using kpreempt_disable()/_enable() and the hci_mutex
1568*0Sstevel@tonic-gate  * is used to ensure that an interrupt won't overwrite a temporary mapping
1569*0Sstevel@tonic-gate  * while it's in use. If an interrupt thread tries to access a PTE, it will
1570*0Sstevel@tonic-gate  * yield briefly back to the pinned thread which holds the cpu's hci_mutex.
1571*0Sstevel@tonic-gate  */
1572*0Sstevel@tonic-gate 
1573*0Sstevel@tonic-gate static struct hat_cpu_info init_hci; /* used for cpu 0 */
1574*0Sstevel@tonic-gate 
1575*0Sstevel@tonic-gate /*
1576*0Sstevel@tonic-gate  * Initialize a CPU private window for mapping page tables.
1577*0Sstevel@tonic-gate  * There will be 3 total pages of addressing needed:
1578*0Sstevel@tonic-gate  *
1579*0Sstevel@tonic-gate  * 1 for r/w access to pagetables
1580*0Sstevel@tonic-gate  * 1 for r access when copying pagetables (hat_alloc)
1581*0Sstevel@tonic-gate  * 1 that will map the PTEs for the 1st 2, so we can access them quickly
1582*0Sstevel@tonic-gate  *
1583*0Sstevel@tonic-gate  * We use vmem_xalloc() to get a correct alignment so that only one
1584*0Sstevel@tonic-gate  * hat_mempte_setup() is needed.
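 * The LEVEL_SIZE(1) argument to vmem_xalloc() below prevents the three
 * pages from crossing a level 1 boundary, so a single pagetable holds
 * all of their PTEs.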
1585*0Sstevel@tonic-gate  */
1586*0Sstevel@tonic-gate void
1587*0Sstevel@tonic-gate x86pte_cpu_init(cpu_t *cpu, void *pages)
1588*0Sstevel@tonic-gate {
1589*0Sstevel@tonic-gate         struct hat_cpu_info *hci;
1590*0Sstevel@tonic-gate         caddr_t va;
1591*0Sstevel@tonic-gate 
1592*0Sstevel@tonic-gate         /*
1593*0Sstevel@tonic-gate          * We can't use kmem_alloc/vmem_alloc for the 1st CPU, as this is
1594*0Sstevel@tonic-gate          * called before we've activated our own HAT
1595*0Sstevel@tonic-gate          */
1596*0Sstevel@tonic-gate         if (pages != NULL) {
1597*0Sstevel@tonic-gate                 hci = &init_hci;
1598*0Sstevel@tonic-gate                 va = pages;
1599*0Sstevel@tonic-gate         } else {
1600*0Sstevel@tonic-gate                 hci = kmem_alloc(sizeof (struct hat_cpu_info), KM_SLEEP);
1601*0Sstevel@tonic-gate                 va = vmem_xalloc(heap_arena, 3 * MMU_PAGESIZE, MMU_PAGESIZE, 0,
1602*0Sstevel@tonic-gate                     LEVEL_SIZE(1), NULL, NULL, VM_SLEEP);
1603*0Sstevel@tonic-gate         }
1604*0Sstevel@tonic-gate         mutex_init(&hci->hci_mutex, NULL, MUTEX_DEFAULT, NULL);
1605*0Sstevel@tonic-gate 
1606*0Sstevel@tonic-gate         /*
1607*0Sstevel@tonic-gate          * If we are using segkpm, then there is no need for any of the
1608*0Sstevel@tonic-gate          * mempte support. We can access the desired memory through a kpm
1609*0Sstevel@tonic-gate          * mapping rather than setting up a temporary mempte mapping.
1610*0Sstevel@tonic-gate          */
1611*0Sstevel@tonic-gate         if (kpm_enable == 0) {
1612*0Sstevel@tonic-gate                 hci->hci_mapped_pfn = PFN_INVALID;
1613*0Sstevel@tonic-gate 
1614*0Sstevel@tonic-gate                 hci->hci_kernel_pte =
1615*0Sstevel@tonic-gate                     hat_mempte_kern_setup(va, va + (2 * MMU_PAGESIZE));
1616*0Sstevel@tonic-gate                 hci->hci_pagetable_va = (void *)va;
1617*0Sstevel@tonic-gate         }
1618*0Sstevel@tonic-gate 
1619*0Sstevel@tonic-gate         cpu->cpu_hat_info = hci;
1620*0Sstevel@tonic-gate }
1621*0Sstevel@tonic-gate 
1622*0Sstevel@tonic-gate /*
1623*0Sstevel@tonic-gate  * Macro to establish temporary mappings for x86pte_XXX routines.
1624*0Sstevel@tonic-gate  */
1625*0Sstevel@tonic-gate #define X86PTE_REMAP(addr, pte, index, perm, pfn) { \
1626*0Sstevel@tonic-gate         x86pte_t t; \
1627*0Sstevel@tonic-gate         \
1628*0Sstevel@tonic-gate         t = MAKEPTE((pfn), 0) | (perm) | mmu.pt_global | mmu.pt_nx;\
1629*0Sstevel@tonic-gate         if (mmu.pae_hat) \
1630*0Sstevel@tonic-gate                 pte[index] = t; \
1631*0Sstevel@tonic-gate         else \
1632*0Sstevel@tonic-gate                 ((x86pte32_t *)(pte))[index] = t; \
1633*0Sstevel@tonic-gate         mmu_tlbflush_entry((caddr_t)(addr)); \
1634*0Sstevel@tonic-gate }
1635*0Sstevel@tonic-gate 
1636*0Sstevel@tonic-gate /*
1637*0Sstevel@tonic-gate  * Disable preemption and establish a mapping to the pagetable with the
1638*0Sstevel@tonic-gate  * given pfn. This is optimized for the case where it's the same
1639*0Sstevel@tonic-gate  * pfn we last referenced from this CPU.
1640*0Sstevel@tonic-gate  */
1641*0Sstevel@tonic-gate static x86pte_t *
1642*0Sstevel@tonic-gate x86pte_access_pagetable(htable_t *ht)
1643*0Sstevel@tonic-gate {
1644*0Sstevel@tonic-gate         pfn_t pfn;
1645*0Sstevel@tonic-gate         struct hat_cpu_info *hci;
1646*0Sstevel@tonic-gate 
1647*0Sstevel@tonic-gate         /*
1648*0Sstevel@tonic-gate          * VLP pagetables are contained in the hat_t
1649*0Sstevel@tonic-gate          */
1650*0Sstevel@tonic-gate         if (ht->ht_flags & HTABLE_VLP)
1651*0Sstevel@tonic-gate                 return (ht->ht_hat->hat_vlp_ptes);
1652*0Sstevel@tonic-gate 
1653*0Sstevel@tonic-gate         /*
1654*0Sstevel@tonic-gate          * During early boot, use hat_boot_remap() of a page table address.
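         * Once khat_running is set, either the segkpm mapping or the
         * per-CPU window established by x86pte_cpu_init() is used instead.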
1655*0Sstevel@tonic-gate */ 1656*0Sstevel@tonic-gate pfn = ht->ht_pfn; 1657*0Sstevel@tonic-gate ASSERT(pfn != PFN_INVALID); 1658*0Sstevel@tonic-gate if (kpm_enable) 1659*0Sstevel@tonic-gate return ((x86pte_t *)hat_kpm_pfn2va(pfn)); 1660*0Sstevel@tonic-gate 1661*0Sstevel@tonic-gate if (!khat_running) { 1662*0Sstevel@tonic-gate (void) hat_boot_remap(ptable_va, pfn); 1663*0Sstevel@tonic-gate return ((x86pte_t *)ptable_va); 1664*0Sstevel@tonic-gate } 1665*0Sstevel@tonic-gate 1666*0Sstevel@tonic-gate /* 1667*0Sstevel@tonic-gate * Normally, disable preemption and grab the CPU's hci_mutex 1668*0Sstevel@tonic-gate */ 1669*0Sstevel@tonic-gate kpreempt_disable(); 1670*0Sstevel@tonic-gate hci = CPU->cpu_hat_info; 1671*0Sstevel@tonic-gate ASSERT(hci != NULL); 1672*0Sstevel@tonic-gate mutex_enter(&hci->hci_mutex); 1673*0Sstevel@tonic-gate if (hci->hci_mapped_pfn != pfn) { 1674*0Sstevel@tonic-gate /* 1675*0Sstevel@tonic-gate * The current mapping doesn't already point to this page. 1676*0Sstevel@tonic-gate * Update the CPU specific pagetable mapping to map the pfn. 1677*0Sstevel@tonic-gate */ 1678*0Sstevel@tonic-gate X86PTE_REMAP(hci->hci_pagetable_va, hci->hci_kernel_pte, 0, 1679*0Sstevel@tonic-gate PT_WRITABLE, pfn); 1680*0Sstevel@tonic-gate hci->hci_mapped_pfn = pfn; 1681*0Sstevel@tonic-gate } 1682*0Sstevel@tonic-gate return (hci->hci_pagetable_va); 1683*0Sstevel@tonic-gate } 1684*0Sstevel@tonic-gate 1685*0Sstevel@tonic-gate /* 1686*0Sstevel@tonic-gate * Release access to a page table. 1687*0Sstevel@tonic-gate */ 1688*0Sstevel@tonic-gate static void 1689*0Sstevel@tonic-gate x86pte_release_pagetable(htable_t *ht) 1690*0Sstevel@tonic-gate { 1691*0Sstevel@tonic-gate struct hat_cpu_info *hci; 1692*0Sstevel@tonic-gate 1693*0Sstevel@tonic-gate if (kpm_enable) 1694*0Sstevel@tonic-gate return; 1695*0Sstevel@tonic-gate 1696*0Sstevel@tonic-gate /* 1697*0Sstevel@tonic-gate * nothing to do for VLP htables 1698*0Sstevel@tonic-gate */ 1699*0Sstevel@tonic-gate if (ht->ht_flags & HTABLE_VLP) 1700*0Sstevel@tonic-gate return; 1701*0Sstevel@tonic-gate 1702*0Sstevel@tonic-gate /* 1703*0Sstevel@tonic-gate * During boot-up hat_kern_setup(), erase the boot loader remapping. 1704*0Sstevel@tonic-gate */ 1705*0Sstevel@tonic-gate if (!khat_running) { 1706*0Sstevel@tonic-gate hat_boot_demap(ptable_va); 1707*0Sstevel@tonic-gate return; 1708*0Sstevel@tonic-gate } 1709*0Sstevel@tonic-gate 1710*0Sstevel@tonic-gate /* 1711*0Sstevel@tonic-gate * Normal Operation: drop the CPU's hci_mutex and restore preemption 1712*0Sstevel@tonic-gate */ 1713*0Sstevel@tonic-gate hci = CPU->cpu_hat_info; 1714*0Sstevel@tonic-gate ASSERT(hci != NULL); 1715*0Sstevel@tonic-gate mutex_exit(&hci->hci_mutex); 1716*0Sstevel@tonic-gate kpreempt_enable(); 1717*0Sstevel@tonic-gate } 1718*0Sstevel@tonic-gate 1719*0Sstevel@tonic-gate /* 1720*0Sstevel@tonic-gate * Atomic retrieval of a pagetable entry 1721*0Sstevel@tonic-gate */ 1722*0Sstevel@tonic-gate x86pte_t 1723*0Sstevel@tonic-gate x86pte_get(htable_t *ht, uint_t entry) 1724*0Sstevel@tonic-gate { 1725*0Sstevel@tonic-gate x86pte_t pte; 1726*0Sstevel@tonic-gate x86pte32_t *pte32p; 1727*0Sstevel@tonic-gate volatile x86pte_t *ptep; 1728*0Sstevel@tonic-gate 1729*0Sstevel@tonic-gate /* 1730*0Sstevel@tonic-gate * 32 bit (non-pae) is always atomic. 1731*0Sstevel@tonic-gate * 64 bit is only atomic on 64 bit mode. 
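 * Hence the retry loop below for PAE PTEs on a 32 bit kernel: the
 * 8 byte PTE is reread until two consecutive reads return the same value.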
1732*0Sstevel@tonic-gate */ 1733*0Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 1734*0Sstevel@tonic-gate if (mmu.pae_hat) { 1735*0Sstevel@tonic-gate pte = ptep[entry]; 1736*0Sstevel@tonic-gate #if defined(__i386) 1737*0Sstevel@tonic-gate while (pte != ptep[entry]) 1738*0Sstevel@tonic-gate pte = ptep[entry]; 1739*0Sstevel@tonic-gate #endif /* __i386 */ 1740*0Sstevel@tonic-gate } else { 1741*0Sstevel@tonic-gate pte32p = (x86pte32_t *)ptep; 1742*0Sstevel@tonic-gate pte = pte32p[entry]; 1743*0Sstevel@tonic-gate } 1744*0Sstevel@tonic-gate x86pte_release_pagetable(ht); 1745*0Sstevel@tonic-gate return (pte); 1746*0Sstevel@tonic-gate } 1747*0Sstevel@tonic-gate 1748*0Sstevel@tonic-gate 1749*0Sstevel@tonic-gate /* 1750*0Sstevel@tonic-gate * Atomic unconditional set of a page table entry, it returns the previous 1751*0Sstevel@tonic-gate * value. 1752*0Sstevel@tonic-gate */ 1753*0Sstevel@tonic-gate x86pte_t 1754*0Sstevel@tonic-gate x86pte_set(htable_t *ht, uint_t entry, x86pte_t new, void *ptr) 1755*0Sstevel@tonic-gate { 1756*0Sstevel@tonic-gate x86pte_t old; 1757*0Sstevel@tonic-gate x86pte_t prev; 1758*0Sstevel@tonic-gate x86pte_t *ptep; 1759*0Sstevel@tonic-gate x86pte32_t *pte32p; 1760*0Sstevel@tonic-gate x86pte32_t n32, p32; 1761*0Sstevel@tonic-gate 1762*0Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 1763*0Sstevel@tonic-gate if (ptr == NULL) { 1764*0Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 1765*0Sstevel@tonic-gate ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift)); 1766*0Sstevel@tonic-gate } else { 1767*0Sstevel@tonic-gate ptep = ptr; 1768*0Sstevel@tonic-gate } 1769*0Sstevel@tonic-gate 1770*0Sstevel@tonic-gate if (mmu.pae_hat) { 1771*0Sstevel@tonic-gate for (;;) { 1772*0Sstevel@tonic-gate prev = *ptep; 1773*0Sstevel@tonic-gate if (prev == new) { 1774*0Sstevel@tonic-gate old = new; 1775*0Sstevel@tonic-gate break; 1776*0Sstevel@tonic-gate } 1777*0Sstevel@tonic-gate old = cas64(ptep, prev, new); 1778*0Sstevel@tonic-gate if (old == prev) 1779*0Sstevel@tonic-gate break; 1780*0Sstevel@tonic-gate } 1781*0Sstevel@tonic-gate } else { 1782*0Sstevel@tonic-gate pte32p = (x86pte32_t *)ptep; 1783*0Sstevel@tonic-gate n32 = new; 1784*0Sstevel@tonic-gate for (;;) { 1785*0Sstevel@tonic-gate p32 = *pte32p; 1786*0Sstevel@tonic-gate if (p32 == n32) { 1787*0Sstevel@tonic-gate old = new; 1788*0Sstevel@tonic-gate break; 1789*0Sstevel@tonic-gate } 1790*0Sstevel@tonic-gate old = cas32(pte32p, p32, n32); 1791*0Sstevel@tonic-gate if (old == p32) 1792*0Sstevel@tonic-gate break; 1793*0Sstevel@tonic-gate } 1794*0Sstevel@tonic-gate } 1795*0Sstevel@tonic-gate if (ptr == NULL) 1796*0Sstevel@tonic-gate x86pte_release_pagetable(ht); 1797*0Sstevel@tonic-gate return (old); 1798*0Sstevel@tonic-gate } 1799*0Sstevel@tonic-gate 1800*0Sstevel@tonic-gate /* 1801*0Sstevel@tonic-gate * Atomic compare and swap of a page table entry. 
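 * The return value is the PTE contents observed just before the swap;
 * the swap took effect only if that value equals the caller's "old".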
1802*0Sstevel@tonic-gate */ 1803*0Sstevel@tonic-gate static x86pte_t 1804*0Sstevel@tonic-gate x86pte_cas(htable_t *ht, uint_t entry, x86pte_t old, x86pte_t new) 1805*0Sstevel@tonic-gate { 1806*0Sstevel@tonic-gate x86pte_t pte; 1807*0Sstevel@tonic-gate x86pte_t *ptep; 1808*0Sstevel@tonic-gate x86pte32_t pte32, o32, n32; 1809*0Sstevel@tonic-gate x86pte32_t *pte32p; 1810*0Sstevel@tonic-gate 1811*0Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 1812*0Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 1813*0Sstevel@tonic-gate if (mmu.pae_hat) { 1814*0Sstevel@tonic-gate pte = cas64(&ptep[entry], old, new); 1815*0Sstevel@tonic-gate } else { 1816*0Sstevel@tonic-gate o32 = old; 1817*0Sstevel@tonic-gate n32 = new; 1818*0Sstevel@tonic-gate pte32p = (x86pte32_t *)ptep; 1819*0Sstevel@tonic-gate pte32 = cas32(&pte32p[entry], o32, n32); 1820*0Sstevel@tonic-gate pte = pte32; 1821*0Sstevel@tonic-gate } 1822*0Sstevel@tonic-gate x86pte_release_pagetable(ht); 1823*0Sstevel@tonic-gate 1824*0Sstevel@tonic-gate return (pte); 1825*0Sstevel@tonic-gate } 1826*0Sstevel@tonic-gate 1827*0Sstevel@tonic-gate /* 1828*0Sstevel@tonic-gate * data structure for cross call information 1829*0Sstevel@tonic-gate */ 1830*0Sstevel@tonic-gate typedef struct xcall_info { 1831*0Sstevel@tonic-gate x86pte_t xi_pte; 1832*0Sstevel@tonic-gate x86pte_t xi_old; 1833*0Sstevel@tonic-gate x86pte_t *xi_pteptr; 1834*0Sstevel@tonic-gate pfn_t xi_pfn; 1835*0Sstevel@tonic-gate processorid_t xi_cpuid; 1836*0Sstevel@tonic-gate level_t xi_level; 1837*0Sstevel@tonic-gate xc_func_t xi_func; 1838*0Sstevel@tonic-gate } xcall_info_t; 1839*0Sstevel@tonic-gate 1840*0Sstevel@tonic-gate /* 1841*0Sstevel@tonic-gate * Cross call service function to atomically invalidate a PTE and flush TLBs 1842*0Sstevel@tonic-gate */ 1843*0Sstevel@tonic-gate /*ARGSUSED*/ 1844*0Sstevel@tonic-gate static int 1845*0Sstevel@tonic-gate x86pte_inval_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3) 1846*0Sstevel@tonic-gate { 1847*0Sstevel@tonic-gate xcall_info_t *xi = (xcall_info_t *)a1; 1848*0Sstevel@tonic-gate caddr_t addr = (caddr_t)a2; 1849*0Sstevel@tonic-gate 1850*0Sstevel@tonic-gate /* 1851*0Sstevel@tonic-gate * Only the initiating cpu invalidates the page table entry. 1852*0Sstevel@tonic-gate * It returns the previous PTE value to the caller. 
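 * The other CPUs in the cross call set only perform the TLB flush
 * further below.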
1853*0Sstevel@tonic-gate  */
1854*0Sstevel@tonic-gate         if (CPU->cpu_id == xi->xi_cpuid) {
1855*0Sstevel@tonic-gate                 x86pte_t *ptep = xi->xi_pteptr;
1856*0Sstevel@tonic-gate                 pfn_t pfn = xi->xi_pfn;
1857*0Sstevel@tonic-gate                 level_t level = xi->xi_level;
1858*0Sstevel@tonic-gate                 x86pte_t old;
1859*0Sstevel@tonic-gate                 x86pte_t prev;
1860*0Sstevel@tonic-gate                 x86pte32_t *pte32p;
1861*0Sstevel@tonic-gate                 x86pte32_t p32;
1862*0Sstevel@tonic-gate 
1863*0Sstevel@tonic-gate                 if (mmu.pae_hat) {
1864*0Sstevel@tonic-gate                         for (;;) {
1865*0Sstevel@tonic-gate                                 prev = *ptep;
1866*0Sstevel@tonic-gate                                 if (PTE2PFN(prev, level) != pfn)
1867*0Sstevel@tonic-gate                                         break;
1868*0Sstevel@tonic-gate                                 old = cas64(ptep, prev, 0);
1869*0Sstevel@tonic-gate                                 if (old == prev)
1870*0Sstevel@tonic-gate                                         break;
1871*0Sstevel@tonic-gate                         }
1872*0Sstevel@tonic-gate                 } else {
1873*0Sstevel@tonic-gate                         pte32p = (x86pte32_t *)ptep;
1874*0Sstevel@tonic-gate                         for (;;) {
1875*0Sstevel@tonic-gate                                 p32 = *pte32p;
1876*0Sstevel@tonic-gate                                 if (PTE2PFN(p32, level) != pfn)
1877*0Sstevel@tonic-gate                                         break;
1878*0Sstevel@tonic-gate                                 old = cas32(pte32p, p32, 0);
1879*0Sstevel@tonic-gate                                 if (old == p32)
1880*0Sstevel@tonic-gate                                         break;
1881*0Sstevel@tonic-gate                         }
1882*0Sstevel@tonic-gate                         prev = p32;
1883*0Sstevel@tonic-gate                 }
1884*0Sstevel@tonic-gate                 xi->xi_pte = prev;
1885*0Sstevel@tonic-gate         }
1886*0Sstevel@tonic-gate 
1887*0Sstevel@tonic-gate         /*
1888*0Sstevel@tonic-gate          * For a normal address, we just flush one page mapping.
1889*0Sstevel@tonic-gate          * Otherwise reload cr3 to effect a complete TLB flush.
1890*0Sstevel@tonic-gate          *
1891*0Sstevel@tonic-gate          * Note we don't reload VLP pte's -- this assumes we never have a
1892*0Sstevel@tonic-gate          * large page size at VLP_LEVEL for VLP processes.
1893*0Sstevel@tonic-gate          */
1894*0Sstevel@tonic-gate         if ((uintptr_t)addr != DEMAP_ALL_ADDR) {
1895*0Sstevel@tonic-gate                 mmu_tlbflush_entry(addr);
1896*0Sstevel@tonic-gate         } else {
1897*0Sstevel@tonic-gate                 reload_cr3();
1898*0Sstevel@tonic-gate         }
1899*0Sstevel@tonic-gate         return (0);
1900*0Sstevel@tonic-gate }
1901*0Sstevel@tonic-gate 
1902*0Sstevel@tonic-gate /*
1903*0Sstevel@tonic-gate  * Cross call service function to atomically change a PTE and flush TLBs
1904*0Sstevel@tonic-gate  */
1905*0Sstevel@tonic-gate /*ARGSUSED*/
1906*0Sstevel@tonic-gate static int
1907*0Sstevel@tonic-gate x86pte_update_func(xc_arg_t a1, xc_arg_t a2, xc_arg_t a3)
1908*0Sstevel@tonic-gate {
1909*0Sstevel@tonic-gate         xcall_info_t *xi = (xcall_info_t *)a1;
1910*0Sstevel@tonic-gate         caddr_t addr = (caddr_t)a2;
1911*0Sstevel@tonic-gate 
1912*0Sstevel@tonic-gate         /*
1913*0Sstevel@tonic-gate          * Only the initiating cpu changes the page table entry.
1914*0Sstevel@tonic-gate          * It returns the previous PTE value to the caller.
1915*0Sstevel@tonic-gate  */
1916*0Sstevel@tonic-gate         if (CPU->cpu_id == xi->xi_cpuid) {
1917*0Sstevel@tonic-gate                 x86pte_t *ptep = xi->xi_pteptr;
1918*0Sstevel@tonic-gate                 x86pte_t new = xi->xi_pte;
1919*0Sstevel@tonic-gate                 x86pte_t old = xi->xi_old;
1920*0Sstevel@tonic-gate                 x86pte_t prev;
1921*0Sstevel@tonic-gate 
1922*0Sstevel@tonic-gate                 if (mmu.pae_hat) {
1923*0Sstevel@tonic-gate                         prev = cas64(ptep, old, new);
1924*0Sstevel@tonic-gate                 } else {
1925*0Sstevel@tonic-gate                         x86pte32_t o32 = old;
1926*0Sstevel@tonic-gate                         x86pte32_t n32 = new;
1927*0Sstevel@tonic-gate                         x86pte32_t *pte32p = (x86pte32_t *)ptep;
1928*0Sstevel@tonic-gate                         prev = cas32(pte32p, o32, n32);
1929*0Sstevel@tonic-gate                 }
1930*0Sstevel@tonic-gate 
1931*0Sstevel@tonic-gate                 xi->xi_pte = prev;
1932*0Sstevel@tonic-gate         }
1933*0Sstevel@tonic-gate 
1934*0Sstevel@tonic-gate         /*
1935*0Sstevel@tonic-gate          * Flush the TLB entry
1936*0Sstevel@tonic-gate          */
1937*0Sstevel@tonic-gate         if ((uintptr_t)addr != DEMAP_ALL_ADDR)
1938*0Sstevel@tonic-gate                 mmu_tlbflush_entry(addr);
1939*0Sstevel@tonic-gate         else
1940*0Sstevel@tonic-gate                 reload_cr3();
1941*0Sstevel@tonic-gate         return (0);
1942*0Sstevel@tonic-gate }
1943*0Sstevel@tonic-gate 
1944*0Sstevel@tonic-gate /*
1945*0Sstevel@tonic-gate  * Use cross calls to change a page table entry and invalidate TLBs.
1946*0Sstevel@tonic-gate  */
1947*0Sstevel@tonic-gate void
1948*0Sstevel@tonic-gate x86pte_xcall(hat_t *hat, xcall_info_t *xi, uintptr_t addr)
1949*0Sstevel@tonic-gate {
1950*0Sstevel@tonic-gate         cpuset_t cpus;
1951*0Sstevel@tonic-gate 
1952*0Sstevel@tonic-gate         /*
1953*0Sstevel@tonic-gate          * Given the current implementation of hat_share(), doing a
1954*0Sstevel@tonic-gate          * hat_pageunload() on a shared page table requires invalidating
1955*0Sstevel@tonic-gate          * all user TLB entries on all CPUs.
1956*0Sstevel@tonic-gate          */
1957*0Sstevel@tonic-gate         if (hat->hat_flags & HAT_SHARED) {
1958*0Sstevel@tonic-gate                 hat = kas.a_hat;
1959*0Sstevel@tonic-gate                 addr = DEMAP_ALL_ADDR;
1960*0Sstevel@tonic-gate         }
1961*0Sstevel@tonic-gate 
1962*0Sstevel@tonic-gate         /*
1963*0Sstevel@tonic-gate          * Use a cross call to do the invalidations.
1964*0Sstevel@tonic-gate          * Note the current CPU always has to be in the cross call CPU set.
1965*0Sstevel@tonic-gate          */
1966*0Sstevel@tonic-gate         kpreempt_disable();
1967*0Sstevel@tonic-gate         xi->xi_cpuid = CPU->cpu_id;
1968*0Sstevel@tonic-gate         CPUSET_ZERO(cpus);
1969*0Sstevel@tonic-gate         if (hat == kas.a_hat) {
1970*0Sstevel@tonic-gate                 CPUSET_OR(cpus, khat_cpuset);
1971*0Sstevel@tonic-gate         } else {
1972*0Sstevel@tonic-gate                 mutex_enter(&hat->hat_switch_mutex);
1973*0Sstevel@tonic-gate                 CPUSET_OR(cpus, hat->hat_cpus);
1974*0Sstevel@tonic-gate                 CPUSET_ADD(cpus, CPU->cpu_id);
1975*0Sstevel@tonic-gate         }
1976*0Sstevel@tonic-gate 
1977*0Sstevel@tonic-gate         /*
1978*0Sstevel@tonic-gate          * Use a cross call to modify the page table entry and invalidate TLBs.
1979*0Sstevel@tonic-gate          * If we're panic'ing, don't bother with the cross call.
1980*0Sstevel@tonic-gate          * Note the panicstr check isn't bullet proof and the panic system
1981*0Sstevel@tonic-gate          * ought to be made tighter.
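         * When panicking we simply call xi_func() directly on this CPU,
         * so the local PTE change and TLB flush still take effect.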
1982*0Sstevel@tonic-gate */ 1983*0Sstevel@tonic-gate if (panicstr == NULL) 1984*0Sstevel@tonic-gate xc_wait_sync((xc_arg_t)xi, addr, NULL, X_CALL_HIPRI, 1985*0Sstevel@tonic-gate cpus, xi->xi_func); 1986*0Sstevel@tonic-gate else 1987*0Sstevel@tonic-gate (void) xi->xi_func((xc_arg_t)xi, (xc_arg_t)addr, NULL); 1988*0Sstevel@tonic-gate if (hat != kas.a_hat) 1989*0Sstevel@tonic-gate mutex_exit(&hat->hat_switch_mutex); 1990*0Sstevel@tonic-gate kpreempt_enable(); 1991*0Sstevel@tonic-gate } 1992*0Sstevel@tonic-gate 1993*0Sstevel@tonic-gate /* 1994*0Sstevel@tonic-gate * Invalidate a page table entry if it currently maps the given pfn. 1995*0Sstevel@tonic-gate * This returns the previous value of the PTE. 1996*0Sstevel@tonic-gate */ 1997*0Sstevel@tonic-gate x86pte_t 1998*0Sstevel@tonic-gate x86pte_invalidate_pfn(htable_t *ht, uint_t entry, pfn_t pfn, void *pte_ptr) 1999*0Sstevel@tonic-gate { 2000*0Sstevel@tonic-gate xcall_info_t xi; 2001*0Sstevel@tonic-gate x86pte_t *ptep; 2002*0Sstevel@tonic-gate hat_t *hat; 2003*0Sstevel@tonic-gate uintptr_t addr; 2004*0Sstevel@tonic-gate 2005*0Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 2006*0Sstevel@tonic-gate if (pte_ptr != NULL) { 2007*0Sstevel@tonic-gate ptep = pte_ptr; 2008*0Sstevel@tonic-gate } else { 2009*0Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 2010*0Sstevel@tonic-gate ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift)); 2011*0Sstevel@tonic-gate } 2012*0Sstevel@tonic-gate 2013*0Sstevel@tonic-gate /* 2014*0Sstevel@tonic-gate * Fill in the structure used by the cross call function to do the 2015*0Sstevel@tonic-gate * invalidation. 2016*0Sstevel@tonic-gate */ 2017*0Sstevel@tonic-gate xi.xi_pte = 0; 2018*0Sstevel@tonic-gate xi.xi_pteptr = ptep; 2019*0Sstevel@tonic-gate xi.xi_pfn = pfn; 2020*0Sstevel@tonic-gate xi.xi_level = ht->ht_level; 2021*0Sstevel@tonic-gate xi.xi_func = x86pte_inval_func; 2022*0Sstevel@tonic-gate ASSERT(xi.xi_level != VLP_LEVEL); 2023*0Sstevel@tonic-gate 2024*0Sstevel@tonic-gate hat = ht->ht_hat; 2025*0Sstevel@tonic-gate addr = htable_e2va(ht, entry); 2026*0Sstevel@tonic-gate 2027*0Sstevel@tonic-gate x86pte_xcall(hat, &xi, addr); 2028*0Sstevel@tonic-gate 2029*0Sstevel@tonic-gate if (pte_ptr == NULL) 2030*0Sstevel@tonic-gate x86pte_release_pagetable(ht); 2031*0Sstevel@tonic-gate return (xi.xi_pte); 2032*0Sstevel@tonic-gate } 2033*0Sstevel@tonic-gate 2034*0Sstevel@tonic-gate /* 2035*0Sstevel@tonic-gate * update a PTE and invalidate any stale TLB entries. 2036*0Sstevel@tonic-gate */ 2037*0Sstevel@tonic-gate x86pte_t 2038*0Sstevel@tonic-gate x86pte_update(htable_t *ht, uint_t entry, x86pte_t expected, x86pte_t new) 2039*0Sstevel@tonic-gate { 2040*0Sstevel@tonic-gate xcall_info_t xi; 2041*0Sstevel@tonic-gate x86pte_t *ptep; 2042*0Sstevel@tonic-gate hat_t *hat; 2043*0Sstevel@tonic-gate uintptr_t addr; 2044*0Sstevel@tonic-gate 2045*0Sstevel@tonic-gate ASSERT(!(ht->ht_flags & HTABLE_SHARED_PFN)); 2046*0Sstevel@tonic-gate ptep = x86pte_access_pagetable(ht); 2047*0Sstevel@tonic-gate ptep = (void *)((caddr_t)ptep + (entry << mmu.pte_size_shift)); 2048*0Sstevel@tonic-gate 2049*0Sstevel@tonic-gate /* 2050*0Sstevel@tonic-gate * Fill in the structure used by the cross call function to do the 2051*0Sstevel@tonic-gate * invalidation. 
2052*0Sstevel@tonic-gate */ 2053*0Sstevel@tonic-gate xi.xi_pte = new; 2054*0Sstevel@tonic-gate xi.xi_old = expected; 2055*0Sstevel@tonic-gate xi.xi_pteptr = ptep; 2056*0Sstevel@tonic-gate xi.xi_func = x86pte_update_func; 2057*0Sstevel@tonic-gate 2058*0Sstevel@tonic-gate hat = ht->ht_hat; 2059*0Sstevel@tonic-gate addr = htable_e2va(ht, entry); 2060*0Sstevel@tonic-gate 2061*0Sstevel@tonic-gate x86pte_xcall(hat, &xi, addr); 2062*0Sstevel@tonic-gate 2063*0Sstevel@tonic-gate x86pte_release_pagetable(ht); 2064*0Sstevel@tonic-gate return (xi.xi_pte); 2065*0Sstevel@tonic-gate } 2066*0Sstevel@tonic-gate 2067*0Sstevel@tonic-gate /* 2068*0Sstevel@tonic-gate * Copy page tables - this is just a little more complicated than the 2069*0Sstevel@tonic-gate * previous routines. Note that it's also not atomic! It also is never 2070*0Sstevel@tonic-gate * used for VLP pagetables. 2071*0Sstevel@tonic-gate */ 2072*0Sstevel@tonic-gate void 2073*0Sstevel@tonic-gate x86pte_copy(htable_t *src, htable_t *dest, uint_t entry, uint_t count) 2074*0Sstevel@tonic-gate { 2075*0Sstevel@tonic-gate struct hat_cpu_info *hci; 2076*0Sstevel@tonic-gate caddr_t src_va; 2077*0Sstevel@tonic-gate caddr_t dst_va; 2078*0Sstevel@tonic-gate size_t size; 2079*0Sstevel@tonic-gate 2080*0Sstevel@tonic-gate ASSERT(khat_running); 2081*0Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_VLP)); 2082*0Sstevel@tonic-gate ASSERT(!(src->ht_flags & HTABLE_VLP)); 2083*0Sstevel@tonic-gate ASSERT(!(src->ht_flags & HTABLE_SHARED_PFN)); 2084*0Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN)); 2085*0Sstevel@tonic-gate 2086*0Sstevel@tonic-gate /* 2087*0Sstevel@tonic-gate * Acquire access to the CPU pagetable window for the destination. 2088*0Sstevel@tonic-gate */ 2089*0Sstevel@tonic-gate dst_va = (caddr_t)x86pte_access_pagetable(dest); 2090*0Sstevel@tonic-gate if (kpm_enable) { 2091*0Sstevel@tonic-gate src_va = (caddr_t)x86pte_access_pagetable(src); 2092*0Sstevel@tonic-gate } else { 2093*0Sstevel@tonic-gate hci = CPU->cpu_hat_info; 2094*0Sstevel@tonic-gate 2095*0Sstevel@tonic-gate /* 2096*0Sstevel@tonic-gate * Finish defining the src pagetable mapping 2097*0Sstevel@tonic-gate */ 2098*0Sstevel@tonic-gate src_va = dst_va + MMU_PAGESIZE; 2099*0Sstevel@tonic-gate X86PTE_REMAP(src_va, hci->hci_kernel_pte, 1, 0, src->ht_pfn); 2100*0Sstevel@tonic-gate } 2101*0Sstevel@tonic-gate 2102*0Sstevel@tonic-gate /* 2103*0Sstevel@tonic-gate * now do the copy 2104*0Sstevel@tonic-gate */ 2105*0Sstevel@tonic-gate 2106*0Sstevel@tonic-gate dst_va += entry << mmu.pte_size_shift; 2107*0Sstevel@tonic-gate src_va += entry << mmu.pte_size_shift; 2108*0Sstevel@tonic-gate size = count << mmu.pte_size_shift; 2109*0Sstevel@tonic-gate bcopy(src_va, dst_va, size); 2110*0Sstevel@tonic-gate 2111*0Sstevel@tonic-gate x86pte_release_pagetable(dest); 2112*0Sstevel@tonic-gate } 2113*0Sstevel@tonic-gate 2114*0Sstevel@tonic-gate /* 2115*0Sstevel@tonic-gate * Zero page table entries - Note this doesn't use atomic stores! 2116*0Sstevel@tonic-gate */ 2117*0Sstevel@tonic-gate void 2118*0Sstevel@tonic-gate x86pte_zero(htable_t *dest, uint_t entry, uint_t count) 2119*0Sstevel@tonic-gate { 2120*0Sstevel@tonic-gate caddr_t dst_va; 2121*0Sstevel@tonic-gate x86pte_t *p; 2122*0Sstevel@tonic-gate x86pte32_t *p32; 2123*0Sstevel@tonic-gate size_t size; 2124*0Sstevel@tonic-gate extern void hat_pte_zero(void *, size_t); 2125*0Sstevel@tonic-gate 2126*0Sstevel@tonic-gate /* 2127*0Sstevel@tonic-gate * Map in the page table to be zeroed. 
2128*0Sstevel@tonic-gate */ 2129*0Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_SHARED_PFN)); 2130*0Sstevel@tonic-gate ASSERT(!(dest->ht_flags & HTABLE_VLP)); 2131*0Sstevel@tonic-gate dst_va = (caddr_t)x86pte_access_pagetable(dest); 2132*0Sstevel@tonic-gate dst_va += entry << mmu.pte_size_shift; 2133*0Sstevel@tonic-gate size = count << mmu.pte_size_shift; 2134*0Sstevel@tonic-gate if (x86_feature & X86_SSE2) { 2135*0Sstevel@tonic-gate hat_pte_zero(dst_va, size); 2136*0Sstevel@tonic-gate } else if (khat_running) { 2137*0Sstevel@tonic-gate bzero(dst_va, size); 2138*0Sstevel@tonic-gate } else { 2139*0Sstevel@tonic-gate /* 2140*0Sstevel@tonic-gate * Can't just use bzero during boot because it checks the 2141*0Sstevel@tonic-gate * address against kernelbase. Instead just use a zero loop. 2142*0Sstevel@tonic-gate */ 2143*0Sstevel@tonic-gate if (mmu.pae_hat) { 2144*0Sstevel@tonic-gate p = (x86pte_t *)dst_va; 2145*0Sstevel@tonic-gate while (count-- > 0) 2146*0Sstevel@tonic-gate *p++ = 0; 2147*0Sstevel@tonic-gate } else { 2148*0Sstevel@tonic-gate p32 = (x86pte32_t *)dst_va; 2149*0Sstevel@tonic-gate while (count-- > 0) 2150*0Sstevel@tonic-gate *p32++ = 0; 2151*0Sstevel@tonic-gate } 2152*0Sstevel@tonic-gate } 2153*0Sstevel@tonic-gate x86pte_release_pagetable(dest); 2154*0Sstevel@tonic-gate } 2155*0Sstevel@tonic-gate 2156*0Sstevel@tonic-gate /* 2157*0Sstevel@tonic-gate * Called to ensure that all pagetables are in the system dump 2158*0Sstevel@tonic-gate */ 2159*0Sstevel@tonic-gate void 2160*0Sstevel@tonic-gate hat_dump(void) 2161*0Sstevel@tonic-gate { 2162*0Sstevel@tonic-gate hat_t *hat; 2163*0Sstevel@tonic-gate uint_t h; 2164*0Sstevel@tonic-gate htable_t *ht; 2165*0Sstevel@tonic-gate int count; 2166*0Sstevel@tonic-gate 2167*0Sstevel@tonic-gate /* 2168*0Sstevel@tonic-gate * kas.a_hat is the head of the circular list, but not an element of 2169*0Sstevel@tonic-gate * the list. Once we pass kas.a_hat->hat_next a second time, we 2170*0Sstevel@tonic-gate * know we've iterated through every hat structure. 2171*0Sstevel@tonic-gate */ 2172*0Sstevel@tonic-gate for (hat = kas.a_hat, count = 0; hat != kas.a_hat->hat_next || 2173*0Sstevel@tonic-gate count++ == 0; hat = hat->hat_next) { 2174*0Sstevel@tonic-gate for (h = 0; h < hat->hat_num_hash; ++h) { 2175*0Sstevel@tonic-gate for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) { 2176*0Sstevel@tonic-gate if ((ht->ht_flags & HTABLE_VLP) == 0) { 2177*0Sstevel@tonic-gate dump_page(ht->ht_pfn); 2178*0Sstevel@tonic-gate } 2179*0Sstevel@tonic-gate } 2180*0Sstevel@tonic-gate } 2181*0Sstevel@tonic-gate } 2182*0Sstevel@tonic-gate } 2183