/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/systm.h>
#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/vm_dep.h>
#include <vm/hat_i86.h>
#include <sys/cmn_err.h>

/*
 * When pages are shared by more than one mapping, a list of these
 * structs hangs off of the page_t connected by the hm_next and hm_prev
 * fields. Every hment is also indexed by a system-wide hash table, using
 * hm_hashnext to connect it to the chain of hments in a single hash
 * bucket.
 */
struct hment {
	struct hment	*hm_hashnext;	/* next mapping on hash chain */
	struct hment	*hm_next;	/* next mapping of same page */
	struct hment	*hm_prev;	/* previous mapping of same page */
	htable_t	*hm_htable;	/* corresponding htable_t */
	pfn_t		hm_pfn;		/* mapping page frame number */
	uint16_t	hm_entry;	/* index of pte in htable */
	uint16_t	hm_pad;		/* explicitly expose compiler padding */
#ifdef __amd64
	uint32_t	hm_pad2;	/* explicitly expose compiler padding */
#endif
};
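/*
 * Rough size arithmetic for the structure above (illustrative, assuming
 * 8-byte pointers and an 8-byte pfn_t on a 64-bit kernel): three list
 * pointers (24) + hm_htable (8) + hm_pfn (8) + hm_entry/hm_pad/hm_pad2 (8)
 * comes to 48 bytes; a 32-bit kernel with 4-byte pointers and pfn_t comes
 * to 24 bytes. The explicit pad fields keep that size visible rather than
 * leaving it to silent compiler padding.
 */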
/*
 * Value returned by hment_walk() when dealing with a single mapping
 * embedded in the page_t.
 */
#define	HMENT_EMBEDDED ((hment_t *)(uintptr_t)1)

kmem_cache_t *hment_cache;

/*
 * The hment reserve is similar to the htable reserve, with the following
 * exception. Hments are never needed for HAT kmem allocs.
 *
 * The hment_reserve_amount variable is used so that its value can be
 * changed to zero via a kernel debugger to force the stealing code to get
 * tested.
 */
#define	HMENT_RESERVE_AMOUNT	(200)	/* currently a guess at right value. */
uint_t hment_reserve_amount = HMENT_RESERVE_AMOUNT;
kmutex_t hment_reserve_mutex;
uint_t	hment_reserve_count;
hment_t	*hment_reserve_pool;
extern kthread_t *hat_reserves_thread;

/*
 * Possible performance RFE: we might need to make this dynamic, perhaps
 * based on the number of pages in the system.
 */
#define	HMENT_HASH_SIZE (64 * 1024)
static uint_t hment_hash_entries = HMENT_HASH_SIZE;
static hment_t **hment_hash;

/*
 * Lots of highly shared pages will have the same value for "entry" (consider
 * the starting address of "xterm" or "sh"). So we'll distinguish them by
 * adding the pfn of the page table into both the high and low bits of the
 * hash. The shift by 9 corresponds to the range of values for entry (0..511).
 */
#define	HMENT_HASH(pfn, entry)	(uint32_t)	\
	((((pfn) << 9) + entry + pfn) & (hment_hash_entries - 1))
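/*
 * Worked example (illustrative, not from the original source): for a page
 * table at pfn 0x1234 and entry 7, HMENT_HASH computes
 *	(0x1234 << 9) + 7 + 0x1234 = 0x246800 + 0x123b = 0x247a3b
 * which, masked with the default hment_hash_entries - 1 (0xffff), selects
 * bucket 0x7a3b. Two pages sharing the same "entry" still land in
 * different buckets because the page table pfn feeds both terms.
 */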
/*
 * "mlist_lock" is a hashed mutex lock for protecting per-page mapping
 * lists and "hash_lock" is a similar lock protecting the hment hash
 * table. The hashed approach is taken to avoid the spatial overhead of
 * maintaining a separate lock for each page, while still achieving better
 * scalability than a single lock would allow.
 */
#define	MLIST_NUM_LOCK	256		/* must be power of two */
static kmutex_t mlist_lock[MLIST_NUM_LOCK];

/*
 * the shift by 9 is so that all large pages don't use the same hash bucket
 */
#define	MLIST_MUTEX(pp) \
	&mlist_lock[((pp)->p_pagenum + ((pp)->p_pagenum >> 9)) & \
	(MLIST_NUM_LOCK - 1)]
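/*
 * Illustrative example of why the ">> 9" term above helps (not from the
 * original source): the leading page_t of every 2MB large page has a pfn
 * that is a multiple of 512, so pfn & (MLIST_NUM_LOCK - 1) alone would
 * send them all to bucket 0. Folding in pfn >> 9 spreads them out, e.g.
 * pfn 0x200 hashes to bucket 1, pfn 0x400 to bucket 2, and so on.
 */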
#define	HASH_NUM_LOCK	256		/* must be power of two */
static kmutex_t hash_lock[HASH_NUM_LOCK];

#define	HASH_MUTEX(idx)	&hash_lock[(idx) & (HASH_NUM_LOCK - 1)]

static hment_t *hment_steal(void);

/*
 * put one hment onto the reserves list
 */
static void
hment_put_reserve(hment_t *hm)
{
	HATSTAT_INC(hs_hm_put_reserve);
	mutex_enter(&hment_reserve_mutex);
	hm->hm_next = hment_reserve_pool;
	hment_reserve_pool = hm;
	++hment_reserve_count;
	mutex_exit(&hment_reserve_mutex);
}

/*
 * Take one hment from the reserve.
 */
static hment_t *
hment_get_reserve(void)
{
	hment_t *hm = NULL;

	/*
	 * We rely on a "donation system" to refill the hment reserve
	 * list, which only takes place when we are allocating hments for
	 * user mappings. It is theoretically possible that an incredibly
	 * long string of kernel hment_alloc()s with no intervening user
	 * hment_alloc()s could exhaust that pool.
	 */
	HATSTAT_INC(hs_hm_get_reserve);
	mutex_enter(&hment_reserve_mutex);
	if (hment_reserve_count != 0) {
		hm = hment_reserve_pool;
		hment_reserve_pool = hm->hm_next;
		--hment_reserve_count;
	}
	mutex_exit(&hment_reserve_mutex);
	return (hm);
}

/*
 * Allocate an hment
 */
static hment_t *
hment_alloc()
{
	int km_flag = can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP;
	hment_t	*hm = NULL;

	/*
	 * If we aren't using the reserves, try using kmem to get an hment.
	 * Donate any successful allocations to reserves if low.
	 *
	 * If we're in panic, resort to using the reserves.
	 */
	HATSTAT_INC(hs_hm_alloc);
	if (!USE_HAT_RESERVES()) {
		for (;;) {
			hm = kmem_cache_alloc(hment_cache, km_flag);
			if (hm == NULL || USE_HAT_RESERVES() ||
			    hment_reserve_count >= hment_reserve_amount)
				break;
			hment_put_reserve(hm);
		}
	}

	/*
	 * If allocation failed, we need to tap the reserves or steal
	 */
	if (hm == NULL) {
		if (USE_HAT_RESERVES())
			hm = hment_get_reserve();

		/*
		 * If we still haven't gotten an hment, attempt to steal one by
		 * victimizing a mapping in a user htable.
		 */
		if (hm == NULL && can_steal_post_boot)
			hm = hment_steal();

		/*
		 * we're in dire straits, try the reserve
		 */
		if (hm == NULL)
			hm = hment_get_reserve();

		/*
		 * still no hment is a serious problem.
		 */
		if (hm == NULL)
			panic("hment_alloc(): no reserve, couldn't steal");
	}

	hm->hm_entry = 0;
	hm->hm_htable = NULL;
	hm->hm_hashnext = NULL;
	hm->hm_next = NULL;
	hm->hm_prev = NULL;
	hm->hm_pfn = PFN_INVALID;
	return (hm);
}
/*
 * Free an hment, possibly to the reserves list when called from the
 * thread using the reserves. For example, when freeing an hment during an
 * htable_steal(), we can't recurse into the kmem allocator, so we just
 * push the hment onto the reserve list.
 */
void
hment_free(hment_t *hm)
{
#ifdef DEBUG
	/*
	 * zero out all fields to try and force any race conditions to segfault
	 */
	bzero(hm, sizeof (*hm));
#endif
	HATSTAT_INC(hs_hm_free);
	if (USE_HAT_RESERVES() ||
	    hment_reserve_count < hment_reserve_amount)
		hment_put_reserve(hm);
	else
		kmem_cache_free(hment_cache, hm);
}

int
x86_hm_held(page_t *pp)
{
	ASSERT(pp != NULL);
	return (MUTEX_HELD(MLIST_MUTEX(pp)));
}

void
x86_hm_enter(page_t *pp)
{
	ASSERT(pp != NULL);
	mutex_enter(MLIST_MUTEX(pp));
}

void
x86_hm_exit(page_t *pp)
{
	ASSERT(pp != NULL);
	mutex_exit(MLIST_MUTEX(pp));
}

/*
 * Internal routine to add a full hment to a page_t mapping list
 */
static void
hment_insert(hment_t *hm, page_t *pp)
{
	uint_t		idx;

	ASSERT(x86_hm_held(pp));
	ASSERT(!pp->p_embed);

	/*
	 * Add the hment to the page's mapping list.
	 */
	++pp->p_share;
	hm->hm_next = pp->p_mapping;
	if (pp->p_mapping != NULL)
		((hment_t *)pp->p_mapping)->hm_prev = hm;
	pp->p_mapping = hm;

	/*
	 * Add the hment to the system-wide hash table.
	 */
	idx = HMENT_HASH(hm->hm_htable->ht_pfn, hm->hm_entry);

	mutex_enter(HASH_MUTEX(idx));
	hm->hm_hashnext = hment_hash[idx];
	hment_hash[idx] = hm;
	mutex_exit(HASH_MUTEX(idx));
}
/*
 * Prepare a mapping list entry to the given page.
 *
 * There are 4 different situations to deal with:
 *
 * - Adding the first mapping to a page_t as an embedded hment
 * - Refaulting on an existing embedded mapping
 * - Upgrading an embedded mapping when adding a 2nd mapping
 * - Adding another mapping to a page_t that already has multiple mappings
 *     note we don't optimize for the refaulting case here.
 *
 * Due to competition with other threads that may be mapping/unmapping the
 * same page and the need to drop all locks while allocating hments, any or
 * all of these situations can occur (and in almost any order) in any given
 * call. Isn't this fun!
 */
hment_t *
hment_prepare(htable_t *htable, uint_t entry, page_t *pp)
{
	hment_t *hm = NULL;

	ASSERT(x86_hm_held(pp));

	for (;;) {

		/*
		 * The most common case is establishing the first mapping to a
		 * page, so check that first. This doesn't need any allocated
		 * hment.
		 */
		if (pp->p_mapping == NULL) {
			ASSERT(!pp->p_embed);
			ASSERT(pp->p_share == 0);
			if (hm == NULL)
				break;

			/*
			 * we had an hment already, so free it and retry
			 */
			goto free_and_continue;
		}

		/*
		 * If there is an embedded mapping, we may need to
		 * convert it to an hment.
		 */
		if (pp->p_embed) {

			/* should point to htable */
			ASSERT(pp->p_mapping != NULL);

			/*
			 * If we are faulting on a pre-existing mapping
			 * there is no need to promote/allocate a new hment.
			 * This happens a lot due to segmap.
			 */
			if (pp->p_mapping == htable && pp->p_mlentry == entry) {
				if (hm == NULL)
					break;
				goto free_and_continue;
			}

			/*
			 * If we have an hment allocated, use it to promote the
			 * existing embedded mapping.
			 */
			if (hm != NULL) {
				hm->hm_htable = pp->p_mapping;
				hm->hm_entry = pp->p_mlentry;
				hm->hm_pfn = pp->p_pagenum;
				pp->p_mapping = NULL;
				pp->p_share = 0;
				pp->p_embed = 0;
				hment_insert(hm, pp);
			}

			/*
			 * We either didn't have an hment allocated or we just
			 * used it for the embedded mapping. In either case,
			 * allocate another hment and restart.
			 */
			goto allocate_and_continue;
		}
		/*
		 * Last possibility is that we're adding an hment to a list
		 * of hments.
		 */
		if (hm != NULL)
			break;
allocate_and_continue:
		x86_hm_exit(pp);
		hm = hment_alloc();
		x86_hm_enter(pp);
		continue;

free_and_continue:
		/*
		 * we allocated an hment already, free it and retry
		 */
		x86_hm_exit(pp);
		hment_free(hm);
		hm = NULL;
		x86_hm_enter(pp);
	}
	ASSERT(x86_hm_held(pp));
	return (hm);
}

/*
 * Record a mapping list entry for the htable/entry to the given page.
 *
 * hment_prepare() should have properly set up the situation.
 */
void
hment_assign(htable_t *htable, uint_t entry, page_t *pp, hment_t *hm)
{
	ASSERT(x86_hm_held(pp));

	/*
	 * The most common case is establishing the first mapping to a
	 * page, so check that first. This doesn't need any allocated
	 * hment.
	 */
	if (pp->p_mapping == NULL) {
		ASSERT(hm == NULL);
		ASSERT(!pp->p_embed);
		ASSERT(pp->p_share == 0);
		pp->p_embed = 1;
		pp->p_mapping = htable;
		pp->p_mlentry = entry;
		return;
	}

	/*
	 * We should never get here with a pre-existing embedded mapping
	 */
	ASSERT(!pp->p_embed);

	/*
	 * add the new hment to the mapping list
	 */
	ASSERT(hm != NULL);
	hm->hm_htable = htable;
	hm->hm_entry = entry;
	hm->hm_pfn = pp->p_pagenum;
	hment_insert(hm, pp);
}
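/*
 * Sketch of the expected caller sequence for the two routines above
 * (illustrative only, simplified from how the HAT layers a PTE load on
 * top of these interfaces):
 *
 *	x86_hm_enter(pp);
 *	hm = hment_prepare(ht, entry, pp);	(may legitimately return NULL)
 *	... install the PTE for (ht, entry) ...
 *	hment_assign(ht, entry, pp, hm);
 *	x86_hm_exit(pp);
 *
 * hment_prepare() may drop and retake the mapping list lock while it
 * allocates, which is why recording the mapping is a separate second step.
 */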
/*
 * Walk through the mappings for a page.
 *
 * must already have done an x86_hm_enter()
 */
hment_t *
hment_walk(page_t *pp, htable_t **ht, uint_t *entry, hment_t *prev)
{
	hment_t		*hm;

	ASSERT(x86_hm_held(pp));

	if (pp->p_embed) {
		if (prev == NULL) {
			*ht = (htable_t *)pp->p_mapping;
			*entry = pp->p_mlentry;
			hm = HMENT_EMBEDDED;
		} else {
			ASSERT(prev == HMENT_EMBEDDED);
			hm = NULL;
		}
	} else {
		if (prev == NULL) {
			ASSERT(prev != HMENT_EMBEDDED);
			hm = (hment_t *)pp->p_mapping;
		} else {
			hm = prev->hm_next;
		}

		if (hm != NULL) {
			*ht = hm->hm_htable;
			*entry = hm->hm_entry;
		}
	}
	return (hm);
}
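/*
 * Illustrative loop over all mappings of a page (a sketch, not from the
 * original source). HMENT_EMBEDDED needs no special casing by the caller,
 * since it is simply a non-NULL cookie handed back in as "prev":
 *
 *	x86_hm_enter(pp);
 *	for (hm = NULL; (hm = hment_walk(pp, &ht, &entry, hm)) != NULL; ) {
 *		... use ht and entry ...
 *	}
 *	x86_hm_exit(pp);
 */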
/*
 * Remove a mapping to a page from its mapping list. Must have
 * the corresponding mapping list locked.
 * Finds the mapping list entry with the given htable/entry and
 * unlinks it from the mapping list.
 */
hment_t *
hment_remove(page_t *pp, htable_t *ht, uint_t entry)
{
	hment_t		*prev = NULL;
	hment_t		*hm;
	uint_t		idx;
	pfn_t		pfn;

	ASSERT(x86_hm_held(pp));

	/*
	 * Check if we have only one mapping embedded in the page_t.
	 */
	if (pp->p_embed) {
		ASSERT(ht == (htable_t *)pp->p_mapping);
		ASSERT(entry == pp->p_mlentry);
		ASSERT(pp->p_share == 0);
		pp->p_mapping = NULL;
		pp->p_mlentry = 0;
		pp->p_embed = 0;
		return (NULL);
	}

	/*
	 * Otherwise it must be in the list of hments.
	 * Find the hment in the system-wide hash table and remove it.
	 */
	ASSERT(pp->p_share != 0);
	pfn = pp->p_pagenum;
	idx = HMENT_HASH(ht->ht_pfn, entry);
	mutex_enter(HASH_MUTEX(idx));
	hm = hment_hash[idx];
	while (hm && (hm->hm_htable != ht || hm->hm_entry != entry ||
	    hm->hm_pfn != pfn)) {
		prev = hm;
		hm = hm->hm_hashnext;
	}
	if (hm == NULL) {
		panic("hment_remove() missing in hash table pp=%lx, ht=%lx, "
		    "entry=0x%x hash index=0x%x", (uintptr_t)pp, (uintptr_t)ht,
		    entry, idx);
	}

	if (prev)
		prev->hm_hashnext = hm->hm_hashnext;
	else
		hment_hash[idx] = hm->hm_hashnext;
	mutex_exit(HASH_MUTEX(idx));

	/*
	 * Remove the hment from the page's mapping list
	 */
	if (hm->hm_next)
		hm->hm_next->hm_prev = hm->hm_prev;
	if (hm->hm_prev)
		hm->hm_prev->hm_next = hm->hm_next;
	else
		pp->p_mapping = hm->hm_next;

	--pp->p_share;
	hm->hm_hashnext = NULL;
	hm->hm_next = NULL;
	hm->hm_prev = NULL;

	return (hm);
}

/*
 * Put initial hments in the reserve pool.
 */
void
hment_reserve(uint_t count)
{
	hment_t	*hm;

	count += hment_reserve_amount;

	while (hment_reserve_count < count) {
		hm = kmem_cache_alloc(hment_cache, KM_NOSLEEP);
		if (hm == NULL)
			return;
		hment_put_reserve(hm);
	}
}

/*
 * Readjust the hment reserves after they may have been used.
 */
void
hment_adjust_reserve()
{
	hment_t	*hm;

	/*
	 * Free up any excess reserves
	 */
	while (hment_reserve_count > hment_reserve_amount) {
		ASSERT(curthread != hat_reserves_thread);
		hm = hment_get_reserve();
		if (hm == NULL)
			return;
		hment_free(hm);
	}
}
/*
 * initialize hment data structures
 */
void
hment_init(void)
{
	int i;
	int flags = KMC_NOHASH | KMC_NODEBUG;

	/*
	 * Initialize kmem caches. On 32 bit kernels we shut off
	 * debug information to save on precious kernel VA usage.
	 */
	hment_cache = kmem_cache_create("hment_t",
	    sizeof (hment_t), 0, NULL, NULL, NULL,
	    NULL, hat_memload_arena, flags);

	hment_hash = kmem_zalloc(hment_hash_entries * sizeof (hment_t *),
	    KM_SLEEP);

	for (i = 0; i < MLIST_NUM_LOCK; i++)
		mutex_init(&mlist_lock[i], NULL, MUTEX_DEFAULT, NULL);

	for (i = 0; i < HASH_NUM_LOCK; i++)
		mutex_init(&hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
}

/*
 * return the number of mappings to a page
 *
 * Note there is no ASSERT() that the MUTEX is held for this.
 * Hence the return value might be inaccurate if this is called without
 * doing an x86_hm_enter().
 */
uint_t
hment_mapcnt(page_t *pp)
{
	uint_t cnt;
	uint_t szc;
	page_t *larger;
	hment_t	*hm;

	x86_hm_enter(pp);
	if (pp->p_mapping == NULL)
		cnt = 0;
	else if (pp->p_embed)
		cnt = 1;
	else
		cnt = pp->p_share;
	x86_hm_exit(pp);

	/*
	 * walk through all larger mapping sizes counting mappings
	 */
	for (szc = 1; szc <= pp->p_szc; ++szc) {
		larger = PP_GROUPLEADER(pp, szc);
		if (larger == pp)	/* don't double count large mappings */
			continue;

		x86_hm_enter(larger);
		if (larger->p_mapping != NULL) {
			if (larger->p_embed &&
			    ((htable_t *)larger->p_mapping)->ht_level == szc) {
				++cnt;
			} else if (!larger->p_embed) {
				for (hm = larger->p_mapping; hm;
				    hm = hm->hm_next) {
					if (hm->hm_htable->ht_level == szc)
						++cnt;
				}
			}
		}
		x86_hm_exit(larger);
	}
	return (cnt);
}
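/*
 * Illustrative example (not from the original source): for a 4K
 * constituent page mapped once in its own right, whose 2MB group leader
 * (p_szc == 1) carries a single embedded mapping from a level 1 htable,
 * hment_mapcnt() returns 1 (the page's own mapping) + 1 (the large
 * mapping found on the group leader) = 2.
 */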
/*
 * We need to steal an hment. Walk through all the page_t's until we
 * find one that has multiple mappings. Unload one of the mappings
 * and reclaim that hment. Note that we save the page we stopped at
 * and restart there next time, to try and spread the pain.
 */
static page_t *last_page = NULL;

static hment_t *
hment_steal(void)
{
	page_t *last = last_page;
	page_t *pp = last;
	hment_t *hm = NULL;
	hment_t *hm2;
	htable_t *ht;
	uint_t found_one = 0;

	HATSTAT_INC(hs_hm_steals);
	if (pp == NULL)
		last = pp = page_first();

	while (!found_one) {
		HATSTAT_INC(hs_hm_steal_exam);
		pp = page_next(pp);
		if (pp == NULL)
			pp = page_first();

		/*
		 * The loop and function exit here if nothing found to steal.
		 */
		if (pp == last)
			return (NULL);

		/*
		 * Only lock the page_t if it has hments.
		 */
		if (pp->p_mapping == NULL || pp->p_embed)
			continue;

		/*
		 * Search the mapping list for a usable mapping.
		 */
		x86_hm_enter(pp);
		if (!pp->p_embed) {
			for (hm = pp->p_mapping; hm; hm = hm->hm_next) {
				ht = hm->hm_htable;
				if (ht->ht_hat != kas.a_hat &&
				    ht->ht_busy == 0 &&
				    ht->ht_lock_cnt == 0) {
					found_one = 1;
					break;
				}
			}
		}
		if (!found_one)
			x86_hm_exit(pp);
	}

	/*
	 * Steal the mapping we found. Note that hati_page_unmap() will
	 * do the x86_hm_exit().
	 */
	hm2 = hati_page_unmap(pp, ht, hm->hm_entry);
	ASSERT(hm2 == hm);
	last_page = pp;
	return (hm);
}