/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
250Sstevel@tonic-gate
260Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
270Sstevel@tonic-gate
280Sstevel@tonic-gate #include <sys/types.h>
290Sstevel@tonic-gate #include <sys/sysmacros.h>
300Sstevel@tonic-gate #include <sys/kmem.h>
310Sstevel@tonic-gate #include <sys/atomic.h>
320Sstevel@tonic-gate #include <sys/bitmap.h>
330Sstevel@tonic-gate #include <sys/systm.h>
340Sstevel@tonic-gate #include <vm/seg_kmem.h>
350Sstevel@tonic-gate #include <vm/hat.h>
360Sstevel@tonic-gate #include <vm/vm_dep.h>
370Sstevel@tonic-gate #include <vm/hat_i86.h>
380Sstevel@tonic-gate #include <sys/cmn_err.h>
39*5618Sjosephb #include <sys/avl.h>
400Sstevel@tonic-gate
410Sstevel@tonic-gate
420Sstevel@tonic-gate /*
430Sstevel@tonic-gate * When pages are shared by more than one mapping, a list of these
440Sstevel@tonic-gate * structs hangs off of the page_t connected by the hm_next and hm_prev
450Sstevel@tonic-gate * fields. Every hment is also indexed by a system-wide hash table, using
46*5618Sjosephb * hm_hashlink to connect the hments within each hash bucket.
470Sstevel@tonic-gate */
480Sstevel@tonic-gate struct hment {
49*5618Sjosephb avl_node_t hm_hashlink; /* links for hash table */
500Sstevel@tonic-gate struct hment *hm_next; /* next mapping of same page */
510Sstevel@tonic-gate struct hment *hm_prev; /* previous mapping of same page */
520Sstevel@tonic-gate htable_t *hm_htable; /* corresponding htable_t */
533308Ssudheer pfn_t hm_pfn; /* mapping page frame number */
540Sstevel@tonic-gate uint16_t hm_entry; /* index of pte in htable */
550Sstevel@tonic-gate uint16_t hm_pad; /* explicitly expose compiler padding */
560Sstevel@tonic-gate #ifdef __amd64
570Sstevel@tonic-gate uint32_t hm_pad2; /* explicitly expose compiler padding */
580Sstevel@tonic-gate #endif
590Sstevel@tonic-gate };
600Sstevel@tonic-gate
610Sstevel@tonic-gate /*
620Sstevel@tonic-gate * Value returned by hment_walk() when dealing with a single mapping
630Sstevel@tonic-gate * embedded in the page_t.
640Sstevel@tonic-gate */
650Sstevel@tonic-gate #define HMENT_EMBEDDED ((hment_t *)(uintptr_t)1)
660Sstevel@tonic-gate
670Sstevel@tonic-gate kmem_cache_t *hment_cache;
680Sstevel@tonic-gate
690Sstevel@tonic-gate /*
700Sstevel@tonic-gate * The hment reserve is similar to the htable reserve, with the following
710Sstevel@tonic-gate * exception. Hment's are never needed for HAT kmem allocs.
720Sstevel@tonic-gate *
730Sstevel@tonic-gate * The hment_reserve_amount variable is used, so that you can change it's
740Sstevel@tonic-gate * value to zero via a kernel debugger to force stealing to get tested.
750Sstevel@tonic-gate */
760Sstevel@tonic-gate #define HMENT_RESERVE_AMOUNT (200) /* currently a guess at right value. */
770Sstevel@tonic-gate uint_t hment_reserve_amount = HMENT_RESERVE_AMOUNT;
780Sstevel@tonic-gate kmutex_t hment_reserve_mutex;
790Sstevel@tonic-gate uint_t hment_reserve_count;
800Sstevel@tonic-gate hment_t *hment_reserve_pool;
810Sstevel@tonic-gate
820Sstevel@tonic-gate /*
83*5618Sjosephb * All hments are stored in a system wide hash of AVL trees.
840Sstevel@tonic-gate */
850Sstevel@tonic-gate #define HMENT_HASH_SIZE (64 * 1024)
860Sstevel@tonic-gate static uint_t hment_hash_entries = HMENT_HASH_SIZE;
87*5618Sjosephb static avl_tree_t *hment_table;
880Sstevel@tonic-gate
/*
 * Hash a (page table pfn, entry) pair to a bucket index in hment_table.
 *
 * Lots of highly shared pages will have the same value for "entry" (consider
 * the starting address of "xterm" or "sh"). So we distinguish them by
 * adding the pfn of the page table into both the high bits and the low bits.
 * The shift by 9 corresponds to the range of values for entry (0..511).
 *
 * Both arguments are fully parenthesized so that compound expressions
 * passed as "pfn" or "entry" are evaluated as a unit.  Note that "pfn" is
 * evaluated twice, so it must not have side effects.
 */
#define	HMENT_HASH(pfn, entry) (uint32_t) \
	((((pfn) << 9) + (entry) + (pfn)) & (hment_hash_entries - 1))
970Sstevel@tonic-gate
980Sstevel@tonic-gate /*
990Sstevel@tonic-gate * "mlist_lock" is a hashed mutex lock for protecting per-page mapping
1000Sstevel@tonic-gate * lists and "hash_lock" is a similar lock protecting the hment hash
1010Sstevel@tonic-gate * table. The hashed approach is taken to avoid the spatial overhead of
1020Sstevel@tonic-gate * maintaining a separate lock for each page, while still achieving better
1030Sstevel@tonic-gate * scalability than a single lock would allow.
1040Sstevel@tonic-gate */
105*5618Sjosephb #define MLIST_NUM_LOCK 2048 /* must be power of two */
106*5618Sjosephb static kmutex_t *mlist_lock;
1070Sstevel@tonic-gate
1080Sstevel@tonic-gate /*
1090Sstevel@tonic-gate * the shift by 9 is so that all large pages don't use the same hash bucket
1100Sstevel@tonic-gate */
1110Sstevel@tonic-gate #define MLIST_MUTEX(pp) \
1120Sstevel@tonic-gate &mlist_lock[((pp)->p_pagenum + ((pp)->p_pagenum >> 9)) & \
1130Sstevel@tonic-gate (MLIST_NUM_LOCK - 1)]
1140Sstevel@tonic-gate
115*5618Sjosephb #define HASH_NUM_LOCK 2048 /* must be power of two */
116*5618Sjosephb static kmutex_t *hash_lock;
1170Sstevel@tonic-gate
1180Sstevel@tonic-gate #define HASH_MUTEX(idx) &hash_lock[(idx) & (HASH_NUM_LOCK-1)]
1190Sstevel@tonic-gate
120*5618Sjosephb static avl_node_t null_avl_link; /* always zero */
1210Sstevel@tonic-gate static hment_t *hment_steal(void);
1220Sstevel@tonic-gate
1230Sstevel@tonic-gate /*
124*5618Sjosephb * Utility to compare hment_t's for use in AVL tree. The ordering
125*5618Sjosephb * is entirely arbitrary and is just so that the AVL algorithm works.
126*5618Sjosephb */
127*5618Sjosephb static int
hment_compare(const void * hm1,const void * hm2)128*5618Sjosephb hment_compare(const void *hm1, const void *hm2)
129*5618Sjosephb {
130*5618Sjosephb hment_t *h1 = (hment_t *)hm1;
131*5618Sjosephb hment_t *h2 = (hment_t *)hm2;
132*5618Sjosephb long diff;
133*5618Sjosephb
134*5618Sjosephb diff = (uintptr_t)h1->hm_htable - (uintptr_t)h2->hm_htable;
135*5618Sjosephb if (diff == 0) {
136*5618Sjosephb diff = h1->hm_entry - h2->hm_entry;
137*5618Sjosephb if (diff == 0)
138*5618Sjosephb diff = h1->hm_pfn - h2->hm_pfn;
139*5618Sjosephb }
140*5618Sjosephb if (diff < 0)
141*5618Sjosephb diff = -1;
142*5618Sjosephb else if (diff > 0)
143*5618Sjosephb diff = 1;
144*5618Sjosephb return (diff);
145*5618Sjosephb }
146*5618Sjosephb
147*5618Sjosephb /*
1480Sstevel@tonic-gate * put one hment onto the reserves list
1490Sstevel@tonic-gate */
1500Sstevel@tonic-gate static void
hment_put_reserve(hment_t * hm)1510Sstevel@tonic-gate hment_put_reserve(hment_t *hm)
1520Sstevel@tonic-gate {
1530Sstevel@tonic-gate HATSTAT_INC(hs_hm_put_reserve);
1540Sstevel@tonic-gate mutex_enter(&hment_reserve_mutex);
1550Sstevel@tonic-gate hm->hm_next = hment_reserve_pool;
1560Sstevel@tonic-gate hment_reserve_pool = hm;
1570Sstevel@tonic-gate ++hment_reserve_count;
1580Sstevel@tonic-gate mutex_exit(&hment_reserve_mutex);
1590Sstevel@tonic-gate }
1600Sstevel@tonic-gate
1610Sstevel@tonic-gate /*
1620Sstevel@tonic-gate * Take one hment from the reserve.
1630Sstevel@tonic-gate */
1640Sstevel@tonic-gate static hment_t *
hment_get_reserve(void)1650Sstevel@tonic-gate hment_get_reserve(void)
1660Sstevel@tonic-gate {
1670Sstevel@tonic-gate hment_t *hm = NULL;
1680Sstevel@tonic-gate
1690Sstevel@tonic-gate /*
1700Sstevel@tonic-gate * We rely on a "donation system" to refill the hment reserve
1710Sstevel@tonic-gate * list, which only takes place when we are allocating hments for
1720Sstevel@tonic-gate * user mappings. It is theoretically possible that an incredibly
1730Sstevel@tonic-gate * long string of kernel hment_alloc()s with no intervening user
1740Sstevel@tonic-gate * hment_alloc()s could exhaust that pool.
1750Sstevel@tonic-gate */
1760Sstevel@tonic-gate HATSTAT_INC(hs_hm_get_reserve);
1770Sstevel@tonic-gate mutex_enter(&hment_reserve_mutex);
1780Sstevel@tonic-gate if (hment_reserve_count != 0) {
1790Sstevel@tonic-gate hm = hment_reserve_pool;
1800Sstevel@tonic-gate hment_reserve_pool = hm->hm_next;
1810Sstevel@tonic-gate --hment_reserve_count;
1820Sstevel@tonic-gate }
1830Sstevel@tonic-gate mutex_exit(&hment_reserve_mutex);
1840Sstevel@tonic-gate return (hm);
1850Sstevel@tonic-gate }
1860Sstevel@tonic-gate
1870Sstevel@tonic-gate /*
1880Sstevel@tonic-gate * Allocate an hment
1890Sstevel@tonic-gate */
1900Sstevel@tonic-gate static hment_t *
hment_alloc()1910Sstevel@tonic-gate hment_alloc()
1920Sstevel@tonic-gate {
1930Sstevel@tonic-gate int km_flag = can_steal_post_boot ? KM_NOSLEEP : KM_SLEEP;
1940Sstevel@tonic-gate hment_t *hm = NULL;
1950Sstevel@tonic-gate
1960Sstevel@tonic-gate /*
1970Sstevel@tonic-gate * If we aren't using the reserves, try using kmem to get an hment.
1980Sstevel@tonic-gate * Donate any successful allocations to reserves if low.
1990Sstevel@tonic-gate *
2000Sstevel@tonic-gate * If we're in panic, resort to using the reserves.
2010Sstevel@tonic-gate */
2020Sstevel@tonic-gate HATSTAT_INC(hs_hm_alloc);
2033543Sjosephb if (!USE_HAT_RESERVES()) {
2040Sstevel@tonic-gate for (;;) {
2050Sstevel@tonic-gate hm = kmem_cache_alloc(hment_cache, km_flag);
2064044Sjosephb if (hm == NULL ||
2074044Sjosephb USE_HAT_RESERVES() ||
2083543Sjosephb hment_reserve_count >= hment_reserve_amount)
2090Sstevel@tonic-gate break;
2100Sstevel@tonic-gate hment_put_reserve(hm);
2110Sstevel@tonic-gate }
2120Sstevel@tonic-gate }
2130Sstevel@tonic-gate
2140Sstevel@tonic-gate /*
2150Sstevel@tonic-gate * If allocation failed, we need to tap the reserves or steal
2160Sstevel@tonic-gate */
2170Sstevel@tonic-gate if (hm == NULL) {
2183543Sjosephb if (USE_HAT_RESERVES())
2190Sstevel@tonic-gate hm = hment_get_reserve();
2200Sstevel@tonic-gate
2210Sstevel@tonic-gate /*
2220Sstevel@tonic-gate * If we still haven't gotten an hment, attempt to steal one by
2230Sstevel@tonic-gate * victimizing a mapping in a user htable.
2240Sstevel@tonic-gate */
2250Sstevel@tonic-gate if (hm == NULL && can_steal_post_boot)
2260Sstevel@tonic-gate hm = hment_steal();
2270Sstevel@tonic-gate
2280Sstevel@tonic-gate /*
2290Sstevel@tonic-gate * we're in dire straights, try the reserve
2300Sstevel@tonic-gate */
2310Sstevel@tonic-gate if (hm == NULL)
2320Sstevel@tonic-gate hm = hment_get_reserve();
2330Sstevel@tonic-gate
2340Sstevel@tonic-gate /*
2350Sstevel@tonic-gate * still no hment is a serious problem.
2360Sstevel@tonic-gate */
2370Sstevel@tonic-gate if (hm == NULL)
2380Sstevel@tonic-gate panic("hment_alloc(): no reserve, couldn't steal");
2390Sstevel@tonic-gate }
2400Sstevel@tonic-gate
2410Sstevel@tonic-gate
2420Sstevel@tonic-gate hm->hm_entry = 0;
2430Sstevel@tonic-gate hm->hm_htable = NULL;
244*5618Sjosephb hm->hm_hashlink = null_avl_link;
2450Sstevel@tonic-gate hm->hm_next = NULL;
2460Sstevel@tonic-gate hm->hm_prev = NULL;
2473308Ssudheer hm->hm_pfn = PFN_INVALID;
2480Sstevel@tonic-gate return (hm);
2490Sstevel@tonic-gate }
2500Sstevel@tonic-gate
2510Sstevel@tonic-gate /*
2520Sstevel@tonic-gate * Free an hment, possibly to the reserves list when called from the
2530Sstevel@tonic-gate * thread using the reserves. For example, when freeing an hment during an
2540Sstevel@tonic-gate * htable_steal(), we can't recurse into the kmem allocator, so we just
2550Sstevel@tonic-gate * push the hment onto the reserve list.
2560Sstevel@tonic-gate */
2570Sstevel@tonic-gate void
hment_free(hment_t * hm)2580Sstevel@tonic-gate hment_free(hment_t *hm)
2590Sstevel@tonic-gate {
2600Sstevel@tonic-gate #ifdef DEBUG
2610Sstevel@tonic-gate /*
2620Sstevel@tonic-gate * zero out all fields to try and force any race conditions to segfault
2630Sstevel@tonic-gate */
2640Sstevel@tonic-gate bzero(hm, sizeof (*hm));
2650Sstevel@tonic-gate #endif
2660Sstevel@tonic-gate HATSTAT_INC(hs_hm_free);
2673543Sjosephb if (USE_HAT_RESERVES() ||
2684004Sjosephb hment_reserve_count < hment_reserve_amount) {
2690Sstevel@tonic-gate hment_put_reserve(hm);
2704004Sjosephb } else {
2710Sstevel@tonic-gate kmem_cache_free(hment_cache, hm);
2724004Sjosephb hment_adjust_reserve();
2734004Sjosephb }
2740Sstevel@tonic-gate }
2750Sstevel@tonic-gate
276*5618Sjosephb /*
277*5618Sjosephb * These must test for mlist_lock not having been allocated yet.
278*5618Sjosephb * We just ignore locking in that case, as it means were in early
279*5618Sjosephb * single threaded startup.
280*5618Sjosephb */
2810Sstevel@tonic-gate int
x86_hm_held(page_t * pp)2820Sstevel@tonic-gate x86_hm_held(page_t *pp)
2830Sstevel@tonic-gate {
2840Sstevel@tonic-gate ASSERT(pp != NULL);
285*5618Sjosephb if (mlist_lock == NULL)
286*5618Sjosephb return (1);
2870Sstevel@tonic-gate return (MUTEX_HELD(MLIST_MUTEX(pp)));
2880Sstevel@tonic-gate }
2890Sstevel@tonic-gate
2900Sstevel@tonic-gate void
x86_hm_enter(page_t * pp)2910Sstevel@tonic-gate x86_hm_enter(page_t *pp)
2920Sstevel@tonic-gate {
2930Sstevel@tonic-gate ASSERT(pp != NULL);
294*5618Sjosephb if (mlist_lock != NULL)
295*5618Sjosephb mutex_enter(MLIST_MUTEX(pp));
2960Sstevel@tonic-gate }
2970Sstevel@tonic-gate
2980Sstevel@tonic-gate void
x86_hm_exit(page_t * pp)2990Sstevel@tonic-gate x86_hm_exit(page_t *pp)
3000Sstevel@tonic-gate {
3010Sstevel@tonic-gate ASSERT(pp != NULL);
302*5618Sjosephb if (mlist_lock != NULL)
303*5618Sjosephb mutex_exit(MLIST_MUTEX(pp));
3040Sstevel@tonic-gate }
3050Sstevel@tonic-gate
3060Sstevel@tonic-gate /*
3070Sstevel@tonic-gate * Internal routine to add a full hment to a page_t mapping list
3080Sstevel@tonic-gate */
3090Sstevel@tonic-gate static void
hment_insert(hment_t * hm,page_t * pp)3100Sstevel@tonic-gate hment_insert(hment_t *hm, page_t *pp)
3110Sstevel@tonic-gate {
3120Sstevel@tonic-gate uint_t idx;
3130Sstevel@tonic-gate
3140Sstevel@tonic-gate ASSERT(x86_hm_held(pp));
3150Sstevel@tonic-gate ASSERT(!pp->p_embed);
3160Sstevel@tonic-gate
3170Sstevel@tonic-gate /*
3180Sstevel@tonic-gate * Add the hment to the page's mapping list.
3190Sstevel@tonic-gate */
3200Sstevel@tonic-gate ++pp->p_share;
3210Sstevel@tonic-gate hm->hm_next = pp->p_mapping;
3220Sstevel@tonic-gate if (pp->p_mapping != NULL)
3230Sstevel@tonic-gate ((hment_t *)pp->p_mapping)->hm_prev = hm;
3240Sstevel@tonic-gate pp->p_mapping = hm;
3250Sstevel@tonic-gate
3260Sstevel@tonic-gate /*
3270Sstevel@tonic-gate * Add the hment to the system-wide hash table.
3280Sstevel@tonic-gate */
3290Sstevel@tonic-gate idx = HMENT_HASH(hm->hm_htable->ht_pfn, hm->hm_entry);
3300Sstevel@tonic-gate
3310Sstevel@tonic-gate mutex_enter(HASH_MUTEX(idx));
332*5618Sjosephb avl_add(&hment_table[idx], hm);
3330Sstevel@tonic-gate mutex_exit(HASH_MUTEX(idx));
3340Sstevel@tonic-gate }
3350Sstevel@tonic-gate
3360Sstevel@tonic-gate /*
3370Sstevel@tonic-gate * Prepare a mapping list entry to the given page.
3380Sstevel@tonic-gate *
3390Sstevel@tonic-gate * There are 4 different situations to deal with:
3400Sstevel@tonic-gate *
3410Sstevel@tonic-gate * - Adding the first mapping to a page_t as an embedded hment
3420Sstevel@tonic-gate * - Refaulting on an existing embedded mapping
3430Sstevel@tonic-gate * - Upgrading an embedded mapping when adding a 2nd mapping
3440Sstevel@tonic-gate * - Adding another mapping to a page_t that already has multiple mappings
3450Sstevel@tonic-gate * note we don't optimized for the refaulting case here.
3460Sstevel@tonic-gate *
3470Sstevel@tonic-gate * Due to competition with other threads that may be mapping/unmapping the
3480Sstevel@tonic-gate * same page and the need to drop all locks while allocating hments, any or
3490Sstevel@tonic-gate * all of the 3 situations can occur (and in almost any order) in any given
3500Sstevel@tonic-gate * call. Isn't this fun!
3510Sstevel@tonic-gate */
3520Sstevel@tonic-gate hment_t *
hment_prepare(htable_t * htable,uint_t entry,page_t * pp)3530Sstevel@tonic-gate hment_prepare(htable_t *htable, uint_t entry, page_t *pp)
3540Sstevel@tonic-gate {
3550Sstevel@tonic-gate hment_t *hm = NULL;
3560Sstevel@tonic-gate
3570Sstevel@tonic-gate ASSERT(x86_hm_held(pp));
3580Sstevel@tonic-gate
3590Sstevel@tonic-gate for (;;) {
3600Sstevel@tonic-gate
3610Sstevel@tonic-gate /*
3620Sstevel@tonic-gate * The most common case is establishing the first mapping to a
3630Sstevel@tonic-gate * page, so check that first. This doesn't need any allocated
3640Sstevel@tonic-gate * hment.
3650Sstevel@tonic-gate */
3660Sstevel@tonic-gate if (pp->p_mapping == NULL) {
3670Sstevel@tonic-gate ASSERT(!pp->p_embed);
3680Sstevel@tonic-gate ASSERT(pp->p_share == 0);
3690Sstevel@tonic-gate if (hm == NULL)
3700Sstevel@tonic-gate break;
3710Sstevel@tonic-gate
3720Sstevel@tonic-gate /*
3730Sstevel@tonic-gate * we had an hment already, so free it and retry
3740Sstevel@tonic-gate */
3750Sstevel@tonic-gate goto free_and_continue;
3760Sstevel@tonic-gate }
3770Sstevel@tonic-gate
3780Sstevel@tonic-gate /*
3790Sstevel@tonic-gate * If there is an embedded mapping, we may need to
3800Sstevel@tonic-gate * convert it to an hment.
3810Sstevel@tonic-gate */
3820Sstevel@tonic-gate if (pp->p_embed) {
3830Sstevel@tonic-gate
3840Sstevel@tonic-gate /* should point to htable */
3850Sstevel@tonic-gate ASSERT(pp->p_mapping != NULL);
3860Sstevel@tonic-gate
3870Sstevel@tonic-gate /*
3880Sstevel@tonic-gate * If we are faulting on a pre-existing mapping
3890Sstevel@tonic-gate * there is no need to promote/allocate a new hment.
3900Sstevel@tonic-gate * This happens a lot due to segmap.
3910Sstevel@tonic-gate */
3920Sstevel@tonic-gate if (pp->p_mapping == htable && pp->p_mlentry == entry) {
3930Sstevel@tonic-gate if (hm == NULL)
3940Sstevel@tonic-gate break;
3950Sstevel@tonic-gate goto free_and_continue;
3960Sstevel@tonic-gate }
3970Sstevel@tonic-gate
3980Sstevel@tonic-gate /*
3990Sstevel@tonic-gate * If we have an hment allocated, use it to promote the
4000Sstevel@tonic-gate * existing embedded mapping.
4010Sstevel@tonic-gate */
4020Sstevel@tonic-gate if (hm != NULL) {
4030Sstevel@tonic-gate hm->hm_htable = pp->p_mapping;
4040Sstevel@tonic-gate hm->hm_entry = pp->p_mlentry;
4053308Ssudheer hm->hm_pfn = pp->p_pagenum;
4060Sstevel@tonic-gate pp->p_mapping = NULL;
4070Sstevel@tonic-gate pp->p_share = 0;
4080Sstevel@tonic-gate pp->p_embed = 0;
4090Sstevel@tonic-gate hment_insert(hm, pp);
4100Sstevel@tonic-gate }
4110Sstevel@tonic-gate
4120Sstevel@tonic-gate /*
4130Sstevel@tonic-gate * We either didn't have an hment allocated or we just
4140Sstevel@tonic-gate * used it for the embedded mapping. In either case,
4150Sstevel@tonic-gate * allocate another hment and restart.
4160Sstevel@tonic-gate */
4170Sstevel@tonic-gate goto allocate_and_continue;
4180Sstevel@tonic-gate }
4190Sstevel@tonic-gate
4200Sstevel@tonic-gate /*
4210Sstevel@tonic-gate * Last possibility is that we're adding an hment to a list
4220Sstevel@tonic-gate * of hments.
4230Sstevel@tonic-gate */
4240Sstevel@tonic-gate if (hm != NULL)
4250Sstevel@tonic-gate break;
4260Sstevel@tonic-gate allocate_and_continue:
4270Sstevel@tonic-gate x86_hm_exit(pp);
4280Sstevel@tonic-gate hm = hment_alloc();
4290Sstevel@tonic-gate x86_hm_enter(pp);
4300Sstevel@tonic-gate continue;
4310Sstevel@tonic-gate
4320Sstevel@tonic-gate free_and_continue:
4330Sstevel@tonic-gate /*
4340Sstevel@tonic-gate * we allocated an hment already, free it and retry
4350Sstevel@tonic-gate */
4360Sstevel@tonic-gate x86_hm_exit(pp);
4370Sstevel@tonic-gate hment_free(hm);
4380Sstevel@tonic-gate hm = NULL;
4390Sstevel@tonic-gate x86_hm_enter(pp);
4400Sstevel@tonic-gate }
4410Sstevel@tonic-gate ASSERT(x86_hm_held(pp));
4420Sstevel@tonic-gate return (hm);
4430Sstevel@tonic-gate }
4440Sstevel@tonic-gate
4450Sstevel@tonic-gate /*
4460Sstevel@tonic-gate * Record a mapping list entry for the htable/entry to the given page.
4470Sstevel@tonic-gate *
4480Sstevel@tonic-gate * hment_prepare() should have properly set up the situation.
4490Sstevel@tonic-gate */
4500Sstevel@tonic-gate void
hment_assign(htable_t * htable,uint_t entry,page_t * pp,hment_t * hm)4510Sstevel@tonic-gate hment_assign(htable_t *htable, uint_t entry, page_t *pp, hment_t *hm)
4520Sstevel@tonic-gate {
4530Sstevel@tonic-gate ASSERT(x86_hm_held(pp));
4540Sstevel@tonic-gate
4550Sstevel@tonic-gate /*
4560Sstevel@tonic-gate * The most common case is establishing the first mapping to a
4570Sstevel@tonic-gate * page, so check that first. This doesn't need any allocated
4580Sstevel@tonic-gate * hment.
4590Sstevel@tonic-gate */
4600Sstevel@tonic-gate if (pp->p_mapping == NULL) {
4610Sstevel@tonic-gate ASSERT(hm == NULL);
4620Sstevel@tonic-gate ASSERT(!pp->p_embed);
4630Sstevel@tonic-gate ASSERT(pp->p_share == 0);
4640Sstevel@tonic-gate pp->p_embed = 1;
4650Sstevel@tonic-gate pp->p_mapping = htable;
4660Sstevel@tonic-gate pp->p_mlentry = entry;
4670Sstevel@tonic-gate return;
4680Sstevel@tonic-gate }
4690Sstevel@tonic-gate
4700Sstevel@tonic-gate /*
4710Sstevel@tonic-gate * We should never get here with a pre-existing embedded maping
4720Sstevel@tonic-gate */
4730Sstevel@tonic-gate ASSERT(!pp->p_embed);
4740Sstevel@tonic-gate
4750Sstevel@tonic-gate /*
4760Sstevel@tonic-gate * add the new hment to the mapping list
4770Sstevel@tonic-gate */
4780Sstevel@tonic-gate ASSERT(hm != NULL);
4790Sstevel@tonic-gate hm->hm_htable = htable;
4800Sstevel@tonic-gate hm->hm_entry = entry;
4813308Ssudheer hm->hm_pfn = pp->p_pagenum;
4820Sstevel@tonic-gate hment_insert(hm, pp);
4830Sstevel@tonic-gate }
4840Sstevel@tonic-gate
4850Sstevel@tonic-gate /*
4860Sstevel@tonic-gate * Walk through the mappings for a page.
4870Sstevel@tonic-gate *
4880Sstevel@tonic-gate * must already have done an x86_hm_enter()
4890Sstevel@tonic-gate */
4900Sstevel@tonic-gate hment_t *
hment_walk(page_t * pp,htable_t ** ht,uint_t * entry,hment_t * prev)4910Sstevel@tonic-gate hment_walk(page_t *pp, htable_t **ht, uint_t *entry, hment_t *prev)
4920Sstevel@tonic-gate {
4930Sstevel@tonic-gate hment_t *hm;
4940Sstevel@tonic-gate
4950Sstevel@tonic-gate ASSERT(x86_hm_held(pp));
4960Sstevel@tonic-gate
4970Sstevel@tonic-gate if (pp->p_embed) {
4980Sstevel@tonic-gate if (prev == NULL) {
4990Sstevel@tonic-gate *ht = (htable_t *)pp->p_mapping;
5000Sstevel@tonic-gate *entry = pp->p_mlentry;
5010Sstevel@tonic-gate hm = HMENT_EMBEDDED;
5020Sstevel@tonic-gate } else {
5030Sstevel@tonic-gate ASSERT(prev == HMENT_EMBEDDED);
5040Sstevel@tonic-gate hm = NULL;
5050Sstevel@tonic-gate }
5060Sstevel@tonic-gate } else {
5070Sstevel@tonic-gate if (prev == NULL) {
5080Sstevel@tonic-gate ASSERT(prev != HMENT_EMBEDDED);
5090Sstevel@tonic-gate hm = (hment_t *)pp->p_mapping;
5100Sstevel@tonic-gate } else {
5110Sstevel@tonic-gate hm = prev->hm_next;
5120Sstevel@tonic-gate }
5130Sstevel@tonic-gate
5140Sstevel@tonic-gate if (hm != NULL) {
5150Sstevel@tonic-gate *ht = hm->hm_htable;
5160Sstevel@tonic-gate *entry = hm->hm_entry;
5170Sstevel@tonic-gate }
5180Sstevel@tonic-gate }
5190Sstevel@tonic-gate return (hm);
5200Sstevel@tonic-gate }
5210Sstevel@tonic-gate
5220Sstevel@tonic-gate /*
5230Sstevel@tonic-gate * Remove a mapping to a page from its mapping list. Must have
5240Sstevel@tonic-gate * the corresponding mapping list locked.
5250Sstevel@tonic-gate * Finds the mapping list entry with the given pte_t and
5260Sstevel@tonic-gate * unlinks it from the mapping list.
5270Sstevel@tonic-gate */
5280Sstevel@tonic-gate hment_t *
hment_remove(page_t * pp,htable_t * ht,uint_t entry)5290Sstevel@tonic-gate hment_remove(page_t *pp, htable_t *ht, uint_t entry)
5300Sstevel@tonic-gate {
531*5618Sjosephb hment_t dummy;
532*5618Sjosephb avl_index_t where;
5330Sstevel@tonic-gate hment_t *hm;
5340Sstevel@tonic-gate uint_t idx;
5350Sstevel@tonic-gate
5360Sstevel@tonic-gate ASSERT(x86_hm_held(pp));
5370Sstevel@tonic-gate
5380Sstevel@tonic-gate /*
5390Sstevel@tonic-gate * Check if we have only one mapping embedded in the page_t.
5400Sstevel@tonic-gate */
5410Sstevel@tonic-gate if (pp->p_embed) {
5420Sstevel@tonic-gate ASSERT(ht == (htable_t *)pp->p_mapping);
5430Sstevel@tonic-gate ASSERT(entry == pp->p_mlentry);
5440Sstevel@tonic-gate ASSERT(pp->p_share == 0);
5450Sstevel@tonic-gate pp->p_mapping = NULL;
5460Sstevel@tonic-gate pp->p_mlentry = 0;
5470Sstevel@tonic-gate pp->p_embed = 0;
5480Sstevel@tonic-gate return (NULL);
5490Sstevel@tonic-gate }
5500Sstevel@tonic-gate
5510Sstevel@tonic-gate /*
5520Sstevel@tonic-gate * Otherwise it must be in the list of hments.
5530Sstevel@tonic-gate * Find the hment in the system-wide hash table and remove it.
5540Sstevel@tonic-gate */
5550Sstevel@tonic-gate ASSERT(pp->p_share != 0);
556*5618Sjosephb dummy.hm_htable = ht;
557*5618Sjosephb dummy.hm_entry = entry;
558*5618Sjosephb dummy.hm_pfn = pp->p_pagenum;
5590Sstevel@tonic-gate idx = HMENT_HASH(ht->ht_pfn, entry);
5600Sstevel@tonic-gate mutex_enter(HASH_MUTEX(idx));
561*5618Sjosephb hm = avl_find(&hment_table[idx], &dummy, &where);
562*5618Sjosephb if (hm == NULL)
56347Sjosephb panic("hment_remove() missing in hash table pp=%lx, ht=%lx,"
56447Sjosephb "entry=0x%x hash index=0x%x", (uintptr_t)pp, (uintptr_t)ht,
56547Sjosephb entry, idx);
566*5618Sjosephb avl_remove(&hment_table[idx], hm);
5670Sstevel@tonic-gate mutex_exit(HASH_MUTEX(idx));
5680Sstevel@tonic-gate
5690Sstevel@tonic-gate /*
5700Sstevel@tonic-gate * Remove the hment from the page's mapping list
5710Sstevel@tonic-gate */
5720Sstevel@tonic-gate if (hm->hm_next)
5730Sstevel@tonic-gate hm->hm_next->hm_prev = hm->hm_prev;
5740Sstevel@tonic-gate if (hm->hm_prev)
5750Sstevel@tonic-gate hm->hm_prev->hm_next = hm->hm_next;
5760Sstevel@tonic-gate else
5770Sstevel@tonic-gate pp->p_mapping = hm->hm_next;
5780Sstevel@tonic-gate
5790Sstevel@tonic-gate --pp->p_share;
580*5618Sjosephb hm->hm_hashlink = null_avl_link;
5810Sstevel@tonic-gate hm->hm_next = NULL;
5820Sstevel@tonic-gate hm->hm_prev = NULL;
5830Sstevel@tonic-gate
5840Sstevel@tonic-gate return (hm);
5850Sstevel@tonic-gate }
5860Sstevel@tonic-gate
5870Sstevel@tonic-gate /*
5880Sstevel@tonic-gate * Put initial hment's in the reserve pool.
5890Sstevel@tonic-gate */
5900Sstevel@tonic-gate void
hment_reserve(uint_t count)5910Sstevel@tonic-gate hment_reserve(uint_t count)
5920Sstevel@tonic-gate {
5930Sstevel@tonic-gate hment_t *hm;
5940Sstevel@tonic-gate
5950Sstevel@tonic-gate count += hment_reserve_amount;
5960Sstevel@tonic-gate
5970Sstevel@tonic-gate while (hment_reserve_count < count) {
5980Sstevel@tonic-gate hm = kmem_cache_alloc(hment_cache, KM_NOSLEEP);
5990Sstevel@tonic-gate if (hm == NULL)
6000Sstevel@tonic-gate return;
6010Sstevel@tonic-gate hment_put_reserve(hm);
6020Sstevel@tonic-gate }
6030Sstevel@tonic-gate }
6040Sstevel@tonic-gate
6050Sstevel@tonic-gate /*
6060Sstevel@tonic-gate * Readjust the hment reserves after they may have been used.
6070Sstevel@tonic-gate */
6080Sstevel@tonic-gate void
hment_adjust_reserve()6090Sstevel@tonic-gate hment_adjust_reserve()
6100Sstevel@tonic-gate {
6110Sstevel@tonic-gate hment_t *hm;
6120Sstevel@tonic-gate
6130Sstevel@tonic-gate /*
6140Sstevel@tonic-gate * Free up any excess reserves
6150Sstevel@tonic-gate */
6164004Sjosephb while (hment_reserve_count > hment_reserve_amount &&
6174004Sjosephb !USE_HAT_RESERVES()) {
6180Sstevel@tonic-gate hm = hment_get_reserve();
6190Sstevel@tonic-gate if (hm == NULL)
6200Sstevel@tonic-gate return;
6214004Sjosephb kmem_cache_free(hment_cache, hm);
6220Sstevel@tonic-gate }
6230Sstevel@tonic-gate }
6240Sstevel@tonic-gate
6250Sstevel@tonic-gate /*
6260Sstevel@tonic-gate * initialize hment data structures
6270Sstevel@tonic-gate */
6280Sstevel@tonic-gate void
hment_init(void)6290Sstevel@tonic-gate hment_init(void)
6300Sstevel@tonic-gate {
6310Sstevel@tonic-gate int i;
6320Sstevel@tonic-gate int flags = KMC_NOHASH | KMC_NODEBUG;
6330Sstevel@tonic-gate
6340Sstevel@tonic-gate /*
6350Sstevel@tonic-gate * Initialize kmem caches. On 32 bit kernel's we shut off
6360Sstevel@tonic-gate * debug information to save on precious kernel VA usage.
6370Sstevel@tonic-gate */
6380Sstevel@tonic-gate hment_cache = kmem_cache_create("hment_t",
6390Sstevel@tonic-gate sizeof (hment_t), 0, NULL, NULL, NULL,
6400Sstevel@tonic-gate NULL, hat_memload_arena, flags);
6410Sstevel@tonic-gate
642*5618Sjosephb hment_table = kmem_zalloc(hment_hash_entries * sizeof (*hment_table),
6430Sstevel@tonic-gate KM_SLEEP);
6440Sstevel@tonic-gate
645*5618Sjosephb mlist_lock = kmem_zalloc(MLIST_NUM_LOCK * sizeof (kmutex_t), KM_SLEEP);
646*5618Sjosephb
647*5618Sjosephb hash_lock = kmem_zalloc(HASH_NUM_LOCK * sizeof (kmutex_t), KM_SLEEP);
648*5618Sjosephb
649*5618Sjosephb for (i = 0; i < hment_hash_entries; ++i)
650*5618Sjosephb avl_create(&hment_table[i], hment_compare, sizeof (hment_t),
651*5618Sjosephb offsetof(hment_t, hm_hashlink));
652*5618Sjosephb
6530Sstevel@tonic-gate for (i = 0; i < MLIST_NUM_LOCK; i++)
6540Sstevel@tonic-gate mutex_init(&mlist_lock[i], NULL, MUTEX_DEFAULT, NULL);
6550Sstevel@tonic-gate
6560Sstevel@tonic-gate for (i = 0; i < HASH_NUM_LOCK; i++)
6570Sstevel@tonic-gate mutex_init(&hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
6580Sstevel@tonic-gate
6590Sstevel@tonic-gate
6600Sstevel@tonic-gate }
6610Sstevel@tonic-gate
6620Sstevel@tonic-gate /*
6630Sstevel@tonic-gate * return the number of mappings to a page
6640Sstevel@tonic-gate *
6650Sstevel@tonic-gate * Note there is no ASSERT() that the MUTEX is held for this.
6660Sstevel@tonic-gate * Hence the return value might be inaccurate if this is called without
6670Sstevel@tonic-gate * doing an x86_hm_enter().
6680Sstevel@tonic-gate */
6690Sstevel@tonic-gate uint_t
hment_mapcnt(page_t * pp)6700Sstevel@tonic-gate hment_mapcnt(page_t *pp)
6710Sstevel@tonic-gate {
6720Sstevel@tonic-gate uint_t cnt;
6730Sstevel@tonic-gate uint_t szc;
6740Sstevel@tonic-gate page_t *larger;
6750Sstevel@tonic-gate hment_t *hm;
6760Sstevel@tonic-gate
6770Sstevel@tonic-gate x86_hm_enter(pp);
6780Sstevel@tonic-gate if (pp->p_mapping == NULL)
6790Sstevel@tonic-gate cnt = 0;
6800Sstevel@tonic-gate else if (pp->p_embed)
6810Sstevel@tonic-gate cnt = 1;
6820Sstevel@tonic-gate else
6830Sstevel@tonic-gate cnt = pp->p_share;
6840Sstevel@tonic-gate x86_hm_exit(pp);
6850Sstevel@tonic-gate
6860Sstevel@tonic-gate /*
6870Sstevel@tonic-gate * walk through all larger mapping sizes counting mappings
6880Sstevel@tonic-gate */
6890Sstevel@tonic-gate for (szc = 1; szc <= pp->p_szc; ++szc) {
6900Sstevel@tonic-gate larger = PP_GROUPLEADER(pp, szc);
6910Sstevel@tonic-gate if (larger == pp) /* don't double count large mappings */
6920Sstevel@tonic-gate continue;
6930Sstevel@tonic-gate
6940Sstevel@tonic-gate x86_hm_enter(larger);
6950Sstevel@tonic-gate if (larger->p_mapping != NULL) {
6960Sstevel@tonic-gate if (larger->p_embed &&
6970Sstevel@tonic-gate ((htable_t *)larger->p_mapping)->ht_level == szc) {
6980Sstevel@tonic-gate ++cnt;
6990Sstevel@tonic-gate } else if (!larger->p_embed) {
7000Sstevel@tonic-gate for (hm = larger->p_mapping; hm;
7010Sstevel@tonic-gate hm = hm->hm_next) {
7020Sstevel@tonic-gate if (hm->hm_htable->ht_level == szc)
7030Sstevel@tonic-gate ++cnt;
7040Sstevel@tonic-gate }
7050Sstevel@tonic-gate }
7060Sstevel@tonic-gate }
7070Sstevel@tonic-gate x86_hm_exit(larger);
7080Sstevel@tonic-gate }
7090Sstevel@tonic-gate return (cnt);
7100Sstevel@tonic-gate }
7110Sstevel@tonic-gate
7120Sstevel@tonic-gate /*
7130Sstevel@tonic-gate * We need to steal an hment. Walk through all the page_t's until we
7140Sstevel@tonic-gate * find one that has multiple mappings. Unload one of the mappings
7150Sstevel@tonic-gate * and reclaim that hment. Note that we'll save/restart the starting
7160Sstevel@tonic-gate * page to try and spread the pain.
7170Sstevel@tonic-gate */
7180Sstevel@tonic-gate static page_t *last_page = NULL;
7190Sstevel@tonic-gate
7200Sstevel@tonic-gate static hment_t *
hment_steal(void)7210Sstevel@tonic-gate hment_steal(void)
7220Sstevel@tonic-gate {
7230Sstevel@tonic-gate page_t *last = last_page;
7240Sstevel@tonic-gate page_t *pp = last;
7250Sstevel@tonic-gate hment_t *hm = NULL;
7260Sstevel@tonic-gate hment_t *hm2;
7270Sstevel@tonic-gate htable_t *ht;
7280Sstevel@tonic-gate uint_t found_one = 0;
7290Sstevel@tonic-gate
7300Sstevel@tonic-gate HATSTAT_INC(hs_hm_steals);
7310Sstevel@tonic-gate if (pp == NULL)
7320Sstevel@tonic-gate last = pp = page_first();
7330Sstevel@tonic-gate
7340Sstevel@tonic-gate while (!found_one) {
7350Sstevel@tonic-gate HATSTAT_INC(hs_hm_steal_exam);
7360Sstevel@tonic-gate pp = page_next(pp);
7370Sstevel@tonic-gate if (pp == NULL)
7380Sstevel@tonic-gate pp = page_first();
7390Sstevel@tonic-gate
7400Sstevel@tonic-gate /*
7410Sstevel@tonic-gate * The loop and function exit here if nothing found to steal.
7420Sstevel@tonic-gate */
7430Sstevel@tonic-gate if (pp == last)
7440Sstevel@tonic-gate return (NULL);
7450Sstevel@tonic-gate
7460Sstevel@tonic-gate /*
7470Sstevel@tonic-gate * Only lock the page_t if it has hments.
7480Sstevel@tonic-gate */
7490Sstevel@tonic-gate if (pp->p_mapping == NULL || pp->p_embed)
7500Sstevel@tonic-gate continue;
7510Sstevel@tonic-gate
7520Sstevel@tonic-gate /*
7530Sstevel@tonic-gate * Search the mapping list for a usable mapping.
7540Sstevel@tonic-gate */
7550Sstevel@tonic-gate x86_hm_enter(pp);
7560Sstevel@tonic-gate if (!pp->p_embed) {
7570Sstevel@tonic-gate for (hm = pp->p_mapping; hm; hm = hm->hm_next) {
7580Sstevel@tonic-gate ht = hm->hm_htable;
7590Sstevel@tonic-gate if (ht->ht_hat != kas.a_hat &&
7600Sstevel@tonic-gate ht->ht_busy == 0 &&
7610Sstevel@tonic-gate ht->ht_lock_cnt == 0) {
7620Sstevel@tonic-gate found_one = 1;
7630Sstevel@tonic-gate break;
7640Sstevel@tonic-gate }
7650Sstevel@tonic-gate }
7660Sstevel@tonic-gate }
7670Sstevel@tonic-gate if (!found_one)
7680Sstevel@tonic-gate x86_hm_exit(pp);
7690Sstevel@tonic-gate }
7700Sstevel@tonic-gate
7710Sstevel@tonic-gate /*
7720Sstevel@tonic-gate * Steal the mapping we found. Note that hati_page_unmap() will
7730Sstevel@tonic-gate * do the x86_hm_exit().
7740Sstevel@tonic-gate */
7750Sstevel@tonic-gate hm2 = hati_page_unmap(pp, ht, hm->hm_entry);
7760Sstevel@tonic-gate ASSERT(hm2 == hm);
7770Sstevel@tonic-gate last_page = pp;
7780Sstevel@tonic-gate return (hm);
7790Sstevel@tonic-gate }
780