10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
52759Selowe * Common Development and Distribution License (the "License").
62759Selowe * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*12230SFrank.Rival@oracle.com * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
230Sstevel@tonic-gate */
240Sstevel@tonic-gate
250Sstevel@tonic-gate
260Sstevel@tonic-gate /*
270Sstevel@tonic-gate * VM - page locking primitives
280Sstevel@tonic-gate */
290Sstevel@tonic-gate #include <sys/param.h>
300Sstevel@tonic-gate #include <sys/t_lock.h>
310Sstevel@tonic-gate #include <sys/vtrace.h>
320Sstevel@tonic-gate #include <sys/debug.h>
330Sstevel@tonic-gate #include <sys/cmn_err.h>
340Sstevel@tonic-gate #include <sys/bitmap.h>
350Sstevel@tonic-gate #include <sys/lockstat.h>
364878Sblakej #include <sys/sysmacros.h>
370Sstevel@tonic-gate #include <sys/condvar_impl.h>
380Sstevel@tonic-gate #include <vm/page.h>
390Sstevel@tonic-gate #include <vm/seg_enum.h>
400Sstevel@tonic-gate #include <vm/vm_dep.h>
4111185SSean.McEnroe@Sun.COM #include <vm/seg_kmem.h>
420Sstevel@tonic-gate
430Sstevel@tonic-gate /*
44*12230SFrank.Rival@oracle.com * This global mutex array is for logical page locking.
450Sstevel@tonic-gate * The following fields in the page structure are protected
460Sstevel@tonic-gate * by this lock:
470Sstevel@tonic-gate *
480Sstevel@tonic-gate * p_lckcnt
490Sstevel@tonic-gate * p_cowcnt
500Sstevel@tonic-gate */
51*12230SFrank.Rival@oracle.com pad_mutex_t page_llocks[8 * NCPU_P2];
520Sstevel@tonic-gate
530Sstevel@tonic-gate /*
540Sstevel@tonic-gate * This is a global lock for the logical page free list. The
550Sstevel@tonic-gate * logical free list, in this implementation, is maintained as two
560Sstevel@tonic-gate * separate physical lists - the cache list and the free list.
570Sstevel@tonic-gate */
580Sstevel@tonic-gate kmutex_t page_freelock;
590Sstevel@tonic-gate
600Sstevel@tonic-gate /*
610Sstevel@tonic-gate * The hash table, page_hash[], the p_selock fields, and the
620Sstevel@tonic-gate * list of pages associated with vnodes are protected by arrays of mutexes.
630Sstevel@tonic-gate *
640Sstevel@tonic-gate * Unless the hashes are changed radically, the table sizes must be
650Sstevel@tonic-gate * a power of two. Also, we typically need more mutexes for the
660Sstevel@tonic-gate * vnodes since these locks are occasionally held for long periods.
670Sstevel@tonic-gate * And since there seem to be two special vnodes (kvp and swapvp),
680Sstevel@tonic-gate * we make room for private mutexes for them.
690Sstevel@tonic-gate *
700Sstevel@tonic-gate * The pse_mutex[] array holds the mutexes to protect the p_selock
710Sstevel@tonic-gate * fields of all page_t structures.
720Sstevel@tonic-gate *
730Sstevel@tonic-gate * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
740Sstevel@tonic-gate * when given a pointer to a page_t.
750Sstevel@tonic-gate *
764878Sblakej * PIO_TABLE_SIZE must be a power of two. One could argue that we
770Sstevel@tonic-gate * should go to the trouble of setting it up at run time and base it
780Sstevel@tonic-gate * on memory size rather than the number of compile time CPUs.
790Sstevel@tonic-gate *
804878Sblakej * XX64 We should be using physmem size to calculate PIO_SHIFT.
810Sstevel@tonic-gate *
820Sstevel@tonic-gate * These might break in 64 bit world.
830Sstevel@tonic-gate */
844878Sblakej #define PIO_SHIFT 7 /* log2(sizeof(page_t)) */
854878Sblakej #define PIO_TABLE_SIZE 128 /* number of io mutexes to have */
860Sstevel@tonic-gate
870Sstevel@tonic-gate pad_mutex_t ph_mutex[PH_TABLE_SIZE];
880Sstevel@tonic-gate kmutex_t pio_mutex[PIO_TABLE_SIZE];
890Sstevel@tonic-gate
900Sstevel@tonic-gate #define PAGE_IO_MUTEX(pp) \
910Sstevel@tonic-gate &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]
920Sstevel@tonic-gate
934878Sblakej /*
944878Sblakej * The pse_mutex[] array is allocated in the platform startup code
954878Sblakej * based on the size of the machine at startup.
964878Sblakej */
974878Sblakej extern pad_mutex_t *pse_mutex; /* Locks protecting pp->p_selock */
984878Sblakej extern size_t pse_table_size; /* Number of mutexes in pse_mutex[] */
994878Sblakej extern int pse_shift; /* log2(pse_table_size) */
1004878Sblakej #define PAGE_SE_MUTEX(pp) &pse_mutex[ \
1014878Sblakej ((((uintptr_t)(pp) >> pse_shift) ^ ((uintptr_t)(pp))) >> 7) & \
1024878Sblakej (pse_table_size - 1)].pad_mutex
1034878Sblakej
1040Sstevel@tonic-gate #define PSZC_MTX_TABLE_SIZE 128
1050Sstevel@tonic-gate #define PSZC_MTX_TABLE_SHIFT 7
1060Sstevel@tonic-gate
1070Sstevel@tonic-gate static pad_mutex_t pszc_mutex[PSZC_MTX_TABLE_SIZE];
1080Sstevel@tonic-gate
1090Sstevel@tonic-gate #define PAGE_SZC_MUTEX(_pp) \
1100Sstevel@tonic-gate &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
1110Sstevel@tonic-gate ((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
1120Sstevel@tonic-gate ((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
1130Sstevel@tonic-gate (PSZC_MTX_TABLE_SIZE - 1))].pad_mutex
1140Sstevel@tonic-gate
/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.  The index is the sum of four right-shifted
 * copies of the address, masked to VPH_TABLE_SIZE (a power of two).
 */

#if defined(_LP64)
#define	VPH_TABLE_SIZE	(8 * NCPU_P2)
#else	/* 32 bits */
#define	VPH_TABLE_SIZE	(2 * NCPU_P2)
#endif

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))
1410Sstevel@tonic-gate
1423290Sjohansen /*
1433290Sjohansen * Two slots after VPH_TABLE_SIZE are reserved in vph_mutex for kernel vnodes.
1443290Sjohansen * The lock for kvp is VPH_TABLE_SIZE + 0, and the lock for zvp is
1453290Sjohansen * VPH_TABLE_SIZE + 1.
1463290Sjohansen */
1473290Sjohansen
1480Sstevel@tonic-gate kmutex_t vph_mutex[VPH_TABLE_SIZE + 2];
1490Sstevel@tonic-gate
/*
 * Initialize the locks used by the Virtual Memory Management system.
 *
 * Currently a no-op: the function body is empty and no lock is touched
 * here.  Declared with an explicit (void) parameter list so that calls
 * passing arguments are diagnosed by the compiler.
 */
void
page_lock_init(void)
{
}
1570Sstevel@tonic-gate
1580Sstevel@tonic-gate /*
1594878Sblakej * Return a value for pse_shift based on npg (the number of physical pages)
1604878Sblakej * and ncpu (the maximum number of CPUs). This is called by platform startup
1614878Sblakej * code.
1624878Sblakej *
1634878Sblakej * Lockstat data from TPC-H runs showed that contention on the pse_mutex[]
1644878Sblakej * locks grew approximately as the square of the number of threads executing.
1654878Sblakej * So the primary scaling factor used is NCPU^2. The size of the machine in
1664878Sblakej * megabytes is used as an upper bound, particularly for sun4v machines which
1674878Sblakej * all claim to have 256 CPUs maximum, and the old value of PSE_TABLE_SIZE
1684878Sblakej * (128) is used as a minimum. Since the size of the table has to be a power
1694878Sblakej * of two, the calculated size is rounded up to the next power of two.
1704878Sblakej */
1714878Sblakej /*ARGSUSED*/
1724878Sblakej int
size_pse_array(pgcnt_t npg,int ncpu)1734878Sblakej size_pse_array(pgcnt_t npg, int ncpu)
1744878Sblakej {
1754878Sblakej size_t size;
1764878Sblakej pgcnt_t pp_per_mb = (1024 * 1024) / PAGESIZE;
1774878Sblakej
1784878Sblakej size = MAX(128, MIN(npg / pp_per_mb, 2 * ncpu * ncpu));
1794878Sblakej size += (1 << (highbit(size) - 1)) - 1;
1804878Sblakej return (highbit(size) - 1);
1814878Sblakej }
1824878Sblakej
/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1
1960Sstevel@tonic-gate
/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function.  The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)
2040Sstevel@tonic-gate
/*
 * Event counters, compiled in only when VM_STATS is defined.  The
 * page_lock_* and page_try* counters are bumped through VM_STAT_ADD()
 * by the locking routines in this file.
 */
#ifdef VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;

uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_retired;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_failed;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */
2290Sstevel@tonic-gate
2300Sstevel@tonic-gate /*
2310Sstevel@tonic-gate * Acquire the "shared/exclusive" lock on a page.
2320Sstevel@tonic-gate *
2330Sstevel@tonic-gate * Returns 1 on success and locks the page appropriately.
2340Sstevel@tonic-gate * 0 on failure and does not lock the page.
2350Sstevel@tonic-gate *
2360Sstevel@tonic-gate * If `lock' is non-NULL, it will be dropped and reacquired in the
2370Sstevel@tonic-gate * failure case. This routine can block, and if it does
2380Sstevel@tonic-gate * it will always return a failure since the page identity [vp, off]
2390Sstevel@tonic-gate * or state may have changed.
2400Sstevel@tonic-gate */
2410Sstevel@tonic-gate
2420Sstevel@tonic-gate int
page_lock(page_t * pp,se_t se,kmutex_t * lock,reclaim_t reclaim)2430Sstevel@tonic-gate page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
2440Sstevel@tonic-gate {
2450Sstevel@tonic-gate return (page_lock_es(pp, se, lock, reclaim, 0));
2460Sstevel@tonic-gate }
2470Sstevel@tonic-gate
2480Sstevel@tonic-gate /*
2490Sstevel@tonic-gate * With the addition of reader-writer lock semantics to page_lock_es,
2500Sstevel@tonic-gate * callers wanting an exclusive (writer) lock may prevent shared-lock
2510Sstevel@tonic-gate * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
2520Sstevel@tonic-gate * In this case, when an exclusive lock cannot be acquired, p_selock's
253917Selowe * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
254917Selowe * if the page is slated for retirement.
255917Selowe *
256917Selowe * The se and es parameters determine if the lock should be granted
257917Selowe * based on the following decision table:
258917Selowe *
259917Selowe * Lock wanted es flags p_selock/SE_EWANTED Action
260917Selowe * ----------- -------------- ------------------- ---------
261917Selowe * SE_EXCL any [1][2] unlocked/any grant lock, clear SE_EWANTED
262917Selowe * SE_EXCL SE_EWANTED any lock/any deny, set SE_EWANTED
263917Selowe * SE_EXCL none any lock/any deny
2642759Selowe * SE_SHARED n/a [2] shared/0 grant
2652759Selowe * SE_SHARED n/a [2] unlocked/0 grant
266917Selowe * SE_SHARED n/a shared/1 deny
267917Selowe * SE_SHARED n/a unlocked/1 deny
268917Selowe * SE_SHARED n/a excl/any deny
2690Sstevel@tonic-gate *
270917Selowe * Notes:
271917Selowe * [1] The code grants an exclusive lock to the caller and clears the bit
272917Selowe * SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
273917Selowe * bit's value. This was deemed acceptable as we are not concerned about
274917Selowe * exclusive-lock starvation. If this ever becomes an issue, a priority or
275917Selowe * fifo mechanism should also be implemented. Meantime, the thread that
276917Selowe * set SE_EWANTED should be prepared to catch this condition and reset it
277917Selowe *
278917Selowe * [2] Retired pages may not be locked at any time, regardless of the
279917Selowe * dispostion of se, unless the es parameter has SE_RETIRED flag set.
2800Sstevel@tonic-gate *
281917Selowe * Notes on values of "es":
282917Selowe *
283917Selowe * es & 1: page_lookup_create will attempt page relocation
284917Selowe * es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
285917Selowe * memory thread); this prevents reader-starvation of waiting
286917Selowe * writer thread(s) by giving priority to writers over readers.
287917Selowe * es & SE_RETIRED: caller wants to lock pages even if they are
288917Selowe * retired. Default is to deny the lock if the page is retired.
289917Selowe *
290917Selowe * And yes, we know, the semantics of this function are too complicated.
291917Selowe * It's on the list to be cleaned up.
2920Sstevel@tonic-gate */
2930Sstevel@tonic-gate int
page_lock_es(page_t * pp,se_t se,kmutex_t * lock,reclaim_t reclaim,int es)2940Sstevel@tonic-gate page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
2950Sstevel@tonic-gate {
2960Sstevel@tonic-gate int retval;
2970Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
2980Sstevel@tonic-gate int upgraded;
2990Sstevel@tonic-gate int reclaim_it;
3000Sstevel@tonic-gate
3010Sstevel@tonic-gate ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);
3020Sstevel@tonic-gate
3030Sstevel@tonic-gate VM_STAT_ADD(page_lock_count);
3040Sstevel@tonic-gate
3050Sstevel@tonic-gate upgraded = 0;
3060Sstevel@tonic-gate reclaim_it = 0;
3070Sstevel@tonic-gate
3080Sstevel@tonic-gate mutex_enter(pse);
3090Sstevel@tonic-gate
310917Selowe ASSERT(((es & SE_EXCL_WANTED) == 0) ||
311917Selowe ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));
3120Sstevel@tonic-gate
313917Selowe if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
314917Selowe mutex_exit(pse);
315917Selowe VM_STAT_ADD(page_lock_retired);
316917Selowe return (0);
317917Selowe }
3180Sstevel@tonic-gate
3190Sstevel@tonic-gate if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
3200Sstevel@tonic-gate se = SE_EXCL;
3210Sstevel@tonic-gate }
3220Sstevel@tonic-gate
3230Sstevel@tonic-gate if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {
3240Sstevel@tonic-gate
3250Sstevel@tonic-gate reclaim_it = 1;
3260Sstevel@tonic-gate if (se == SE_SHARED) {
3270Sstevel@tonic-gate /*
3280Sstevel@tonic-gate * This is an interesting situation.
3290Sstevel@tonic-gate *
3300Sstevel@tonic-gate * Remember that p_free can only change if
3310Sstevel@tonic-gate * p_selock < 0.
3320Sstevel@tonic-gate * p_free does not depend on our holding `pse'.
3330Sstevel@tonic-gate * And, since we hold `pse', p_selock can not change.
3340Sstevel@tonic-gate * So, if p_free changes on us, the page is already
3350Sstevel@tonic-gate * exclusively held, and we would fail to get p_selock
3360Sstevel@tonic-gate * regardless.
3370Sstevel@tonic-gate *
3380Sstevel@tonic-gate * We want to avoid getting the share
3390Sstevel@tonic-gate * lock on a free page that needs to be reclaimed.
3400Sstevel@tonic-gate * It is possible that some other thread has the share
3410Sstevel@tonic-gate * lock and has left the free page on the cache list.
3420Sstevel@tonic-gate * pvn_vplist_dirty() does this for brief periods.
3430Sstevel@tonic-gate * If the se_share is currently SE_EXCL, we will fail
3440Sstevel@tonic-gate * to acquire p_selock anyway. Blocking is the
3450Sstevel@tonic-gate * right thing to do.
3460Sstevel@tonic-gate * If we need to reclaim this page, we must get
3470Sstevel@tonic-gate * exclusive access to it, force the upgrade now.
3480Sstevel@tonic-gate * Again, we will fail to acquire p_selock if the
3490Sstevel@tonic-gate * page is not free and block.
3500Sstevel@tonic-gate */
3510Sstevel@tonic-gate upgraded = 1;
3520Sstevel@tonic-gate se = SE_EXCL;
3530Sstevel@tonic-gate VM_STAT_ADD(page_lock_upgrade);
3540Sstevel@tonic-gate }
3550Sstevel@tonic-gate }
3560Sstevel@tonic-gate
3570Sstevel@tonic-gate if (se == SE_EXCL) {
358917Selowe if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
3590Sstevel@tonic-gate /*
3600Sstevel@tonic-gate * if the caller wants a writer lock (but did not
3610Sstevel@tonic-gate * specify exclusive access), and there is a pending
3620Sstevel@tonic-gate * writer that wants exclusive access, return failure
3630Sstevel@tonic-gate */
3640Sstevel@tonic-gate retval = 0;
3650Sstevel@tonic-gate } else if ((pp->p_selock & ~SE_EWANTED) == 0) {
3660Sstevel@tonic-gate /* no reader/writer lock held */
3670Sstevel@tonic-gate THREAD_KPRI_REQUEST();
3680Sstevel@tonic-gate /* this clears our setting of the SE_EWANTED bit */
3690Sstevel@tonic-gate pp->p_selock = SE_WRITER;
3700Sstevel@tonic-gate retval = 1;
3710Sstevel@tonic-gate } else {
3720Sstevel@tonic-gate /* page is locked */
373917Selowe if (es & SE_EXCL_WANTED) {
3740Sstevel@tonic-gate /* set the SE_EWANTED bit */
3750Sstevel@tonic-gate pp->p_selock |= SE_EWANTED;
3760Sstevel@tonic-gate }
3770Sstevel@tonic-gate retval = 0;
3780Sstevel@tonic-gate }
3790Sstevel@tonic-gate } else {
3800Sstevel@tonic-gate retval = 0;
3810Sstevel@tonic-gate if (pp->p_selock >= 0) {
382917Selowe if ((pp->p_selock & SE_EWANTED) == 0) {
3832759Selowe pp->p_selock += SE_READER;
3842759Selowe retval = 1;
3850Sstevel@tonic-gate }
3860Sstevel@tonic-gate }
3870Sstevel@tonic-gate }
3880Sstevel@tonic-gate
3890Sstevel@tonic-gate if (retval == 0) {
3900Sstevel@tonic-gate if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
3910Sstevel@tonic-gate VM_STAT_ADD(page_lock_deleted);
3920Sstevel@tonic-gate mutex_exit(pse);
3930Sstevel@tonic-gate return (retval);
3940Sstevel@tonic-gate }
3950Sstevel@tonic-gate
3960Sstevel@tonic-gate #ifdef VM_STATS
3970Sstevel@tonic-gate VM_STAT_ADD(page_lock_miss);
3980Sstevel@tonic-gate if (upgraded) {
3990Sstevel@tonic-gate VM_STAT_ADD(page_lock_upgrade_failed);
4000Sstevel@tonic-gate }
4010Sstevel@tonic-gate #endif
4020Sstevel@tonic-gate if (lock) {
4030Sstevel@tonic-gate VM_STAT_ADD(page_lock_miss_lock);
4040Sstevel@tonic-gate mutex_exit(lock);
4050Sstevel@tonic-gate }
4060Sstevel@tonic-gate
4070Sstevel@tonic-gate /*
4080Sstevel@tonic-gate * Now, wait for the page to be unlocked and
4090Sstevel@tonic-gate * release the lock protecting p_cv and p_selock.
4100Sstevel@tonic-gate */
4110Sstevel@tonic-gate cv_wait(&pp->p_cv, pse);
4120Sstevel@tonic-gate mutex_exit(pse);
4130Sstevel@tonic-gate
4140Sstevel@tonic-gate /*
4150Sstevel@tonic-gate * The page identity may have changed while we were
4160Sstevel@tonic-gate * blocked. If we are willing to depend on "pp"
4170Sstevel@tonic-gate * still pointing to a valid page structure (i.e.,
4180Sstevel@tonic-gate * assuming page structures are not dynamically allocated
4190Sstevel@tonic-gate * or freed), we could try to lock the page if its
4200Sstevel@tonic-gate * identity hasn't changed.
4210Sstevel@tonic-gate *
4220Sstevel@tonic-gate * This needs to be measured, since we come back from
4230Sstevel@tonic-gate * cv_wait holding pse (the expensive part of this
4240Sstevel@tonic-gate * operation) we might as well try the cheap part.
4250Sstevel@tonic-gate * Though we would also have to confirm that dropping
4260Sstevel@tonic-gate * `lock' did not cause any grief to the callers.
4270Sstevel@tonic-gate */
4280Sstevel@tonic-gate if (lock) {
4290Sstevel@tonic-gate mutex_enter(lock);
4300Sstevel@tonic-gate }
4310Sstevel@tonic-gate } else {
4320Sstevel@tonic-gate /*
4330Sstevel@tonic-gate * We have the page lock.
4340Sstevel@tonic-gate * If we needed to reclaim the page, and the page
4350Sstevel@tonic-gate * needed reclaiming (ie, it was free), then we
4360Sstevel@tonic-gate * have the page exclusively locked. We may need
4370Sstevel@tonic-gate * to downgrade the page.
4380Sstevel@tonic-gate */
4390Sstevel@tonic-gate ASSERT((upgraded) ?
4400Sstevel@tonic-gate ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
4410Sstevel@tonic-gate mutex_exit(pse);
4420Sstevel@tonic-gate
4430Sstevel@tonic-gate /*
4440Sstevel@tonic-gate * We now hold this page's lock, either shared or
4450Sstevel@tonic-gate * exclusive. This will prevent its identity from changing.
4460Sstevel@tonic-gate * The page, however, may or may not be free. If the caller
4470Sstevel@tonic-gate * requested, and it is free, go reclaim it from the
4480Sstevel@tonic-gate * free list. If the page can't be reclaimed, return failure
4490Sstevel@tonic-gate * so that the caller can start all over again.
4500Sstevel@tonic-gate *
4510Sstevel@tonic-gate * NOTE:page_reclaim() releases the page lock (p_selock)
4520Sstevel@tonic-gate * if it can't be reclaimed.
4530Sstevel@tonic-gate */
4540Sstevel@tonic-gate if (reclaim_it) {
4550Sstevel@tonic-gate if (!page_reclaim(pp, lock)) {
4560Sstevel@tonic-gate VM_STAT_ADD(page_lock_bad_reclaim);
4570Sstevel@tonic-gate retval = 0;
4580Sstevel@tonic-gate } else {
4590Sstevel@tonic-gate VM_STAT_ADD(page_lock_reclaim);
4600Sstevel@tonic-gate if (upgraded) {
4610Sstevel@tonic-gate page_downgrade(pp);
4620Sstevel@tonic-gate }
4630Sstevel@tonic-gate }
4640Sstevel@tonic-gate }
4650Sstevel@tonic-gate }
4660Sstevel@tonic-gate return (retval);
4670Sstevel@tonic-gate }
4680Sstevel@tonic-gate
4690Sstevel@tonic-gate /*
4700Sstevel@tonic-gate * Clear the SE_EWANTED bit from p_selock. This function allows
4710Sstevel@tonic-gate * callers of page_lock_es and page_try_reclaim_lock to clear
4720Sstevel@tonic-gate * their setting of this bit if they decide they no longer wish
4730Sstevel@tonic-gate * to gain exclusive access to the page. Currently only
4740Sstevel@tonic-gate * delete_memory_thread uses this when the delete memory
4750Sstevel@tonic-gate * operation is cancelled.
4760Sstevel@tonic-gate */
4770Sstevel@tonic-gate void
page_lock_clr_exclwanted(page_t * pp)4780Sstevel@tonic-gate page_lock_clr_exclwanted(page_t *pp)
4790Sstevel@tonic-gate {
4800Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
4810Sstevel@tonic-gate
4820Sstevel@tonic-gate mutex_enter(pse);
4830Sstevel@tonic-gate pp->p_selock &= ~SE_EWANTED;
4840Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv))
4850Sstevel@tonic-gate cv_broadcast(&pp->p_cv);
4860Sstevel@tonic-gate mutex_exit(pse);
4870Sstevel@tonic-gate }
4880Sstevel@tonic-gate
4890Sstevel@tonic-gate /*
4900Sstevel@tonic-gate * Read the comments inside of page_lock_es() carefully.
4910Sstevel@tonic-gate *
4920Sstevel@tonic-gate * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
4930Sstevel@tonic-gate * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
4940Sstevel@tonic-gate * This is used by threads subject to reader-starvation (eg. memory delete).
4950Sstevel@tonic-gate *
4960Sstevel@tonic-gate * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
4970Sstevel@tonic-gate * it is expected that it will retry at a later time. Threads that will
4980Sstevel@tonic-gate * not retry the lock *must* call page_lock_clr_exclwanted to clear the
4990Sstevel@tonic-gate * SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock,
5000Sstevel@tonic-gate * the bit is cleared.)
5010Sstevel@tonic-gate */
5020Sstevel@tonic-gate int
page_try_reclaim_lock(page_t * pp,se_t se,int es)5030Sstevel@tonic-gate page_try_reclaim_lock(page_t *pp, se_t se, int es)
5040Sstevel@tonic-gate {
5050Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
5060Sstevel@tonic-gate selock_t old;
5070Sstevel@tonic-gate
5080Sstevel@tonic-gate mutex_enter(pse);
5090Sstevel@tonic-gate
5100Sstevel@tonic-gate old = pp->p_selock;
5110Sstevel@tonic-gate
5120Sstevel@tonic-gate ASSERT(((es & SE_EXCL_WANTED) == 0) ||
513917Selowe ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));
514917Selowe
515917Selowe if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
516917Selowe mutex_exit(pse);
517917Selowe VM_STAT_ADD(page_trylock_failed);
518917Selowe return (0);
519917Selowe }
5200Sstevel@tonic-gate
5210Sstevel@tonic-gate if (se == SE_SHARED && es == 1 && old == 0) {
5220Sstevel@tonic-gate se = SE_EXCL;
5230Sstevel@tonic-gate }
5240Sstevel@tonic-gate
5250Sstevel@tonic-gate if (se == SE_SHARED) {
5260Sstevel@tonic-gate if (!PP_ISFREE(pp)) {
5270Sstevel@tonic-gate if (old >= 0) {
528917Selowe /*
529917Selowe * Readers are not allowed when excl wanted
530917Selowe */
531917Selowe if ((old & SE_EWANTED) == 0) {
5322759Selowe pp->p_selock = old + SE_READER;
5332759Selowe mutex_exit(pse);
5342759Selowe return (1);
5350Sstevel@tonic-gate }
5360Sstevel@tonic-gate }
5370Sstevel@tonic-gate mutex_exit(pse);
5380Sstevel@tonic-gate return (0);
5390Sstevel@tonic-gate }
5400Sstevel@tonic-gate /*
5410Sstevel@tonic-gate * The page is free, so we really want SE_EXCL (below)
5420Sstevel@tonic-gate */
5430Sstevel@tonic-gate VM_STAT_ADD(page_try_reclaim_upgrade);
5440Sstevel@tonic-gate }
5450Sstevel@tonic-gate
5460Sstevel@tonic-gate /*
5470Sstevel@tonic-gate * The caller wants a writer lock. We try for it only if
5480Sstevel@tonic-gate * SE_EWANTED is not set, or if the caller specified
5490Sstevel@tonic-gate * SE_EXCL_WANTED.
5500Sstevel@tonic-gate */
551917Selowe if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
5520Sstevel@tonic-gate if ((old & ~SE_EWANTED) == 0) {
5530Sstevel@tonic-gate /* no reader/writer lock held */
5540Sstevel@tonic-gate THREAD_KPRI_REQUEST();
5550Sstevel@tonic-gate /* this clears out our setting of the SE_EWANTED bit */
5560Sstevel@tonic-gate pp->p_selock = SE_WRITER;
5570Sstevel@tonic-gate mutex_exit(pse);
5580Sstevel@tonic-gate return (1);
5590Sstevel@tonic-gate }
5600Sstevel@tonic-gate }
561917Selowe if (es & SE_EXCL_WANTED) {
5620Sstevel@tonic-gate /* page is locked, set the SE_EWANTED bit */
5630Sstevel@tonic-gate pp->p_selock |= SE_EWANTED;
5640Sstevel@tonic-gate }
5650Sstevel@tonic-gate mutex_exit(pse);
5660Sstevel@tonic-gate return (0);
5670Sstevel@tonic-gate }
5680Sstevel@tonic-gate
5690Sstevel@tonic-gate /*
5700Sstevel@tonic-gate * Acquire a page's "shared/exclusive" lock, but never block.
5710Sstevel@tonic-gate * Returns 1 on success, 0 on failure.
5720Sstevel@tonic-gate */
5730Sstevel@tonic-gate int
page_trylock(page_t * pp,se_t se)5740Sstevel@tonic-gate page_trylock(page_t *pp, se_t se)
5750Sstevel@tonic-gate {
5760Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
5770Sstevel@tonic-gate
5780Sstevel@tonic-gate mutex_enter(pse);
579917Selowe if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
580973Selowe (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
581917Selowe /*
582917Selowe * Fail if a thread wants exclusive access and page is
583917Selowe * retired, if the page is slated for retirement, or a
584917Selowe * share lock is requested.
585917Selowe */
5860Sstevel@tonic-gate mutex_exit(pse);
587917Selowe VM_STAT_ADD(page_trylock_failed);
5880Sstevel@tonic-gate return (0);
5890Sstevel@tonic-gate }
5900Sstevel@tonic-gate
5910Sstevel@tonic-gate if (se == SE_EXCL) {
5920Sstevel@tonic-gate if (pp->p_selock == 0) {
5930Sstevel@tonic-gate THREAD_KPRI_REQUEST();
5940Sstevel@tonic-gate pp->p_selock = SE_WRITER;
5950Sstevel@tonic-gate mutex_exit(pse);
5960Sstevel@tonic-gate return (1);
5970Sstevel@tonic-gate }
5980Sstevel@tonic-gate } else {
5990Sstevel@tonic-gate if (pp->p_selock >= 0) {
6000Sstevel@tonic-gate pp->p_selock += SE_READER;
6010Sstevel@tonic-gate mutex_exit(pse);
6020Sstevel@tonic-gate return (1);
6030Sstevel@tonic-gate }
6040Sstevel@tonic-gate }
6050Sstevel@tonic-gate mutex_exit(pse);
6060Sstevel@tonic-gate return (0);
6070Sstevel@tonic-gate }
6080Sstevel@tonic-gate
6090Sstevel@tonic-gate /*
610917Selowe * Variant of page_unlock() specifically for the page freelist
611917Selowe * code. The mere existence of this code is a vile hack that
612917Selowe * has resulted due to the backwards locking order of the page
613917Selowe * freelist manager; please don't call it.
614917Selowe */
615917Selowe void
page_unlock_nocapture(page_t * pp)6163253Smec page_unlock_nocapture(page_t *pp)
617917Selowe {
618917Selowe kmutex_t *pse = PAGE_SE_MUTEX(pp);
619917Selowe selock_t old;
620917Selowe
621917Selowe mutex_enter(pse);
622917Selowe
623917Selowe old = pp->p_selock;
624917Selowe if ((old & ~SE_EWANTED) == SE_READER) {
625917Selowe pp->p_selock = old & ~SE_READER;
626917Selowe if (CV_HAS_WAITERS(&pp->p_cv))
627917Selowe cv_broadcast(&pp->p_cv);
628917Selowe } else if ((old & ~SE_EWANTED) == SE_DELETED) {
6297240Srh87107 panic("page_unlock_nocapture: page %p is deleted", (void *)pp);
630917Selowe } else if (old < 0) {
631917Selowe THREAD_KPRI_RELEASE();
632917Selowe pp->p_selock &= SE_EWANTED;
633917Selowe if (CV_HAS_WAITERS(&pp->p_cv))
634917Selowe cv_broadcast(&pp->p_cv);
635917Selowe } else if ((old & ~SE_EWANTED) > SE_READER) {
636917Selowe pp->p_selock = old - SE_READER;
637917Selowe } else {
6387240Srh87107 panic("page_unlock_nocapture: page %p is not locked",
6397240Srh87107 (void *)pp);
640917Selowe }
641917Selowe
642917Selowe mutex_exit(pse);
643917Selowe }
644917Selowe
645917Selowe /*
6460Sstevel@tonic-gate * Release the page's "shared/exclusive" lock and wake up anyone
6470Sstevel@tonic-gate * who might be waiting for it.
6480Sstevel@tonic-gate */
6490Sstevel@tonic-gate void
page_unlock(page_t * pp)6500Sstevel@tonic-gate page_unlock(page_t *pp)
6510Sstevel@tonic-gate {
6520Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
6530Sstevel@tonic-gate selock_t old;
6540Sstevel@tonic-gate
6550Sstevel@tonic-gate mutex_enter(pse);
656917Selowe
6570Sstevel@tonic-gate old = pp->p_selock;
6580Sstevel@tonic-gate if ((old & ~SE_EWANTED) == SE_READER) {
6590Sstevel@tonic-gate pp->p_selock = old & ~SE_READER;
6600Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv))
6610Sstevel@tonic-gate cv_broadcast(&pp->p_cv);
6620Sstevel@tonic-gate } else if ((old & ~SE_EWANTED) == SE_DELETED) {
6637240Srh87107 panic("page_unlock: page %p is deleted", (void *)pp);
6640Sstevel@tonic-gate } else if (old < 0) {
6650Sstevel@tonic-gate THREAD_KPRI_RELEASE();
6660Sstevel@tonic-gate pp->p_selock &= SE_EWANTED;
6670Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv))
6680Sstevel@tonic-gate cv_broadcast(&pp->p_cv);
6690Sstevel@tonic-gate } else if ((old & ~SE_EWANTED) > SE_READER) {
6700Sstevel@tonic-gate pp->p_selock = old - SE_READER;
6710Sstevel@tonic-gate } else {
6727240Srh87107 panic("page_unlock: page %p is not locked", (void *)pp);
6730Sstevel@tonic-gate }
674917Selowe
6753253Smec if (pp->p_selock == 0) {
676917Selowe /*
6773253Smec * If the T_CAPTURING bit is set, that means that we should
6783253Smec * not try and capture the page again as we could recurse
6793253Smec * which could lead to a stack overflow panic or spending a
6803253Smec * relatively long time in the kernel making no progress.
681917Selowe */
6823253Smec if ((pp->p_toxic & PR_CAPTURE) &&
6833253Smec !(curthread->t_flag & T_CAPTURING) &&
6843253Smec !PP_RETIRED(pp)) {
685917Selowe THREAD_KPRI_REQUEST();
686917Selowe pp->p_selock = SE_WRITER;
687917Selowe mutex_exit(pse);
6883253Smec page_unlock_capture(pp);
689917Selowe } else {
690917Selowe mutex_exit(pse);
691917Selowe }
692917Selowe } else {
693917Selowe mutex_exit(pse);
694917Selowe }
6950Sstevel@tonic-gate }
6960Sstevel@tonic-gate
6970Sstevel@tonic-gate /*
6980Sstevel@tonic-gate * Try to upgrade the lock on the page from a "shared" to an
6990Sstevel@tonic-gate * "exclusive" lock. Since this upgrade operation is done while
7000Sstevel@tonic-gate * holding the mutex protecting this page, no one else can acquire this page's
7010Sstevel@tonic-gate * lock and change the page. Thus, it is safe to drop the "shared"
7020Sstevel@tonic-gate * lock and attempt to acquire the "exclusive" lock.
7030Sstevel@tonic-gate *
7040Sstevel@tonic-gate * Returns 1 on success, 0 on failure.
7050Sstevel@tonic-gate */
7060Sstevel@tonic-gate int
page_tryupgrade(page_t * pp)7070Sstevel@tonic-gate page_tryupgrade(page_t *pp)
7080Sstevel@tonic-gate {
7090Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
7100Sstevel@tonic-gate
7110Sstevel@tonic-gate mutex_enter(pse);
7120Sstevel@tonic-gate if (!(pp->p_selock & SE_EWANTED)) {
7130Sstevel@tonic-gate /* no threads want exclusive access, try upgrade */
7140Sstevel@tonic-gate if (pp->p_selock == SE_READER) {
7150Sstevel@tonic-gate THREAD_KPRI_REQUEST();
7160Sstevel@tonic-gate /* convert to exclusive lock */
7170Sstevel@tonic-gate pp->p_selock = SE_WRITER;
7180Sstevel@tonic-gate mutex_exit(pse);
7190Sstevel@tonic-gate return (1);
7200Sstevel@tonic-gate }
7210Sstevel@tonic-gate }
7220Sstevel@tonic-gate mutex_exit(pse);
7230Sstevel@tonic-gate return (0);
7240Sstevel@tonic-gate }
7250Sstevel@tonic-gate
7260Sstevel@tonic-gate /*
7270Sstevel@tonic-gate * Downgrade the "exclusive" lock on the page to a "shared" lock
7280Sstevel@tonic-gate * while holding the mutex protecting this page's p_selock field.
7290Sstevel@tonic-gate */
7300Sstevel@tonic-gate void
page_downgrade(page_t * pp)7310Sstevel@tonic-gate page_downgrade(page_t *pp)
7320Sstevel@tonic-gate {
7330Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
7340Sstevel@tonic-gate int excl_waiting;
7350Sstevel@tonic-gate
7360Sstevel@tonic-gate ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
7370Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp));
7380Sstevel@tonic-gate
7390Sstevel@tonic-gate mutex_enter(pse);
7400Sstevel@tonic-gate excl_waiting = pp->p_selock & SE_EWANTED;
7410Sstevel@tonic-gate THREAD_KPRI_RELEASE();
7420Sstevel@tonic-gate pp->p_selock = SE_READER | excl_waiting;
7430Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv))
7440Sstevel@tonic-gate cv_broadcast(&pp->p_cv);
7450Sstevel@tonic-gate mutex_exit(pse);
7460Sstevel@tonic-gate }
7470Sstevel@tonic-gate
7480Sstevel@tonic-gate void
page_lock_delete(page_t * pp)7490Sstevel@tonic-gate page_lock_delete(page_t *pp)
7500Sstevel@tonic-gate {
7510Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp);
7520Sstevel@tonic-gate
7530Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp));
7540Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL);
7550Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1);
7560Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp));
7570Sstevel@tonic-gate
7580Sstevel@tonic-gate mutex_enter(pse);
7590Sstevel@tonic-gate THREAD_KPRI_RELEASE();
7600Sstevel@tonic-gate pp->p_selock = SE_DELETED;
7610Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv))
7620Sstevel@tonic-gate cv_broadcast(&pp->p_cv);
7630Sstevel@tonic-gate mutex_exit(pse);
7640Sstevel@tonic-gate }
7650Sstevel@tonic-gate
7663253Smec int
page_deleted(page_t * pp)7673253Smec page_deleted(page_t *pp)
7683253Smec {
7693253Smec return (pp->p_selock == SE_DELETED);
7703253Smec }
7713253Smec
7720Sstevel@tonic-gate /*
7730Sstevel@tonic-gate * Implement the io lock for pages
7740Sstevel@tonic-gate */
7750Sstevel@tonic-gate void
page_iolock_init(page_t * pp)7760Sstevel@tonic-gate page_iolock_init(page_t *pp)
7770Sstevel@tonic-gate {
7780Sstevel@tonic-gate pp->p_iolock_state = 0;
7790Sstevel@tonic-gate cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
7800Sstevel@tonic-gate }
7810Sstevel@tonic-gate
7820Sstevel@tonic-gate /*
7830Sstevel@tonic-gate * Acquire the i/o lock on a page.
7840Sstevel@tonic-gate */
7850Sstevel@tonic-gate void
page_io_lock(page_t * pp)7860Sstevel@tonic-gate page_io_lock(page_t *pp)
7870Sstevel@tonic-gate {
7880Sstevel@tonic-gate kmutex_t *pio;
7890Sstevel@tonic-gate
7900Sstevel@tonic-gate pio = PAGE_IO_MUTEX(pp);
7910Sstevel@tonic-gate mutex_enter(pio);
7920Sstevel@tonic-gate while (pp->p_iolock_state & PAGE_IO_INUSE) {
7930Sstevel@tonic-gate cv_wait(&(pp->p_io_cv), pio);
7940Sstevel@tonic-gate }
7950Sstevel@tonic-gate pp->p_iolock_state |= PAGE_IO_INUSE;
7960Sstevel@tonic-gate mutex_exit(pio);
7970Sstevel@tonic-gate }
7980Sstevel@tonic-gate
7990Sstevel@tonic-gate /*
8000Sstevel@tonic-gate * Release the i/o lock on a page.
8010Sstevel@tonic-gate */
8020Sstevel@tonic-gate void
page_io_unlock(page_t * pp)8030Sstevel@tonic-gate page_io_unlock(page_t *pp)
8040Sstevel@tonic-gate {
8050Sstevel@tonic-gate kmutex_t *pio;
8060Sstevel@tonic-gate
8070Sstevel@tonic-gate pio = PAGE_IO_MUTEX(pp);
8080Sstevel@tonic-gate mutex_enter(pio);
8092999Sstans cv_broadcast(&pp->p_io_cv);
8100Sstevel@tonic-gate pp->p_iolock_state &= ~PAGE_IO_INUSE;
8110Sstevel@tonic-gate mutex_exit(pio);
8120Sstevel@tonic-gate }
8130Sstevel@tonic-gate
8140Sstevel@tonic-gate /*
8150Sstevel@tonic-gate * Try to acquire the i/o lock on a page without blocking.
8160Sstevel@tonic-gate * Returns 1 on success, 0 on failure.
8170Sstevel@tonic-gate */
8180Sstevel@tonic-gate int
page_io_trylock(page_t * pp)8190Sstevel@tonic-gate page_io_trylock(page_t *pp)
8200Sstevel@tonic-gate {
8210Sstevel@tonic-gate kmutex_t *pio;
8220Sstevel@tonic-gate
8230Sstevel@tonic-gate if (pp->p_iolock_state & PAGE_IO_INUSE)
8240Sstevel@tonic-gate return (0);
8250Sstevel@tonic-gate
8260Sstevel@tonic-gate pio = PAGE_IO_MUTEX(pp);
8270Sstevel@tonic-gate mutex_enter(pio);
8280Sstevel@tonic-gate
8290Sstevel@tonic-gate if (pp->p_iolock_state & PAGE_IO_INUSE) {
8300Sstevel@tonic-gate mutex_exit(pio);
8310Sstevel@tonic-gate return (0);
8320Sstevel@tonic-gate }
8330Sstevel@tonic-gate pp->p_iolock_state |= PAGE_IO_INUSE;
8340Sstevel@tonic-gate mutex_exit(pio);
8350Sstevel@tonic-gate
8360Sstevel@tonic-gate return (1);
8370Sstevel@tonic-gate }
8380Sstevel@tonic-gate
8390Sstevel@tonic-gate /*
8402999Sstans * Wait until the i/o lock is not held.
8412999Sstans */
8422999Sstans void
page_io_wait(page_t * pp)8432999Sstans page_io_wait(page_t *pp)
8442999Sstans {
8452999Sstans kmutex_t *pio;
8462999Sstans
8472999Sstans pio = PAGE_IO_MUTEX(pp);
8482999Sstans mutex_enter(pio);
8492999Sstans while (pp->p_iolock_state & PAGE_IO_INUSE) {
8502999Sstans cv_wait(&(pp->p_io_cv), pio);
8512999Sstans }
8522999Sstans mutex_exit(pio);
8532999Sstans }
8542999Sstans
8552999Sstans /*
8562999Sstans * Returns 1 on success, 0 on failure.
8572999Sstans */
8582999Sstans int
page_io_locked(page_t * pp)8592999Sstans page_io_locked(page_t *pp)
8602999Sstans {
8612999Sstans return (pp->p_iolock_state & PAGE_IO_INUSE);
8622999Sstans }
8632999Sstans
8642999Sstans /*
8650Sstevel@tonic-gate * Assert that the i/o lock on a page is held.
8660Sstevel@tonic-gate * Returns 1 on success, 0 on failure.
8670Sstevel@tonic-gate */
8680Sstevel@tonic-gate int
page_iolock_assert(page_t * pp)8690Sstevel@tonic-gate page_iolock_assert(page_t *pp)
8700Sstevel@tonic-gate {
8712999Sstans return (page_io_locked(pp));
8720Sstevel@tonic-gate }
8730Sstevel@tonic-gate
8740Sstevel@tonic-gate /*
8750Sstevel@tonic-gate * Wrapper exported to kernel routines that are built
8760Sstevel@tonic-gate * platform-independent (the macro is platform-dependent;
8770Sstevel@tonic-gate * the size of vph_mutex[] is based on NCPU).
8780Sstevel@tonic-gate *
8790Sstevel@tonic-gate * Note that you can do stress testing on this by setting the
8800Sstevel@tonic-gate * variable page_vnode_mutex_stress to something other than
8810Sstevel@tonic-gate * zero in a DEBUG kernel in a debugger after loading the kernel.
8820Sstevel@tonic-gate * Setting it after the kernel is running may not work correctly.
8830Sstevel@tonic-gate */
8840Sstevel@tonic-gate #ifdef DEBUG
8850Sstevel@tonic-gate static int page_vnode_mutex_stress = 0;
8860Sstevel@tonic-gate #endif
8870Sstevel@tonic-gate
8880Sstevel@tonic-gate kmutex_t *
page_vnode_mutex(vnode_t * vp)8890Sstevel@tonic-gate page_vnode_mutex(vnode_t *vp)
8900Sstevel@tonic-gate {
8910Sstevel@tonic-gate if (vp == &kvp)
8920Sstevel@tonic-gate return (&vph_mutex[VPH_TABLE_SIZE + 0]);
8933290Sjohansen
8943290Sjohansen if (vp == &zvp)
8953290Sjohansen return (&vph_mutex[VPH_TABLE_SIZE + 1]);
8960Sstevel@tonic-gate #ifdef DEBUG
8970Sstevel@tonic-gate if (page_vnode_mutex_stress != 0)
8980Sstevel@tonic-gate return (&vph_mutex[0]);
8990Sstevel@tonic-gate #endif
9000Sstevel@tonic-gate
9010Sstevel@tonic-gate return (&vph_mutex[VP_HASH_FUNC(vp)]);
9020Sstevel@tonic-gate }
9030Sstevel@tonic-gate
9040Sstevel@tonic-gate kmutex_t *
page_se_mutex(page_t * pp)9050Sstevel@tonic-gate page_se_mutex(page_t *pp)
9060Sstevel@tonic-gate {
9070Sstevel@tonic-gate return (PAGE_SE_MUTEX(pp));
9080Sstevel@tonic-gate }
9090Sstevel@tonic-gate
9100Sstevel@tonic-gate #ifdef VM_STATS
9110Sstevel@tonic-gate uint_t pszclck_stat[4];
9120Sstevel@tonic-gate #endif
9130Sstevel@tonic-gate /*
9140Sstevel@tonic-gate * Find, take and return a mutex held by hat_page_demote().
9150Sstevel@tonic-gate * Called by page_demote_vp_pages() before hat_page_demote() call and by
9160Sstevel@tonic-gate * routines that want to block hat_page_demote() but can't do it
9170Sstevel@tonic-gate * via locking all constituent pages.
9180Sstevel@tonic-gate *
9190Sstevel@tonic-gate * Return NULL if p_szc is 0.
9200Sstevel@tonic-gate *
9210Sstevel@tonic-gate * It should only be used for pages that can be demoted by hat_page_demote()
9220Sstevel@tonic-gate * i.e. non swapfs file system pages. The logic here is lifted from
9230Sstevel@tonic-gate * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
9240Sstevel@tonic-gate * since the page is locked and not free.
9250Sstevel@tonic-gate *
9260Sstevel@tonic-gate * Hash of the root page is used to find the lock.
9270Sstevel@tonic-gate * To find the root in the presense of hat_page_demote() chageing the location
9280Sstevel@tonic-gate * of the root this routine relies on the fact that hat_page_demote() changes
9290Sstevel@tonic-gate * root last.
9300Sstevel@tonic-gate *
9310Sstevel@tonic-gate * If NULL is returned pp's p_szc is guaranteed to be 0. If non NULL is
9320Sstevel@tonic-gate * returned pp's p_szc may be any value.
9330Sstevel@tonic-gate */
9340Sstevel@tonic-gate kmutex_t *
page_szc_lock(page_t * pp)9350Sstevel@tonic-gate page_szc_lock(page_t *pp)
9360Sstevel@tonic-gate {
9370Sstevel@tonic-gate kmutex_t *mtx;
9380Sstevel@tonic-gate page_t *rootpp;
9390Sstevel@tonic-gate uint_t szc;
9400Sstevel@tonic-gate uint_t rszc;
9410Sstevel@tonic-gate uint_t pszc = pp->p_szc;
9420Sstevel@tonic-gate
9430Sstevel@tonic-gate ASSERT(pp != NULL);
9440Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp));
9450Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp));
9460Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL);
9470Sstevel@tonic-gate ASSERT(!IS_SWAPFSVP(pp->p_vnode));
9483290Sjohansen ASSERT(!PP_ISKAS(pp));
9490Sstevel@tonic-gate
9500Sstevel@tonic-gate again:
9510Sstevel@tonic-gate if (pszc == 0) {
9520Sstevel@tonic-gate VM_STAT_ADD(pszclck_stat[0]);
9530Sstevel@tonic-gate return (NULL);
9540Sstevel@tonic-gate }
9550Sstevel@tonic-gate
9560Sstevel@tonic-gate /* The lock lives in the root page */
9570Sstevel@tonic-gate
9580Sstevel@tonic-gate rootpp = PP_GROUPLEADER(pp, pszc);
9590Sstevel@tonic-gate mtx = PAGE_SZC_MUTEX(rootpp);
9600Sstevel@tonic-gate mutex_enter(mtx);
9610Sstevel@tonic-gate
9620Sstevel@tonic-gate /*
9630Sstevel@tonic-gate * since p_szc can only decrease if pp == rootpp
9640Sstevel@tonic-gate * rootpp will be always the same i.e we have the right root
9650Sstevel@tonic-gate * regardless of rootpp->p_szc.
9660Sstevel@tonic-gate * If location of pp's root didn't change after we took
9670Sstevel@tonic-gate * the lock we have the right root. return mutex hashed off it.
9680Sstevel@tonic-gate */
9690Sstevel@tonic-gate if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
9700Sstevel@tonic-gate VM_STAT_ADD(pszclck_stat[1]);
9710Sstevel@tonic-gate return (mtx);
9720Sstevel@tonic-gate }
9730Sstevel@tonic-gate
9740Sstevel@tonic-gate /*
9750Sstevel@tonic-gate * root location changed because page got demoted.
9760Sstevel@tonic-gate * locate the new root.
9770Sstevel@tonic-gate */
9780Sstevel@tonic-gate if (rszc < pszc) {
9790Sstevel@tonic-gate szc = pp->p_szc;
9800Sstevel@tonic-gate ASSERT(szc < pszc);
9810Sstevel@tonic-gate mutex_exit(mtx);
9820Sstevel@tonic-gate pszc = szc;
9830Sstevel@tonic-gate VM_STAT_ADD(pszclck_stat[2]);
9840Sstevel@tonic-gate goto again;
9850Sstevel@tonic-gate }
9860Sstevel@tonic-gate
9870Sstevel@tonic-gate VM_STAT_ADD(pszclck_stat[3]);
9880Sstevel@tonic-gate /*
9890Sstevel@tonic-gate * current hat_page_demote not done yet.
9900Sstevel@tonic-gate * wait for it to finish.
9910Sstevel@tonic-gate */
9920Sstevel@tonic-gate mutex_exit(mtx);
9930Sstevel@tonic-gate rootpp = PP_GROUPLEADER(rootpp, rszc);
9940Sstevel@tonic-gate mtx = PAGE_SZC_MUTEX(rootpp);
9950Sstevel@tonic-gate mutex_enter(mtx);
9960Sstevel@tonic-gate mutex_exit(mtx);
9970Sstevel@tonic-gate ASSERT(rootpp->p_szc < rszc);
9980Sstevel@tonic-gate goto again;
9990Sstevel@tonic-gate }
10000Sstevel@tonic-gate
10010Sstevel@tonic-gate int
page_szc_lock_assert(page_t * pp)10020Sstevel@tonic-gate page_szc_lock_assert(page_t *pp)
10030Sstevel@tonic-gate {
10040Sstevel@tonic-gate page_t *rootpp = PP_PAGEROOT(pp);
10050Sstevel@tonic-gate kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);
10060Sstevel@tonic-gate
10070Sstevel@tonic-gate return (MUTEX_HELD(mtx));
10080Sstevel@tonic-gate }
10093446Smrj
10103446Smrj /*
10113446Smrj * memseg locking
10123446Smrj */
10133446Smrj static krwlock_t memsegslock;
10143446Smrj
10153446Smrj /*
10163446Smrj * memlist (phys_install, phys_avail) locking.
10173446Smrj */
10183446Smrj static krwlock_t memlists_lock;
10193446Smrj
102011185SSean.McEnroe@Sun.COM int
memsegs_trylock(int writer)102111185SSean.McEnroe@Sun.COM memsegs_trylock(int writer)
102211185SSean.McEnroe@Sun.COM {
102311185SSean.McEnroe@Sun.COM return (rw_tryenter(&memsegslock, writer ? RW_WRITER : RW_READER));
102411185SSean.McEnroe@Sun.COM }
102511185SSean.McEnroe@Sun.COM
10263446Smrj void
memsegs_lock(int writer)10273446Smrj memsegs_lock(int writer)
10283446Smrj {
10293446Smrj rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER);
10303446Smrj }
10313446Smrj
10323446Smrj /*ARGSUSED*/
10333446Smrj void
memsegs_unlock(int writer)10343446Smrj memsegs_unlock(int writer)
10353446Smrj {
10363446Smrj rw_exit(&memsegslock);
10373446Smrj }
10383446Smrj
10393446Smrj int
memsegs_lock_held(void)10403446Smrj memsegs_lock_held(void)
10413446Smrj {
10423446Smrj return (RW_LOCK_HELD(&memsegslock));
10433446Smrj }
10443446Smrj
10453446Smrj void
memlist_read_lock(void)10463446Smrj memlist_read_lock(void)
10473446Smrj {
10483446Smrj rw_enter(&memlists_lock, RW_READER);
10493446Smrj }
10503446Smrj
10513446Smrj void
memlist_read_unlock(void)10523446Smrj memlist_read_unlock(void)
10533446Smrj {
10543446Smrj rw_exit(&memlists_lock);
10553446Smrj }
10563446Smrj
10573446Smrj void
memlist_write_lock(void)10583446Smrj memlist_write_lock(void)
10593446Smrj {
10603446Smrj rw_enter(&memlists_lock, RW_WRITER);
10613446Smrj }
10623446Smrj
10633446Smrj void
memlist_write_unlock(void)10643446Smrj memlist_write_unlock(void)
10653446Smrj {
10663446Smrj rw_exit(&memlists_lock);
10673446Smrj }
1068