xref: /onnv-gate/usr/src/uts/common/vm/vm_seg.c (revision 6695)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
53247Sgjelinek  * Common Development and Distribution License (the "License").
63247Sgjelinek  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
225928Sjj204856  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
270Sstevel@tonic-gate /*	  All Rights Reserved  	*/
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
310Sstevel@tonic-gate  * The Regents of the University of California
320Sstevel@tonic-gate  * All Rights Reserved
330Sstevel@tonic-gate  *
340Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
350Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
360Sstevel@tonic-gate  * contributors.
370Sstevel@tonic-gate  */
380Sstevel@tonic-gate 
390Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
400Sstevel@tonic-gate 
410Sstevel@tonic-gate /*
420Sstevel@tonic-gate  * VM - segment management.
430Sstevel@tonic-gate  */
440Sstevel@tonic-gate 
450Sstevel@tonic-gate #include <sys/types.h>
460Sstevel@tonic-gate #include <sys/inttypes.h>
470Sstevel@tonic-gate #include <sys/t_lock.h>
480Sstevel@tonic-gate #include <sys/param.h>
490Sstevel@tonic-gate #include <sys/systm.h>
500Sstevel@tonic-gate #include <sys/kmem.h>
51*6695Saguzovsk #include <sys/sysmacros.h>
520Sstevel@tonic-gate #include <sys/vmsystm.h>
53*6695Saguzovsk #include <sys/tuneable.h>
540Sstevel@tonic-gate #include <sys/debug.h>
55*6695Saguzovsk #include <sys/fs/swapnode.h>
560Sstevel@tonic-gate #include <sys/cmn_err.h>
570Sstevel@tonic-gate #include <sys/callb.h>
580Sstevel@tonic-gate #include <sys/mem_config.h>
593247Sgjelinek #include <sys/mman.h>
600Sstevel@tonic-gate 
610Sstevel@tonic-gate #include <vm/hat.h>
620Sstevel@tonic-gate #include <vm/as.h>
630Sstevel@tonic-gate #include <vm/seg.h>
640Sstevel@tonic-gate #include <vm/seg_kmem.h>
653247Sgjelinek #include <vm/seg_spt.h>
663247Sgjelinek #include <vm/seg_vn.h>
67*6695Saguzovsk #include <vm/anon.h>
68*6695Saguzovsk 
690Sstevel@tonic-gate /*
700Sstevel@tonic-gate  * kstats for segment advise
710Sstevel@tonic-gate  */
720Sstevel@tonic-gate segadvstat_t segadvstat = {
730Sstevel@tonic-gate 	{ "MADV_FREE_hit",	KSTAT_DATA_ULONG },
740Sstevel@tonic-gate 	{ "MADV_FREE_miss",	KSTAT_DATA_ULONG },
750Sstevel@tonic-gate };
760Sstevel@tonic-gate 
770Sstevel@tonic-gate kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
780Sstevel@tonic-gate uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);
790Sstevel@tonic-gate 
800Sstevel@tonic-gate /*
810Sstevel@tonic-gate  * entry in the segment page cache
820Sstevel@tonic-gate  */
830Sstevel@tonic-gate struct seg_pcache {
84*6695Saguzovsk 	struct seg_pcache	*p_hnext;	/* list for hashed blocks */
85*6695Saguzovsk 	struct seg_pcache	*p_hprev;
86*6695Saguzovsk 	pcache_link_t		p_plink;	/* per segment/amp list */
87*6695Saguzovsk 	void 			*p_htag0;	/* segment/amp pointer */
88*6695Saguzovsk 	caddr_t			p_addr;		/* base address/anon_idx */
89*6695Saguzovsk 	size_t			p_len;		/* total bytes */
90*6695Saguzovsk 	size_t			p_wlen;		/* writable bytes at p_addr */
91*6695Saguzovsk 	struct page		**p_pp;		/* pp shadow list */
92*6695Saguzovsk 	seg_preclaim_cbfunc_t	p_callback;	/* reclaim callback function */
93*6695Saguzovsk 	clock_t			p_lbolt;	/* lbolt from last use */
94*6695Saguzovsk 	struct seg_phash	*p_hashp;	/* our pcache hash bucket */
95*6695Saguzovsk 	uint_t			p_active;	/* active count */
96*6695Saguzovsk 	uchar_t			p_write;	/* true if S_WRITE */
97*6695Saguzovsk 	uchar_t			p_ref;		/* reference byte */
98*6695Saguzovsk 	ushort_t		p_flags;	/* bit flags */
990Sstevel@tonic-gate };
1000Sstevel@tonic-gate 
1010Sstevel@tonic-gate struct seg_phash {
102*6695Saguzovsk 	struct seg_pcache	*p_hnext;	/* list for hashed blocks */
103*6695Saguzovsk 	struct seg_pcache	*p_hprev;
104*6695Saguzovsk 	kmutex_t		p_hmutex;	/* protects hash bucket */
105*6695Saguzovsk 	pcache_link_t		p_halink[2];	/* active bucket linkages */
106*6695Saguzovsk };
107*6695Saguzovsk 
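/*
 * Buckets for wired (SEGP_FORCE_WIRED) shadow lists share the layout of the
 * first three members of struct seg_phash but carry no p_halink linkage:
 * wired entries are never reclaimed through the active bucket lists (a forced
 * purge and seg_ppurge_wiredpp() walk the wired hash table directly), so the
 * linkage is not needed.
 */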
108*6695Saguzovsk struct seg_phash_wired {
109*6695Saguzovsk 	struct seg_pcache	*p_hnext;	/* list for hashed blocks */
110*6695Saguzovsk 	struct seg_pcache	*p_hprev;
111*6695Saguzovsk 	kmutex_t		p_hmutex;	/* protects hash bucket */
1120Sstevel@tonic-gate };
1130Sstevel@tonic-gate 
114*6695Saguzovsk /*
115*6695Saguzovsk  * A parameter to control a maximum number of bytes that can be
116*6695Saguzovsk  * purged from pcache at a time.
117*6695Saguzovsk  */
118*6695Saguzovsk #define	P_MAX_APURGE_BYTES	(1024 * 1024 * 1024)
119*6695Saguzovsk 
120*6695Saguzovsk /*
121*6695Saguzovsk  * log2(fraction of pcache to reclaim at a time).
122*6695Saguzovsk  */
123*6695Saguzovsk #define	P_SHRINK_SHFT		(5)
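
/*
 * Together these two constants bound the work done by one pass of the
 * asynchronous reclaim thread: a pass tries to purge about
 * 1/(2^P_SHRINK_SHFT) of the currently cached window, normally capped at
 * P_MAX_APURGE_BYTES worth of pages (see the npgs_to_purge computation in
 * seg_ppurge_async() below).
 */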
124*6695Saguzovsk 
125*6695Saguzovsk /*
126*6695Saguzovsk  * The following variables can be tuned via /etc/system.
127*6695Saguzovsk  */
128*6695Saguzovsk 
129*6695Saguzovsk int	segpcache_enabled = 1;		/* if 1, shadow lists are cached */
130*6695Saguzovsk pgcnt_t	segpcache_maxwindow = 0;	/* max # of pages that can be cached */
131*6695Saguzovsk ulong_t	segpcache_hashsize_win = 0;	/* # of non wired buckets */
132*6695Saguzovsk ulong_t	segpcache_hashsize_wired = 0;	/* # of wired buckets */
133*6695Saguzovsk int	segpcache_reap_sec = 1;		/* reap check rate in secs */
134*6695Saguzovsk clock_t	segpcache_reap_ticks = 0;	/* reap interval in ticks */
135*6695Saguzovsk int	segpcache_pcp_maxage_sec = 1;	/* pcp max age in secs */
136*6695Saguzovsk clock_t	segpcache_pcp_maxage_ticks = 0;	/* pcp max age in ticks */
137*6695Saguzovsk int	segpcache_shrink_shift = P_SHRINK_SHFT;	/* log2 reap fraction */
138*6695Saguzovsk pgcnt_t	segpcache_maxapurge_bytes = P_MAX_APURGE_BYTES;	/* max purge bytes */
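
/*
 * For example, shadow list caching could be turned off from /etc/system with
 * a line like the following (illustration only; any of the variables above
 * can be set the same way):
 *
 *	set segpcache_enabled = 0
 */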
1390Sstevel@tonic-gate 
140*6695Saguzovsk static kmutex_t seg_pcache_mtx;	/* protects seg_pdisabled counter */
141*6695Saguzovsk static kmutex_t seg_pasync_mtx;	/* protects async thread scheduling */
142*6695Saguzovsk static kcondvar_t seg_pasync_cv;
143*6695Saguzovsk 
144*6695Saguzovsk #pragma align 64(pctrl1)
145*6695Saguzovsk #pragma align 64(pctrl2)
146*6695Saguzovsk #pragma align 64(pctrl3)
1470Sstevel@tonic-gate 
148*6695Saguzovsk /*
149*6695Saguzovsk  * Keep frequently used variables together in one cache line.
150*6695Saguzovsk  */
151*6695Saguzovsk static struct p_ctrl1 {
152*6695Saguzovsk 	uint_t p_disabled;		/* if not 0, caching temporarily off */
153*6695Saguzovsk 	pgcnt_t p_maxwin;		/* max # of pages that can be cached */
154*6695Saguzovsk 	size_t p_hashwin_sz;		/* # of non wired buckets */
155*6695Saguzovsk 	struct seg_phash *p_htabwin;	/* hash table for non wired entries */
156*6695Saguzovsk 	size_t p_hashwired_sz;		/* # of wired buckets */
157*6695Saguzovsk 	struct seg_phash_wired *p_htabwired; /* hash table for wired entries */
158*6695Saguzovsk 	kmem_cache_t *p_kmcache;	/* kmem cache for seg_pcache structs */
159*6695Saguzovsk #ifdef _LP64
160*6695Saguzovsk 	ulong_t pad[1];
161*6695Saguzovsk #endif /* _LP64 */
162*6695Saguzovsk } pctrl1;
163*6695Saguzovsk 
164*6695Saguzovsk static struct p_ctrl2 {
165*6695Saguzovsk 	kmutex_t p_mem_mtx;	/* protects window counter and p_halinks */
166*6695Saguzovsk 	pgcnt_t  p_locked_win;	/* # pages from window */
167*6695Saguzovsk 	pgcnt_t  p_locked;	/* # of pages cached by pagelock */
168*6695Saguzovsk 	uchar_t	 p_ahcur;	/* current active links for insert/delete */
169*6695Saguzovsk 	uchar_t  p_athr_on;	/* async reclaim thread is running. */
170*6695Saguzovsk 	pcache_link_t p_ahhead[2]; /* active buckets linkages */
171*6695Saguzovsk } pctrl2;
1720Sstevel@tonic-gate 
173*6695Saguzovsk static struct p_ctrl3 {
174*6695Saguzovsk 	clock_t	p_pcp_maxage;		/* max pcp age in ticks */
175*6695Saguzovsk 	ulong_t	p_athr_empty_ahb;	/* athread walk stats */
176*6695Saguzovsk 	ulong_t p_athr_full_ahb;	/* athread walk stats */
177*6695Saguzovsk 	pgcnt_t	p_maxapurge_npages;	/* max pages to purge at a time */
178*6695Saguzovsk 	int	p_shrink_shft;		/* reap shift factor */
179*6695Saguzovsk #ifdef _LP64
180*6695Saguzovsk 	ulong_t pad[3];
181*6695Saguzovsk #endif /* _LP64 */
182*6695Saguzovsk } pctrl3;
1830Sstevel@tonic-gate 
184*6695Saguzovsk #define	seg_pdisabled			pctrl1.p_disabled
185*6695Saguzovsk #define	seg_pmaxwindow			pctrl1.p_maxwin
186*6695Saguzovsk #define	seg_phashsize_win		pctrl1.p_hashwin_sz
187*6695Saguzovsk #define	seg_phashtab_win		pctrl1.p_htabwin
188*6695Saguzovsk #define	seg_phashsize_wired		pctrl1.p_hashwired_sz
189*6695Saguzovsk #define	seg_phashtab_wired		pctrl1.p_htabwired
190*6695Saguzovsk #define	seg_pkmcache			pctrl1.p_kmcache
191*6695Saguzovsk #define	seg_pmem_mtx			pctrl2.p_mem_mtx
192*6695Saguzovsk #define	seg_plocked_window		pctrl2.p_locked_win
193*6695Saguzovsk #define	seg_plocked			pctrl2.p_locked
194*6695Saguzovsk #define	seg_pahcur			pctrl2.p_ahcur
195*6695Saguzovsk #define	seg_pathr_on			pctrl2.p_athr_on
196*6695Saguzovsk #define	seg_pahhead			pctrl2.p_ahhead
197*6695Saguzovsk #define	seg_pmax_pcpage			pctrl3.p_pcp_maxage
198*6695Saguzovsk #define	seg_pathr_empty_ahb		pctrl3.p_athr_empty_ahb
199*6695Saguzovsk #define	seg_pathr_full_ahb		pctrl3.p_athr_full_ahb
200*6695Saguzovsk #define	seg_pshrink_shift		pctrl3.p_shrink_shft
201*6695Saguzovsk #define	seg_pmaxapurge_npages		pctrl3.p_maxapurge_npages
202*6695Saguzovsk 
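/*
 * The window and wired bucket counts are expected to be powers of two, so the
 * masks below pick a bucket with a simple AND.  P_BASESHIFT discards the low
 * order bits of the seg/amp pointer, which carry little entropy because of
 * allocator alignment.
 */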
203*6695Saguzovsk #define	P_HASHWIN_MASK			(seg_phashsize_win - 1)
204*6695Saguzovsk #define	P_HASHWIRED_MASK		(seg_phashsize_wired - 1)
205*6695Saguzovsk #define	P_BASESHIFT			(6)
206*6695Saguzovsk 
207*6695Saguzovsk kthread_t *seg_pasync_thr;
208*6695Saguzovsk 
209*6695Saguzovsk extern struct seg_ops segvn_ops;
210*6695Saguzovsk extern struct seg_ops segspt_shmops;
2110Sstevel@tonic-gate 
212*6695Saguzovsk #define	IS_PFLAGS_WIRED(flags) ((flags) & SEGP_FORCE_WIRED)
213*6695Saguzovsk #define	IS_PCP_WIRED(pcp) IS_PFLAGS_WIRED((pcp)->p_flags)
214*6695Saguzovsk 
215*6695Saguzovsk #define	LBOLT_DELTA(t)	((ulong_t)(lbolt - (t)))
216*6695Saguzovsk 
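/*
 * PCP_AGE() is the number of clock ticks since a cached entry was last used.
 * The asynchronous reclaim thread compares it against seg_pmax_pcpage so that
 * recently referenced entries are not purged immediately.
 */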
217*6695Saguzovsk #define	PCP_AGE(pcp)	LBOLT_DELTA((pcp)->p_lbolt)
218*6695Saguzovsk 
219*6695Saguzovsk /*
220*6695Saguzovsk  * htag0 argument can be a seg or amp pointer.
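 *
 * Wired entries (SEGP_FORCE_WIRED, the segspt case) hash only on the htag0
 * pointer, so one segment maps to one bucket.  Non wired entries also fold in
 * the shadow list base address, shifted by the segment's page size shift (or
 * by the explicit shift passed in the upper bits of flags when SEGP_PSHIFT is
 * set), so the many entries of a single segment or amp are spread across
 * buckets.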
221*6695Saguzovsk  */
222*6695Saguzovsk #define	P_HASHBP(seg, htag0, addr, flags)				\
223*6695Saguzovsk 	(IS_PFLAGS_WIRED((flags)) ?					\
224*6695Saguzovsk 	    ((struct seg_phash *)&seg_phashtab_wired[P_HASHWIRED_MASK &	\
225*6695Saguzovsk 	    ((uintptr_t)(htag0) >> P_BASESHIFT)]) :			\
226*6695Saguzovsk 	    (&seg_phashtab_win[P_HASHWIN_MASK &				\
227*6695Saguzovsk 	    (((uintptr_t)(htag0) >> 3) ^				\
228*6695Saguzovsk 	    ((uintptr_t)(addr) >> ((flags & SEGP_PSHIFT) ?		\
229*6695Saguzovsk 	    (flags >> 16) : page_get_shift((seg)->s_szc))))]))
2300Sstevel@tonic-gate 
231*6695Saguzovsk /*
232*6695Saguzovsk  * htag0 argument can be a seg or amp pointer.
233*6695Saguzovsk  */
234*6695Saguzovsk #define	P_MATCH(pcp, htag0, addr, len)					\
235*6695Saguzovsk 	((pcp)->p_htag0 == (htag0) &&					\
236*6695Saguzovsk 	(pcp)->p_addr == (addr) &&					\
237*6695Saguzovsk 	(pcp)->p_len >= (len))
2380Sstevel@tonic-gate 
239*6695Saguzovsk #define	P_MATCH_PP(pcp, htag0, addr, len, pp)				\
240*6695Saguzovsk 	((pcp)->p_pp == (pp) &&						\
241*6695Saguzovsk 	(pcp)->p_htag0 == (htag0) &&					\
242*6695Saguzovsk 	(pcp)->p_addr == (addr) &&					\
243*6695Saguzovsk 	(pcp)->p_len >= (len))
244*6695Saguzovsk 
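/*
 * plink2pcache() and hlink2phash() recover the enclosing structure from an
 * embedded pcache_link_t, so the per seg/amp entry lists and the active
 * bucket lists can be linked through the structures without separate nodes.
 */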
245*6695Saguzovsk #define	plink2pcache(pl)	((struct seg_pcache *)((uintptr_t)(pl) - \
246*6695Saguzovsk     offsetof(struct seg_pcache, p_plink)))
247*6695Saguzovsk 
248*6695Saguzovsk #define	hlink2phash(hl, l)	((struct seg_phash *)((uintptr_t)(hl) -	\
249*6695Saguzovsk     offsetof(struct seg_phash, p_halink[l])))
2500Sstevel@tonic-gate 
2510Sstevel@tonic-gate /*
252*6695Saguzovsk  * seg_padd_abuck()/seg_premove_abuck() link and unlink hash buckets from
253*6695Saguzovsk  * active hash bucket lists. We maintain active bucket lists to reduce the
254*6695Saguzovsk  * overhead of finding active buckets during asynchronous purging since there
255*6695Saguzovsk  * can be 10s of millions of buckets on a large system but only a small subset
256*6695Saguzovsk  * of them in actual use.
257*6695Saguzovsk  *
258*6695Saguzovsk  * There are two active bucket lists. The current active list (selected by
259*6695Saguzovsk  * seg_pahcur) is used by seg_pinsert()/seg_pinactive()/seg_ppurge() to add
260*6695Saguzovsk  * and delete buckets. The other list is used by the asynchronous purge
261*6695Saguzovsk  * thread. This allows the purge thread to walk its list without holding
262*6695Saguzovsk  * seg_pmem_mtx for a long time. When the asynchronous thread is done with
263*6695Saguzovsk  * its list it switches the lists, making the list it just finished
264*6695Saguzovsk  * processing the new current active list.
265*6695Saguzovsk  *
266*6695Saguzovsk  * seg_padd_abuck() only adds the bucket to the current list if the bucket is
267*6695Saguzovsk  * not yet on any list.  seg_premove_abuck() may remove the bucket from either
268*6695Saguzovsk  * list. If the bucket is on the current list it is always removed. Otherwise
269*6695Saguzovsk  * the bucket is only removed if the asynchronous purge thread is not running
270*6695Saguzovsk  * or if seg_premove_abuck() is called by the asynchronous purge thread
271*6695Saguzovsk  * itself. A given bucket can only be on one of the active lists at a time.
272*6695Saguzovsk  * Both routines must be called with the per bucket lock held; they use
273*6695Saguzovsk  * seg_pmem_mtx to protect the list updates. seg_padd_abuck() must be called
274*6695Saguzovsk  * after the first entry is added to the bucket chain and seg_premove_abuck()
275*6695Saguzovsk  * must be called after the last pcp entry is deleted from its chain. Holding
276*6695Saguzovsk  * the per bucket lock avoids a race in which pcp entries are added to the
277*6695Saguzovsk  * bucket chain after the caller of seg_premove_abuck() observed the chain to
278*6695Saguzovsk  * be empty but before the bucket is unlinked, which would lose an active
279*6695Saguzovsk  * bucket from the active lists.
280*6695Saguzovsk  *
281*6695Saguzovsk  * Both lists are circular doubly linked lists anchored at seg_pahhead heads.
282*6695Saguzovsk  * New entries are added to the end of the list since LRU is used as the
283*6695Saguzovsk  * purging policy.
284*6695Saguzovsk  */
285*6695Saguzovsk static void
286*6695Saguzovsk seg_padd_abuck(struct seg_phash *hp)
287*6695Saguzovsk {
288*6695Saguzovsk 	int lix;
289*6695Saguzovsk 
290*6695Saguzovsk 	ASSERT(MUTEX_HELD(&hp->p_hmutex));
291*6695Saguzovsk 	ASSERT((struct seg_phash *)hp->p_hnext != hp);
292*6695Saguzovsk 	ASSERT((struct seg_phash *)hp->p_hprev != hp);
293*6695Saguzovsk 	ASSERT(hp->p_hnext == hp->p_hprev);
294*6695Saguzovsk 	ASSERT(!IS_PCP_WIRED(hp->p_hnext));
295*6695Saguzovsk 	ASSERT(hp->p_hnext->p_hnext == (struct seg_pcache *)hp);
296*6695Saguzovsk 	ASSERT(hp->p_hprev->p_hprev == (struct seg_pcache *)hp);
297*6695Saguzovsk 	ASSERT(hp >= seg_phashtab_win &&
298*6695Saguzovsk 	    hp < &seg_phashtab_win[seg_phashsize_win]);
299*6695Saguzovsk 
300*6695Saguzovsk 	/*
301*6695Saguzovsk 	 * This bucket can already be on one of active lists
302*6695Saguzovsk 	 * since seg_premove_abuck() may have failed to remove it
303*6695Saguzovsk 	 * before.
304*6695Saguzovsk 	 */
305*6695Saguzovsk 	mutex_enter(&seg_pmem_mtx);
306*6695Saguzovsk 	lix = seg_pahcur;
307*6695Saguzovsk 	ASSERT(lix >= 0 && lix <= 1);
308*6695Saguzovsk 	if (hp->p_halink[lix].p_lnext != NULL) {
309*6695Saguzovsk 		ASSERT(hp->p_halink[lix].p_lprev != NULL);
310*6695Saguzovsk 		ASSERT(hp->p_halink[!lix].p_lnext == NULL);
311*6695Saguzovsk 		ASSERT(hp->p_halink[!lix].p_lprev == NULL);
312*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
313*6695Saguzovsk 		return;
314*6695Saguzovsk 	}
315*6695Saguzovsk 	ASSERT(hp->p_halink[lix].p_lprev == NULL);
316*6695Saguzovsk 
317*6695Saguzovsk 	/*
318*6695Saguzovsk 	 * If this bucket is still on list !lix async thread can't yet remove
319*6695Saguzovsk 	 * it since we hold here per bucket lock. In this case just return
320*6695Saguzovsk 	 * since async thread will eventually find and process this bucket.
321*6695Saguzovsk 	 */
322*6695Saguzovsk 	if (hp->p_halink[!lix].p_lnext != NULL) {
323*6695Saguzovsk 		ASSERT(hp->p_halink[!lix].p_lprev != NULL);
324*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
325*6695Saguzovsk 		return;
326*6695Saguzovsk 	}
327*6695Saguzovsk 	ASSERT(hp->p_halink[!lix].p_lprev == NULL);
328*6695Saguzovsk 	/*
329*6695Saguzovsk 	 * This bucket is not on any active bucket list yet.
330*6695Saguzovsk 	 * Add the bucket to the tail of current active list.
331*6695Saguzovsk 	 */
332*6695Saguzovsk 	hp->p_halink[lix].p_lnext = &seg_pahhead[lix];
333*6695Saguzovsk 	hp->p_halink[lix].p_lprev = seg_pahhead[lix].p_lprev;
334*6695Saguzovsk 	seg_pahhead[lix].p_lprev->p_lnext = &hp->p_halink[lix];
335*6695Saguzovsk 	seg_pahhead[lix].p_lprev = &hp->p_halink[lix];
336*6695Saguzovsk 	mutex_exit(&seg_pmem_mtx);
337*6695Saguzovsk }
338*6695Saguzovsk 
339*6695Saguzovsk static void
340*6695Saguzovsk seg_premove_abuck(struct seg_phash *hp, int athr)
341*6695Saguzovsk {
342*6695Saguzovsk 	int lix;
343*6695Saguzovsk 
344*6695Saguzovsk 	ASSERT(MUTEX_HELD(&hp->p_hmutex));
345*6695Saguzovsk 	ASSERT((struct seg_phash *)hp->p_hnext == hp);
346*6695Saguzovsk 	ASSERT((struct seg_phash *)hp->p_hprev == hp);
347*6695Saguzovsk 	ASSERT(hp >= seg_phashtab_win &&
348*6695Saguzovsk 	    hp < &seg_phashtab_win[seg_phashsize_win]);
349*6695Saguzovsk 
350*6695Saguzovsk 	if (athr) {
351*6695Saguzovsk 		ASSERT(seg_pathr_on);
352*6695Saguzovsk 		ASSERT(seg_pahcur <= 1);
353*6695Saguzovsk 		/*
354*6695Saguzovsk 		 * We are called by asynchronous thread that found this bucket
355*6695Saguzovsk 		 * on not currently active (i.e. !seg_pahcur) list. Remove it
356*6695Saguzovsk 		 * from there.  Per bucket lock we are holding makes sure
357*6695Saguzovsk 		 * seg_pinsert() can't sneak in and add pcp entries to this
358*6695Saguzovsk 		 * bucket right before we remove the bucket from its list.
359*6695Saguzovsk 		 */
360*6695Saguzovsk 		lix = !seg_pahcur;
361*6695Saguzovsk 		ASSERT(hp->p_halink[lix].p_lnext != NULL);
362*6695Saguzovsk 		ASSERT(hp->p_halink[lix].p_lprev != NULL);
363*6695Saguzovsk 		ASSERT(hp->p_halink[!lix].p_lnext == NULL);
364*6695Saguzovsk 		ASSERT(hp->p_halink[!lix].p_lprev == NULL);
365*6695Saguzovsk 		hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
366*6695Saguzovsk 		hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
367*6695Saguzovsk 		hp->p_halink[lix].p_lnext = NULL;
368*6695Saguzovsk 		hp->p_halink[lix].p_lprev = NULL;
369*6695Saguzovsk 		return;
370*6695Saguzovsk 	}
371*6695Saguzovsk 
372*6695Saguzovsk 	mutex_enter(&seg_pmem_mtx);
373*6695Saguzovsk 	lix = seg_pahcur;
374*6695Saguzovsk 	ASSERT(lix >= 0 && lix <= 1);
375*6695Saguzovsk 
376*6695Saguzovsk 	/*
377*6695Saguzovsk 	 * If the bucket is on currently active list just remove it from
378*6695Saguzovsk 	 * there.
379*6695Saguzovsk 	 */
380*6695Saguzovsk 	if (hp->p_halink[lix].p_lnext != NULL) {
381*6695Saguzovsk 		ASSERT(hp->p_halink[lix].p_lprev != NULL);
382*6695Saguzovsk 		ASSERT(hp->p_halink[!lix].p_lnext == NULL);
383*6695Saguzovsk 		ASSERT(hp->p_halink[!lix].p_lprev == NULL);
384*6695Saguzovsk 		hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
385*6695Saguzovsk 		hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
386*6695Saguzovsk 		hp->p_halink[lix].p_lnext = NULL;
387*6695Saguzovsk 		hp->p_halink[lix].p_lprev = NULL;
388*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
389*6695Saguzovsk 		return;
390*6695Saguzovsk 	}
391*6695Saguzovsk 	ASSERT(hp->p_halink[lix].p_lprev == NULL);
392*6695Saguzovsk 
393*6695Saguzovsk 	/*
394*6695Saguzovsk 	 * If asynchronous thread is not running we can remove the bucket from
395*6695Saguzovsk 	 * not currently active list. The bucket must be on this list since we
396*6695Saguzovsk 	 * already checked that it's not on the other list and the bucket from
397*6695Saguzovsk 	 * which we just deleted the last pcp entry must be still on one of the
398*6695Saguzovsk 	 * active bucket lists.
399*6695Saguzovsk 	 */
400*6695Saguzovsk 	lix = !lix;
401*6695Saguzovsk 	ASSERT(hp->p_halink[lix].p_lnext != NULL);
402*6695Saguzovsk 	ASSERT(hp->p_halink[lix].p_lprev != NULL);
403*6695Saguzovsk 
404*6695Saguzovsk 	if (!seg_pathr_on) {
405*6695Saguzovsk 		hp->p_halink[lix].p_lnext->p_lprev = hp->p_halink[lix].p_lprev;
406*6695Saguzovsk 		hp->p_halink[lix].p_lprev->p_lnext = hp->p_halink[lix].p_lnext;
407*6695Saguzovsk 		hp->p_halink[lix].p_lnext = NULL;
408*6695Saguzovsk 		hp->p_halink[lix].p_lprev = NULL;
409*6695Saguzovsk 	}
410*6695Saguzovsk 	mutex_exit(&seg_pmem_mtx);
411*6695Saguzovsk }
412*6695Saguzovsk 
413*6695Saguzovsk /*
414*6695Saguzovsk  * Check if the bucket pointed to by hp already has a pcp entry that matches
415*6695Saguzovsk  * the request htag0, addr and len. Set *found to 1 if a match is found and
416*6695Saguzovsk  * to 0 otherwise.  Also delete matching entries that cover a smaller address
417*6695Saguzovsk  * range but start at the same address as the addr argument. Return the list
418*6695Saguzovsk  * of deleted entries, if any. This is an internal helper function called
419*6695Saguzovsk  * from seg_pinsert() only for non wired shadow lists. The caller already
420*6695Saguzovsk  * holds the per seg/amp list lock.
421*6695Saguzovsk  */
422*6695Saguzovsk static struct seg_pcache *
423*6695Saguzovsk seg_plookup_checkdup(struct seg_phash *hp, void *htag0,
424*6695Saguzovsk     caddr_t addr, size_t len, int *found)
425*6695Saguzovsk {
426*6695Saguzovsk 	struct seg_pcache *pcp;
427*6695Saguzovsk 	struct seg_pcache *delcallb_list = NULL;
428*6695Saguzovsk 
429*6695Saguzovsk 	ASSERT(MUTEX_HELD(&hp->p_hmutex));
430*6695Saguzovsk 
431*6695Saguzovsk 	*found = 0;
432*6695Saguzovsk 	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
433*6695Saguzovsk 	    pcp = pcp->p_hnext) {
434*6695Saguzovsk 		ASSERT(pcp->p_hashp == hp);
435*6695Saguzovsk 		if (pcp->p_htag0 == htag0 && pcp->p_addr == addr) {
436*6695Saguzovsk 			ASSERT(!IS_PCP_WIRED(pcp));
437*6695Saguzovsk 			if (pcp->p_len < len) {
438*6695Saguzovsk 				pcache_link_t *plinkp;
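				/*
				 * A smaller entry that is still active can't
				 * be unlinked here.  seg_pinactive() will
				 * notice the bigger entry we are inserting
				 * and free this one once its active count
				 * drops to zero.
				 */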
439*6695Saguzovsk 				if (pcp->p_active) {
440*6695Saguzovsk 					continue;
441*6695Saguzovsk 				}
442*6695Saguzovsk 				plinkp = &pcp->p_plink;
443*6695Saguzovsk 				plinkp->p_lprev->p_lnext = plinkp->p_lnext;
444*6695Saguzovsk 				plinkp->p_lnext->p_lprev = plinkp->p_lprev;
445*6695Saguzovsk 				pcp->p_hprev->p_hnext = pcp->p_hnext;
446*6695Saguzovsk 				pcp->p_hnext->p_hprev = pcp->p_hprev;
447*6695Saguzovsk 				pcp->p_hprev = delcallb_list;
448*6695Saguzovsk 				delcallb_list = pcp;
449*6695Saguzovsk 			} else {
450*6695Saguzovsk 				*found = 1;
451*6695Saguzovsk 				break;
452*6695Saguzovsk 			}
453*6695Saguzovsk 		}
454*6695Saguzovsk 	}
455*6695Saguzovsk 	return (delcallb_list);
456*6695Saguzovsk }
457*6695Saguzovsk 
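/*
 * A minimal sketch (illustrative only, not the actual segvn code) of how a
 * segment driver's pagelock entry point is expected to use this cache:
 *
 *	pplist = seg_plookup(seg, amp, addr, len, rw, 0);
 *	if (pplist != NULL)
 *		return the cached shadow list, the pages are still locked;
 *	lock the pages and build the shadow list pplist;
 *	(void) seg_pinsert(seg, amp, addr, len, wlen, pplist, rw, 0,
 *	    <driver's reclaim callback>);
 *
 * On the matching unlock request the driver calls seg_pinactive() with the
 * same tag, address, length and shadow list, which either keeps the entry
 * cached or calls the reclaim callback to unlock and free it.
 */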
458*6695Saguzovsk /*
459*6695Saguzovsk  * Look up an address range in the pagelock cache. Return the shadow list and
460*6695Saguzovsk  * bump up the active count. If amp is not NULL use amp as the lookup tag,
461*6695Saguzovsk  * otherwise use seg as the lookup tag.
4620Sstevel@tonic-gate  */
4630Sstevel@tonic-gate struct page **
464*6695Saguzovsk seg_plookup(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
465*6695Saguzovsk     enum seg_rw rw, uint_t flags)
4660Sstevel@tonic-gate {
4670Sstevel@tonic-gate 	struct seg_pcache *pcp;
4680Sstevel@tonic-gate 	struct seg_phash *hp;
469*6695Saguzovsk 	void *htag0;
470*6695Saguzovsk 
471*6695Saguzovsk 	ASSERT(seg != NULL);
472*6695Saguzovsk 	ASSERT(rw == S_READ || rw == S_WRITE);
4730Sstevel@tonic-gate 
4740Sstevel@tonic-gate 	/*
4750Sstevel@tonic-gate 	 * Skip the pagelock cache while DR is in progress or
4760Sstevel@tonic-gate 	 * seg_pcache is off.
4770Sstevel@tonic-gate 	 */
478*6695Saguzovsk 	if (seg_pdisabled) {
4790Sstevel@tonic-gate 		return (NULL);
4800Sstevel@tonic-gate 	}
481*6695Saguzovsk 	ASSERT(seg_phashsize_win != 0);
4820Sstevel@tonic-gate 
483*6695Saguzovsk 	htag0 = (amp == NULL ? (void *)seg : (void *)amp);
484*6695Saguzovsk 	hp = P_HASHBP(seg, htag0, addr, flags);
4850Sstevel@tonic-gate 	mutex_enter(&hp->p_hmutex);
4860Sstevel@tonic-gate 	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
4870Sstevel@tonic-gate 	    pcp = pcp->p_hnext) {
488*6695Saguzovsk 		ASSERT(pcp->p_hashp == hp);
489*6695Saguzovsk 		if (P_MATCH(pcp, htag0, addr, len)) {
490*6695Saguzovsk 			ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
491*6695Saguzovsk 			/*
492*6695Saguzovsk 			 * If this request wants to write pages
493*6695Saguzovsk 			 * but write permissions starting from
494*6695Saguzovsk 			 * addr don't cover the entire length len
495*6695Saguzovsk 			 * return lookup failure back to the caller.
496*6695Saguzovsk 			 * It will check protections and fail this
497*6695Saguzovsk 			 * pagelock operation with EACCESS error.
498*6695Saguzovsk 			 * pagelock operation with an EACCES error.
499*6695Saguzovsk 			if (rw == S_WRITE && pcp->p_wlen < len) {
500*6695Saguzovsk 				break;
501*6695Saguzovsk 			}
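			/*
			 * Don't let the active count wrap around; treat a
			 * saturated entry as a cache miss.
			 */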
502*6695Saguzovsk 			if (pcp->p_active == UINT_MAX) {
503*6695Saguzovsk 				break;
504*6695Saguzovsk 			}
5050Sstevel@tonic-gate 			pcp->p_active++;
506*6695Saguzovsk 			if (rw == S_WRITE && !pcp->p_write) {
507*6695Saguzovsk 				pcp->p_write = 1;
508*6695Saguzovsk 			}
5090Sstevel@tonic-gate 			mutex_exit(&hp->p_hmutex);
5100Sstevel@tonic-gate 			return (pcp->p_pp);
5110Sstevel@tonic-gate 		}
5120Sstevel@tonic-gate 	}
5130Sstevel@tonic-gate 	mutex_exit(&hp->p_hmutex);
5140Sstevel@tonic-gate 	return (NULL);
5150Sstevel@tonic-gate }
5160Sstevel@tonic-gate 
5170Sstevel@tonic-gate /*
518*6695Saguzovsk  * Mark an address range inactive. If the cache is off, the address range is
519*6695Saguzovsk  * not in the cache, or another shadow list that covers a bigger range is
520*6695Saguzovsk  * found, we call the segment driver to reclaim the pages. Otherwise just
521*6695Saguzovsk  * decrement the active count and set the ref bit.  If amp is not NULL use
522*6695Saguzovsk  * amp as the lookup tag, otherwise use seg as the lookup tag.
5230Sstevel@tonic-gate  */
5240Sstevel@tonic-gate void
525*6695Saguzovsk seg_pinactive(struct seg *seg, struct anon_map *amp, caddr_t addr,
526*6695Saguzovsk     size_t len, struct page **pp, enum seg_rw rw, uint_t flags,
527*6695Saguzovsk     seg_preclaim_cbfunc_t callback)
5280Sstevel@tonic-gate {
5290Sstevel@tonic-gate 	struct seg_pcache *pcp;
5300Sstevel@tonic-gate 	struct seg_phash *hp;
531*6695Saguzovsk 	kmutex_t *pmtx = NULL;
532*6695Saguzovsk 	pcache_link_t *pheadp;
533*6695Saguzovsk 	void *htag0;
534*6695Saguzovsk 	pgcnt_t npages = 0;
535*6695Saguzovsk 	int keep = 0;
5360Sstevel@tonic-gate 
537*6695Saguzovsk 	ASSERT(seg != NULL);
538*6695Saguzovsk 	ASSERT(rw == S_READ || rw == S_WRITE);
539*6695Saguzovsk 
540*6695Saguzovsk 	htag0 = (amp == NULL ? (void *)seg : (void *)amp);
541*6695Saguzovsk 
542*6695Saguzovsk 	/*
543*6695Saguzovsk 	 * Skip lookup if pcache is not configured.
544*6695Saguzovsk 	 */
545*6695Saguzovsk 	if (seg_phashsize_win == 0) {
546*6695Saguzovsk 		goto out;
5470Sstevel@tonic-gate 	}
548*6695Saguzovsk 
549*6695Saguzovsk 	/*
550*6695Saguzovsk 	 * Grab per seg/amp lock before hash lock if we are going to remove
551*6695Saguzovsk 	 * inactive entry from pcache.
552*6695Saguzovsk 	 */
553*6695Saguzovsk 	if (!IS_PFLAGS_WIRED(flags) && seg_pdisabled) {
554*6695Saguzovsk 		if (amp == NULL) {
555*6695Saguzovsk 			pheadp = &seg->s_phead;
556*6695Saguzovsk 			pmtx = &seg->s_pmtx;
557*6695Saguzovsk 		} else {
558*6695Saguzovsk 			pheadp = &amp->a_phead;
559*6695Saguzovsk 			pmtx = &amp->a_pmtx;
560*6695Saguzovsk 		}
561*6695Saguzovsk 		mutex_enter(pmtx);
562*6695Saguzovsk 	}
563*6695Saguzovsk 
564*6695Saguzovsk 	hp = P_HASHBP(seg, htag0, addr, flags);
5650Sstevel@tonic-gate 	mutex_enter(&hp->p_hmutex);
566*6695Saguzovsk again:
5670Sstevel@tonic-gate 	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
5680Sstevel@tonic-gate 	    pcp = pcp->p_hnext) {
569*6695Saguzovsk 		ASSERT(pcp->p_hashp == hp);
570*6695Saguzovsk 		if (P_MATCH_PP(pcp, htag0, addr, len, pp)) {
571*6695Saguzovsk 			ASSERT(IS_PFLAGS_WIRED(flags) == IS_PCP_WIRED(pcp));
572*6695Saguzovsk 			ASSERT(pcp->p_active);
573*6695Saguzovsk 			if (keep) {
574*6695Saguzovsk 				/*
575*6695Saguzovsk 				 * Don't remove this pcp entry
576*6695Saguzovsk 				 * if we didn't find duplicate
577*6695Saguzovsk 				 * shadow lists on second search.
578*6695Saguzovsk 				 * Somebody removed those duplicates
579*6695Saguzovsk 				 * since we dropped hash lock after first
580*6695Saguzovsk 				 * search.
581*6695Saguzovsk 				 */
582*6695Saguzovsk 				ASSERT(pmtx != NULL);
583*6695Saguzovsk 				ASSERT(!IS_PFLAGS_WIRED(flags));
584*6695Saguzovsk 				mutex_exit(pmtx);
585*6695Saguzovsk 				pmtx = NULL;
586*6695Saguzovsk 			}
5870Sstevel@tonic-gate 			pcp->p_active--;
588*6695Saguzovsk 			if (pcp->p_active == 0 && (pmtx != NULL ||
589*6695Saguzovsk 			    (seg_pdisabled && IS_PFLAGS_WIRED(flags)))) {
590*6695Saguzovsk 
591*6695Saguzovsk 				/*
592*6695Saguzovsk 				 * This entry is no longer active.  Remove it
593*6695Saguzovsk 				 * now either because pcaching is temporarily
594*6695Saguzovsk 				 * disabled or there are other pcp entries that
595*6695Saguzovsk 				 * can match this pagelock request (i.e. this
596*6695Saguzovsk 				 * entry is a duplicate).
597*6695Saguzovsk 				 */
5980Sstevel@tonic-gate 
5990Sstevel@tonic-gate 				ASSERT(callback == pcp->p_callback);
600*6695Saguzovsk 				if (pmtx != NULL) {
601*6695Saguzovsk 					pcache_link_t *plinkp = &pcp->p_plink;
602*6695Saguzovsk 					ASSERT(!IS_PCP_WIRED(pcp));
603*6695Saguzovsk 					ASSERT(pheadp->p_lnext != pheadp);
604*6695Saguzovsk 					ASSERT(pheadp->p_lprev != pheadp);
605*6695Saguzovsk 					plinkp->p_lprev->p_lnext =
606*6695Saguzovsk 					    plinkp->p_lnext;
607*6695Saguzovsk 					plinkp->p_lnext->p_lprev =
608*6695Saguzovsk 					    plinkp->p_lprev;
609*6695Saguzovsk 				}
6100Sstevel@tonic-gate 				pcp->p_hprev->p_hnext = pcp->p_hnext;
6110Sstevel@tonic-gate 				pcp->p_hnext->p_hprev = pcp->p_hprev;
612*6695Saguzovsk 				if (!IS_PCP_WIRED(pcp) &&
613*6695Saguzovsk 				    hp->p_hnext == (struct seg_pcache *)hp) {
614*6695Saguzovsk 					/*
615*6695Saguzovsk 					 * We removed the last entry from this
616*6695Saguzovsk 					 * bucket.  Now remove the bucket from
617*6695Saguzovsk 					 * its active list.
618*6695Saguzovsk 					 */
619*6695Saguzovsk 					seg_premove_abuck(hp, 0);
620*6695Saguzovsk 				}
6210Sstevel@tonic-gate 				mutex_exit(&hp->p_hmutex);
622*6695Saguzovsk 				if (pmtx != NULL) {
623*6695Saguzovsk 					mutex_exit(pmtx);
624*6695Saguzovsk 				}
625*6695Saguzovsk 				len = pcp->p_len;
626*6695Saguzovsk 				npages = btop(len);
627*6695Saguzovsk 				if (rw != S_WRITE && pcp->p_write) {
628*6695Saguzovsk 					rw = S_WRITE;
629*6695Saguzovsk 				}
630*6695Saguzovsk 				kmem_cache_free(seg_pkmcache, pcp);
631*6695Saguzovsk 				goto out;
632*6695Saguzovsk 			} else {
633*6695Saguzovsk 				/*
634*6695Saguzovsk 				 * We found a matching pcp entry but will not
635*6695Saguzovsk 				 * free it right away even if it's no longer
636*6695Saguzovsk 				 * active.
637*6695Saguzovsk 				 */
638*6695Saguzovsk 				if (!pcp->p_active && !IS_PCP_WIRED(pcp)) {
639*6695Saguzovsk 					/*
640*6695Saguzovsk 					 * Set the reference bit and mark the
641*6695Saguzovsk 					 * time of last access to this pcp
642*6695Saguzovsk 					 * so that asynchronous thread doesn't
643*6695Saguzovsk 					 * free it immediately since
644*6695Saguzovsk 					 * it may be reactivated very soon.
645*6695Saguzovsk 					 */
646*6695Saguzovsk 					pcp->p_lbolt = lbolt;
647*6695Saguzovsk 					pcp->p_ref = 1;
648*6695Saguzovsk 				}
649*6695Saguzovsk 				mutex_exit(&hp->p_hmutex);
650*6695Saguzovsk 				if (pmtx != NULL) {
651*6695Saguzovsk 					mutex_exit(pmtx);
6520Sstevel@tonic-gate 				}
653*6695Saguzovsk 				return;
654*6695Saguzovsk 			}
655*6695Saguzovsk 		} else if (!IS_PFLAGS_WIRED(flags) &&
656*6695Saguzovsk 		    P_MATCH(pcp, htag0, addr, len)) {
657*6695Saguzovsk 			/*
658*6695Saguzovsk 			 * This is a duplicate pcp entry.  This situation may
659*6695Saguzovsk 			 * happen if a bigger shadow list that covers our
660*6695Saguzovsk 			 * range was added while our entry was still active.
661*6695Saguzovsk 			 * Now we can free our pcp entry if it becomes
662*6695Saguzovsk 			 * inactive.
663*6695Saguzovsk 			 */
664*6695Saguzovsk 			if (!pcp->p_active) {
665*6695Saguzovsk 				/*
666*6695Saguzovsk 				 * Mark this entry as referenced just in case
667*6695Saguzovsk 				 * we'll free our own pcp entry soon.
668*6695Saguzovsk 				 */
669*6695Saguzovsk 				pcp->p_lbolt = lbolt;
670*6695Saguzovsk 				pcp->p_ref = 1;
671*6695Saguzovsk 			}
672*6695Saguzovsk 			if (pmtx != NULL) {
673*6695Saguzovsk 				/*
674*6695Saguzovsk 				 * we are already holding pmtx and found a
675*6695Saguzovsk 				 * duplicate.  Don't keep our own pcp entry.
676*6695Saguzovsk 				 */
677*6695Saguzovsk 				keep = 0;
678*6695Saguzovsk 				continue;
6790Sstevel@tonic-gate 			}
680*6695Saguzovsk 			/*
681*6695Saguzovsk 			 * We have to use mutex_tryenter to attempt to lock
682*6695Saguzovsk 			 * seg/amp list lock since we already hold hash lock
683*6695Saguzovsk 			 * and seg/amp list lock is above hash lock in lock
684*6695Saguzovsk 			 * order.  If mutex_tryenter fails, drop the hash lock,
685*6695Saguzovsk 			 * retake both locks in the correct order and re-search
686*6695Saguzovsk 			 * this hash chain.
687*6695Saguzovsk 			 */
688*6695Saguzovsk 			ASSERT(keep == 0);
689*6695Saguzovsk 			if (amp == NULL) {
690*6695Saguzovsk 				pheadp = &seg->s_phead;
691*6695Saguzovsk 				pmtx = &seg->s_pmtx;
692*6695Saguzovsk 			} else {
693*6695Saguzovsk 				pheadp = &amp->a_phead;
694*6695Saguzovsk 				pmtx = &amp->a_pmtx;
695*6695Saguzovsk 			}
696*6695Saguzovsk 			if (!mutex_tryenter(pmtx)) {
697*6695Saguzovsk 				mutex_exit(&hp->p_hmutex);
698*6695Saguzovsk 				mutex_enter(pmtx);
699*6695Saguzovsk 				mutex_enter(&hp->p_hmutex);
700*6695Saguzovsk 				/*
701*6695Saguzovsk 				 * If we don't find bigger shadow list on
702*6695Saguzovsk 				 * second search (it may happen since we
703*6695Saguzovsk 				 * dropped bucket lock) keep the entry that
704*6695Saguzovsk 				 * matches our own shadow list.
705*6695Saguzovsk 				 */
706*6695Saguzovsk 				keep = 1;
707*6695Saguzovsk 				goto again;
708*6695Saguzovsk 			}
7090Sstevel@tonic-gate 		}
7100Sstevel@tonic-gate 	}
7110Sstevel@tonic-gate 	mutex_exit(&hp->p_hmutex);
712*6695Saguzovsk 	if (pmtx != NULL) {
713*6695Saguzovsk 		mutex_exit(pmtx);
714*6695Saguzovsk 	}
7150Sstevel@tonic-gate out:
716*6695Saguzovsk 	(*callback)(htag0, addr, len, pp, rw, 0);
717*6695Saguzovsk 	if (npages) {
718*6695Saguzovsk 		mutex_enter(&seg_pmem_mtx);
719*6695Saguzovsk 		ASSERT(seg_plocked >= npages);
720*6695Saguzovsk 		seg_plocked -= npages;
721*6695Saguzovsk 		if (!IS_PFLAGS_WIRED(flags)) {
722*6695Saguzovsk 			ASSERT(seg_plocked_window >= npages);
723*6695Saguzovsk 			seg_plocked_window -= npages;
724*6695Saguzovsk 		}
725*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
726*6695Saguzovsk 	}
727*6695Saguzovsk 
7280Sstevel@tonic-gate }
7290Sstevel@tonic-gate 
730*6695Saguzovsk #ifdef DEBUG
731*6695Saguzovsk static uint32_t p_insert_chk_mtbf = 0;
732*6695Saguzovsk #endif
733*6695Saguzovsk 
7340Sstevel@tonic-gate /*
7350Sstevel@tonic-gate  * seg_pinsert_check() is used by segment drivers to predict whether
7360Sstevel@tonic-gate  * a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
7370Sstevel@tonic-gate  */
738*6695Saguzovsk /*ARGSUSED*/
7390Sstevel@tonic-gate int
740*6695Saguzovsk seg_pinsert_check(struct seg *seg, struct anon_map *amp, caddr_t addr,
741*6695Saguzovsk     size_t len, uint_t flags)
7420Sstevel@tonic-gate {
743*6695Saguzovsk 	ASSERT(seg != NULL);
7440Sstevel@tonic-gate 
745*6695Saguzovsk #ifdef DEBUG
746*6695Saguzovsk 	if (p_insert_chk_mtbf && !(gethrtime() % p_insert_chk_mtbf)) {
7470Sstevel@tonic-gate 		return (SEGP_FAIL);
7480Sstevel@tonic-gate 	}
749*6695Saguzovsk #endif
750*6695Saguzovsk 
751*6695Saguzovsk 	if (seg_pdisabled) {
7520Sstevel@tonic-gate 		return (SEGP_FAIL);
7530Sstevel@tonic-gate 	}
754*6695Saguzovsk 	ASSERT(seg_phashsize_win != 0);
755*6695Saguzovsk 
756*6695Saguzovsk 	if (IS_PFLAGS_WIRED(flags)) {
757*6695Saguzovsk 		return (SEGP_SUCCESS);
758*6695Saguzovsk 	}
7590Sstevel@tonic-gate 
760*6695Saguzovsk 	if (seg_plocked_window + btop(len) > seg_pmaxwindow) {
761*6695Saguzovsk 		return (SEGP_FAIL);
7620Sstevel@tonic-gate 	}
763*6695Saguzovsk 
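	/*
	 * Don't cache new shadow lists when free memory is already below
	 * desfree.
	 */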
764*6695Saguzovsk 	if (freemem < desfree) {
765*6695Saguzovsk 		return (SEGP_FAIL);
766*6695Saguzovsk 	}
767*6695Saguzovsk 
7680Sstevel@tonic-gate 	return (SEGP_SUCCESS);
7690Sstevel@tonic-gate }
7700Sstevel@tonic-gate 
771*6695Saguzovsk #ifdef DEBUG
772*6695Saguzovsk static uint32_t p_insert_mtbf = 0;
773*6695Saguzovsk #endif
7740Sstevel@tonic-gate 
7750Sstevel@tonic-gate /*
776*6695Saguzovsk  * Insert an address range with its shadow list into the pagelock cache if
777*6695Saguzovsk  * there's no shadow list already cached for this address range. If the cache
778*6695Saguzovsk  * is off, caching is temporarily disabled, or the allowed 'window' is
779*6695Saguzovsk  * exceeded return SEGP_FAIL. Otherwise return SEGP_SUCCESS.
780*6695Saguzovsk  *
781*6695Saguzovsk  * For non wired shadow lists (segvn case) include the address in the hashing
782*6695Saguzovsk  * function to avoid linking all the entries from the same segment or amp to
783*6695Saguzovsk  * the same bucket.  amp is used instead of seg if amp is not NULL. Non wired
784*6695Saguzovsk  * pcache entries are also linked on a per segment/amp list so that all
785*6695Saguzovsk  * entries can be found quickly during seg/amp purge without walking the
786*6695Saguzovsk  * entire pcache hash table.  For wired shadow lists (segspt case) we
787*6695Saguzovsk  * don't use address hashing and per segment linking because the caller
788*6695Saguzovsk  * currently inserts only one entry per segment that covers the entire
789*6695Saguzovsk  * segment. If we used per segment linking even for segspt it would complicate
790*6695Saguzovsk  * seg_ppurge_wiredpp() locking.
791*6695Saguzovsk  *
792*6695Saguzovsk  * Both the hash bucket and per seg/amp locks need to be held before adding a
793*6695Saguzovsk  * non wired entry to the hash and per seg/amp lists. The per seg/amp lock
794*6695Saguzovsk  * should be taken first.
795*6695Saguzovsk  *
796*6695Saguzovsk  * This function will also remove from pcache old inactive shadow lists that
797*6695Saguzovsk  * overlap with this request but cover a smaller range for the same start
798*6695Saguzovsk  * address.
7990Sstevel@tonic-gate  */
8000Sstevel@tonic-gate int
801*6695Saguzovsk seg_pinsert(struct seg *seg, struct anon_map *amp, caddr_t addr, size_t len,
802*6695Saguzovsk     size_t wlen, struct page **pp, enum seg_rw rw, uint_t flags,
803*6695Saguzovsk     seg_preclaim_cbfunc_t callback)
8040Sstevel@tonic-gate {
8050Sstevel@tonic-gate 	struct seg_pcache *pcp;
8060Sstevel@tonic-gate 	struct seg_phash *hp;
8070Sstevel@tonic-gate 	pgcnt_t npages;
808*6695Saguzovsk 	pcache_link_t *pheadp;
809*6695Saguzovsk 	kmutex_t *pmtx;
810*6695Saguzovsk 	struct seg_pcache *delcallb_list = NULL;
8110Sstevel@tonic-gate 
812*6695Saguzovsk 	ASSERT(seg != NULL);
813*6695Saguzovsk 	ASSERT(rw == S_READ || rw == S_WRITE);
814*6695Saguzovsk 	ASSERT(rw == S_READ || wlen == len);
815*6695Saguzovsk 	ASSERT(rw == S_WRITE || wlen <= len);
816*6695Saguzovsk 	ASSERT(amp == NULL || wlen == len);
817*6695Saguzovsk 
818*6695Saguzovsk #ifdef DEBUG
819*6695Saguzovsk 	if (p_insert_mtbf && !(gethrtime() % p_insert_mtbf)) {
8200Sstevel@tonic-gate 		return (SEGP_FAIL);
8210Sstevel@tonic-gate 	}
822*6695Saguzovsk #endif
823*6695Saguzovsk 
824*6695Saguzovsk 	if (seg_pdisabled) {
8250Sstevel@tonic-gate 		return (SEGP_FAIL);
8260Sstevel@tonic-gate 	}
827*6695Saguzovsk 	ASSERT(seg_phashsize_win != 0);
828*6695Saguzovsk 
829*6695Saguzovsk 	ASSERT((len & PAGEOFFSET) == 0);
830*6695Saguzovsk 	npages = btop(len);
831*6695Saguzovsk 	mutex_enter(&seg_pmem_mtx);
832*6695Saguzovsk 	if (!IS_PFLAGS_WIRED(flags)) {
833*6695Saguzovsk 		if (seg_plocked_window + npages > seg_pmaxwindow) {
834*6695Saguzovsk 			mutex_exit(&seg_pmem_mtx);
8350Sstevel@tonic-gate 			return (SEGP_FAIL);
8360Sstevel@tonic-gate 		}
837*6695Saguzovsk 		seg_plocked_window += npages;
8380Sstevel@tonic-gate 	}
8390Sstevel@tonic-gate 	seg_plocked += npages;
840*6695Saguzovsk 	mutex_exit(&seg_pmem_mtx);
8410Sstevel@tonic-gate 
842*6695Saguzovsk 	pcp = kmem_cache_alloc(seg_pkmcache, KM_SLEEP);
843*6695Saguzovsk 	/*
844*6695Saguzovsk 	 * If amp is not NULL set htag0 to amp otherwise set it to seg.
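	 * Only the low 16 bits of flags fit in the ushort_t p_flags.  The
	 * optional page shift carried in the upper bits of flags (when
	 * SEGP_PSHIFT is set) is only used for bucket hashing and is not
	 * remembered in the entry.  SEGP_AMP records that the entry is tagged
	 * by an amp rather than a seg.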
845*6695Saguzovsk 	 */
846*6695Saguzovsk 	if (amp == NULL) {
847*6695Saguzovsk 		pcp->p_htag0 = (void *)seg;
848*6695Saguzovsk 		pcp->p_flags = flags & 0xffff;
849*6695Saguzovsk 	} else {
850*6695Saguzovsk 		pcp->p_htag0 = (void *)amp;
851*6695Saguzovsk 		pcp->p_flags = (flags & 0xffff) | SEGP_AMP;
852*6695Saguzovsk 	}
8530Sstevel@tonic-gate 	pcp->p_addr = addr;
8540Sstevel@tonic-gate 	pcp->p_len = len;
855*6695Saguzovsk 	pcp->p_wlen = wlen;
8560Sstevel@tonic-gate 	pcp->p_pp = pp;
857*6695Saguzovsk 	pcp->p_write = (rw == S_WRITE);
8580Sstevel@tonic-gate 	pcp->p_callback = callback;
8590Sstevel@tonic-gate 	pcp->p_active = 1;
8600Sstevel@tonic-gate 
861*6695Saguzovsk 	hp = P_HASHBP(seg, pcp->p_htag0, addr, flags);
862*6695Saguzovsk 	if (!IS_PFLAGS_WIRED(flags)) {
863*6695Saguzovsk 		int found;
864*6695Saguzovsk 		void *htag0;
865*6695Saguzovsk 		if (amp == NULL) {
866*6695Saguzovsk 			pheadp = &seg->s_phead;
867*6695Saguzovsk 			pmtx = &seg->s_pmtx;
868*6695Saguzovsk 			htag0 = (void *)seg;
869*6695Saguzovsk 		} else {
870*6695Saguzovsk 			pheadp = &amp->a_phead;
871*6695Saguzovsk 			pmtx = &amp->a_pmtx;
872*6695Saguzovsk 			htag0 = (void *)amp;
873*6695Saguzovsk 		}
874*6695Saguzovsk 		mutex_enter(pmtx);
875*6695Saguzovsk 		mutex_enter(&hp->p_hmutex);
876*6695Saguzovsk 		delcallb_list = seg_plookup_checkdup(hp, htag0, addr,
877*6695Saguzovsk 		    len, &found);
878*6695Saguzovsk 		if (found) {
879*6695Saguzovsk 			mutex_exit(&hp->p_hmutex);
880*6695Saguzovsk 			mutex_exit(pmtx);
881*6695Saguzovsk 			mutex_enter(&seg_pmem_mtx);
882*6695Saguzovsk 			seg_plocked -= npages;
883*6695Saguzovsk 			seg_plocked_window -= npages;
884*6695Saguzovsk 			mutex_exit(&seg_pmem_mtx);
885*6695Saguzovsk 			kmem_cache_free(seg_pkmcache, pcp);
886*6695Saguzovsk 			goto out;
887*6695Saguzovsk 		}
888*6695Saguzovsk 		pcp->p_plink.p_lnext = pheadp->p_lnext;
889*6695Saguzovsk 		pcp->p_plink.p_lprev = pheadp;
890*6695Saguzovsk 		pheadp->p_lnext->p_lprev = &pcp->p_plink;
891*6695Saguzovsk 		pheadp->p_lnext = &pcp->p_plink;
892*6695Saguzovsk 	} else {
893*6695Saguzovsk 		mutex_enter(&hp->p_hmutex);
894*6695Saguzovsk 	}
895*6695Saguzovsk 	pcp->p_hashp = hp;
8960Sstevel@tonic-gate 	pcp->p_hnext = hp->p_hnext;
8970Sstevel@tonic-gate 	pcp->p_hprev = (struct seg_pcache *)hp;
8980Sstevel@tonic-gate 	hp->p_hnext->p_hprev = pcp;
8990Sstevel@tonic-gate 	hp->p_hnext = pcp;
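	/*
	 * If the bucket's tail pointer now refers to the entry we just put at
	 * the head, the chain was empty before this insert, so link the
	 * bucket onto the current active bucket list.
	 */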
900*6695Saguzovsk 	if (!IS_PFLAGS_WIRED(flags) &&
901*6695Saguzovsk 	    hp->p_hprev == pcp) {
902*6695Saguzovsk 		seg_padd_abuck(hp);
903*6695Saguzovsk 	}
9040Sstevel@tonic-gate 	mutex_exit(&hp->p_hmutex);
905*6695Saguzovsk 	if (!IS_PFLAGS_WIRED(flags)) {
906*6695Saguzovsk 		mutex_exit(pmtx);
907*6695Saguzovsk 	}
908*6695Saguzovsk 
909*6695Saguzovsk out:
910*6695Saguzovsk 	npages = 0;
911*6695Saguzovsk 	while (delcallb_list != NULL) {
912*6695Saguzovsk 		pcp = delcallb_list;
913*6695Saguzovsk 		delcallb_list = pcp->p_hprev;
914*6695Saguzovsk 		ASSERT(!IS_PCP_WIRED(pcp) && !pcp->p_active);
915*6695Saguzovsk 		(void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
916*6695Saguzovsk 		    pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
917*6695Saguzovsk 		npages += btop(pcp->p_len);
918*6695Saguzovsk 		kmem_cache_free(seg_pkmcache, pcp);
919*6695Saguzovsk 	}
920*6695Saguzovsk 	if (npages) {
921*6695Saguzovsk 		ASSERT(!IS_PFLAGS_WIRED(flags));
922*6695Saguzovsk 		mutex_enter(&seg_pmem_mtx);
923*6695Saguzovsk 		ASSERT(seg_plocked >= npages);
924*6695Saguzovsk 		ASSERT(seg_plocked_window >= npages);
925*6695Saguzovsk 		seg_plocked -= npages;
926*6695Saguzovsk 		seg_plocked_window -= npages;
927*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
928*6695Saguzovsk 	}
929*6695Saguzovsk 
9300Sstevel@tonic-gate 	return (SEGP_SUCCESS);
9310Sstevel@tonic-gate }
9320Sstevel@tonic-gate 
9330Sstevel@tonic-gate /*
934*6695Saguzovsk  * Purge entries from the pagelock cache that are not active
935*6695Saguzovsk  * and not recently used.
9360Sstevel@tonic-gate  */
9370Sstevel@tonic-gate static void
938*6695Saguzovsk seg_ppurge_async(int force)
9390Sstevel@tonic-gate {
9400Sstevel@tonic-gate 	struct seg_pcache *delcallb_list = NULL;
9410Sstevel@tonic-gate 	struct seg_pcache *pcp;
9420Sstevel@tonic-gate 	struct seg_phash *hp;
9430Sstevel@tonic-gate 	pgcnt_t npages = 0;
9440Sstevel@tonic-gate 	pgcnt_t npages_window = 0;
945*6695Saguzovsk 	pgcnt_t	npgs_to_purge;
946*6695Saguzovsk 	pgcnt_t npgs_purged = 0;
947*6695Saguzovsk 	int hlinks = 0;
948*6695Saguzovsk 	int hlix;
949*6695Saguzovsk 	pcache_link_t *hlinkp;
950*6695Saguzovsk 	pcache_link_t *hlnextp = NULL;
951*6695Saguzovsk 	int lowmem;
952*6695Saguzovsk 	int trim;
953*6695Saguzovsk 
954*6695Saguzovsk 	ASSERT(seg_phashsize_win != 0);
9550Sstevel@tonic-gate 
9560Sstevel@tonic-gate 	/*
957*6695Saguzovsk 	 * if the cache is off or empty, return
9580Sstevel@tonic-gate 	 */
959*6695Saguzovsk 	if (seg_plocked == 0 || (!force && seg_plocked_window == 0)) {
9600Sstevel@tonic-gate 		return;
9610Sstevel@tonic-gate 	}
962*6695Saguzovsk 
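	/*
	 * For a regular (non forced) call decide whether to purge at all and
	 * how much.  "lowmem" is set when free memory falls below thresholds
	 * derived from desfree and lotsfree (the higher thresholds also
	 * require the cache to hold a large share of availrmem_initial);
	 * "trim" is set once the cache exceeds 7/8 of seg_pmaxwindow.  The
	 * purge target is a 1/(2^seg_pshrink_shift) fraction of the cached
	 * window, bounded by seg_pmaxapurge_npages (or by desfree if that is
	 * larger when in lowmem state).
	 */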
963*6695Saguzovsk 	if (!force) {
964*6695Saguzovsk 		lowmem = 0;
965*6695Saguzovsk 		trim = 0;
966*6695Saguzovsk 		if (freemem < lotsfree + needfree) {
967*6695Saguzovsk 			spgcnt_t fmem = MAX((spgcnt_t)(freemem - needfree), 0);
968*6695Saguzovsk 			if (fmem <= 5 * (desfree >> 2)) {
969*6695Saguzovsk 				lowmem = 1;
970*6695Saguzovsk 			} else if (fmem <= 7 * (lotsfree >> 3)) {
971*6695Saguzovsk 				if (seg_plocked_window >=
972*6695Saguzovsk 				    (availrmem_initial >> 1)) {
973*6695Saguzovsk 					lowmem = 1;
974*6695Saguzovsk 				}
975*6695Saguzovsk 			} else if (fmem < lotsfree) {
976*6695Saguzovsk 				if (seg_plocked_window >=
977*6695Saguzovsk 				    3 * (availrmem_initial >> 2)) {
978*6695Saguzovsk 					lowmem = 1;
979*6695Saguzovsk 				}
980*6695Saguzovsk 			}
981*6695Saguzovsk 		}
982*6695Saguzovsk 		if (seg_plocked_window >= 7 * (seg_pmaxwindow >> 3)) {
983*6695Saguzovsk 			trim = 1;
984*6695Saguzovsk 		}
985*6695Saguzovsk 		if (!lowmem && !trim) {
986*6695Saguzovsk 			return;
987*6695Saguzovsk 		}
988*6695Saguzovsk 		npgs_to_purge = seg_plocked_window >>
989*6695Saguzovsk 		    seg_pshrink_shift;
990*6695Saguzovsk 		if (lowmem) {
991*6695Saguzovsk 			npgs_to_purge = MIN(npgs_to_purge,
992*6695Saguzovsk 			    MAX(seg_pmaxapurge_npages, desfree));
993*6695Saguzovsk 		} else {
994*6695Saguzovsk 			npgs_to_purge = MIN(npgs_to_purge,
995*6695Saguzovsk 			    seg_pmaxapurge_npages);
996*6695Saguzovsk 		}
997*6695Saguzovsk 		if (npgs_to_purge == 0) {
998*6695Saguzovsk 			return;
999*6695Saguzovsk 		}
1000*6695Saguzovsk 	} else {
1001*6695Saguzovsk 		struct seg_phash_wired *hpw;
1002*6695Saguzovsk 
1003*6695Saguzovsk 		ASSERT(seg_phashsize_wired != 0);
1004*6695Saguzovsk 
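		/*
		 * A forced purge also sweeps the wired (ISM/DISM) hash table.
		 * Wired buckets are never linked on the active bucket lists,
		 * so they have to be walked exhaustively here.
		 */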
1005*6695Saguzovsk 		for (hpw = seg_phashtab_wired;
1006*6695Saguzovsk 		    hpw < &seg_phashtab_wired[seg_phashsize_wired]; hpw++) {
1007*6695Saguzovsk 
1008*6695Saguzovsk 			if (hpw->p_hnext == (struct seg_pcache *)hpw) {
1009*6695Saguzovsk 				continue;
1010*6695Saguzovsk 			}
1011*6695Saguzovsk 
1012*6695Saguzovsk 			mutex_enter(&hpw->p_hmutex);
1013*6695Saguzovsk 
1014*6695Saguzovsk 			for (pcp = hpw->p_hnext;
1015*6695Saguzovsk 			    pcp != (struct seg_pcache *)hpw;
1016*6695Saguzovsk 			    pcp = pcp->p_hnext) {
1017*6695Saguzovsk 
1018*6695Saguzovsk 				ASSERT(IS_PCP_WIRED(pcp));
1019*6695Saguzovsk 				ASSERT(pcp->p_hashp ==
1020*6695Saguzovsk 				    (struct seg_phash *)hpw);
1021*6695Saguzovsk 
1022*6695Saguzovsk 				if (pcp->p_active) {
1023*6695Saguzovsk 					continue;
1024*6695Saguzovsk 				}
1025*6695Saguzovsk 				pcp->p_hprev->p_hnext = pcp->p_hnext;
1026*6695Saguzovsk 				pcp->p_hnext->p_hprev = pcp->p_hprev;
1027*6695Saguzovsk 				pcp->p_hprev = delcallb_list;
1028*6695Saguzovsk 				delcallb_list = pcp;
1029*6695Saguzovsk 			}
1030*6695Saguzovsk 			mutex_exit(&hpw->p_hmutex);
1031*6695Saguzovsk 		}
1032*6695Saguzovsk 	}
1033*6695Saguzovsk 
1034*6695Saguzovsk 	mutex_enter(&seg_pmem_mtx);
1035*6695Saguzovsk 	if (seg_pathr_on) {
1036*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
1037*6695Saguzovsk 		goto runcb;
1038*6695Saguzovsk 	}
1039*6695Saguzovsk 	seg_pathr_on = 1;
1040*6695Saguzovsk 	mutex_exit(&seg_pmem_mtx);
1041*6695Saguzovsk 	ASSERT(seg_pahcur <= 1);
1042*6695Saguzovsk 	hlix = !seg_pahcur;
1043*6695Saguzovsk 
1044*6695Saguzovsk again:
1045*6695Saguzovsk 	for (hlinkp = seg_pahhead[hlix].p_lnext; hlinkp != &seg_pahhead[hlix];
1046*6695Saguzovsk 	    hlinkp = hlnextp) {
1047*6695Saguzovsk 
1048*6695Saguzovsk 		hlnextp = hlinkp->p_lnext;
1049*6695Saguzovsk 		ASSERT(hlnextp != NULL);
1050*6695Saguzovsk 
1051*6695Saguzovsk 		hp = hlink2phash(hlinkp, hlix);
1052*6695Saguzovsk 		if (hp->p_hnext == (struct seg_pcache *)hp) {
1053*6695Saguzovsk 			seg_pathr_empty_ahb++;
1054*6695Saguzovsk 			continue;
1055*6695Saguzovsk 		}
1056*6695Saguzovsk 		seg_pathr_full_ahb++;
10570Sstevel@tonic-gate 		mutex_enter(&hp->p_hmutex);
1058*6695Saguzovsk 
1059*6695Saguzovsk 		for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
1060*6695Saguzovsk 		    pcp = pcp->p_hnext) {
1061*6695Saguzovsk 			pcache_link_t *pheadp;
1062*6695Saguzovsk 			pcache_link_t *plinkp;
1063*6695Saguzovsk 			void *htag0;
1064*6695Saguzovsk 			kmutex_t *pmtx;
1065*6695Saguzovsk 
1066*6695Saguzovsk 			ASSERT(!IS_PCP_WIRED(pcp));
1067*6695Saguzovsk 			ASSERT(pcp->p_hashp == hp);
1068*6695Saguzovsk 
1069*6695Saguzovsk 			if (pcp->p_active) {
1070*6695Saguzovsk 				continue;
1071*6695Saguzovsk 			}
1072*6695Saguzovsk 			if (!force && pcp->p_ref &&
1073*6695Saguzovsk 			    PCP_AGE(pcp) < seg_pmax_pcpage) {
1074*6695Saguzovsk 				pcp->p_ref = 0;
1075*6695Saguzovsk 				continue;
1076*6695Saguzovsk 			}
1077*6695Saguzovsk 			plinkp = &pcp->p_plink;
1078*6695Saguzovsk 			htag0 = pcp->p_htag0;
1079*6695Saguzovsk 			if (pcp->p_flags & SEGP_AMP) {
1080*6695Saguzovsk 				pheadp = &((amp_t *)htag0)->a_phead;
1081*6695Saguzovsk 				pmtx = &((amp_t *)htag0)->a_pmtx;
1082*6695Saguzovsk 			} else {
1083*6695Saguzovsk 				pheadp = &((seg_t *)htag0)->s_phead;
1084*6695Saguzovsk 				pmtx = &((seg_t *)htag0)->s_pmtx;
1085*6695Saguzovsk 			}
1086*6695Saguzovsk 			if (!mutex_tryenter(pmtx)) {
1087*6695Saguzovsk 				continue;
1088*6695Saguzovsk 			}
1089*6695Saguzovsk 			ASSERT(pheadp->p_lnext != pheadp);
1090*6695Saguzovsk 			ASSERT(pheadp->p_lprev != pheadp);
1091*6695Saguzovsk 			plinkp->p_lprev->p_lnext =
1092*6695Saguzovsk 			    plinkp->p_lnext;
1093*6695Saguzovsk 			plinkp->p_lnext->p_lprev =
1094*6695Saguzovsk 			    plinkp->p_lprev;
1095*6695Saguzovsk 			pcp->p_hprev->p_hnext = pcp->p_hnext;
1096*6695Saguzovsk 			pcp->p_hnext->p_hprev = pcp->p_hprev;
1097*6695Saguzovsk 			mutex_exit(pmtx);
1098*6695Saguzovsk 			pcp->p_hprev = delcallb_list;
1099*6695Saguzovsk 			delcallb_list = pcp;
1100*6695Saguzovsk 			npgs_purged += btop(pcp->p_len);
1101*6695Saguzovsk 		}
1102*6695Saguzovsk 		if (hp->p_hnext == (struct seg_pcache *)hp) {
1103*6695Saguzovsk 			seg_premove_abuck(hp, 1);
1104*6695Saguzovsk 		}
1105*6695Saguzovsk 		mutex_exit(&hp->p_hmutex);
1106*6695Saguzovsk 		if (npgs_purged >= seg_plocked_window) {
1107*6695Saguzovsk 			break;
1108*6695Saguzovsk 		}
1109*6695Saguzovsk 		if (!force) {
1110*6695Saguzovsk 			if (npgs_purged >= npgs_to_purge) {
1111*6695Saguzovsk 				break;
1112*6695Saguzovsk 			}
1113*6695Saguzovsk 			if (!trim && !(seg_pathr_full_ahb & 15)) {
1114*6695Saguzovsk 				ASSERT(lowmem);
1115*6695Saguzovsk 				if (freemem >= lotsfree + needfree) {
1116*6695Saguzovsk 					break;
1117*6695Saguzovsk 				}
1118*6695Saguzovsk 			}
1119*6695Saguzovsk 		}
1120*6695Saguzovsk 	}
1121*6695Saguzovsk 
1122*6695Saguzovsk 	if (hlinkp == &seg_pahhead[hlix]) {
1123*6695Saguzovsk 		/*
1124*6695Saguzovsk 		 * We processed the entire hlix active bucket list
1125*6695Saguzovsk 		 * but didn't find enough pages to reclaim.
1126*6695Saguzovsk 		 * Switch the lists and walk the other list
1127*6695Saguzovsk 		 * if we haven't done it yet.
1128*6695Saguzovsk 		 */
1129*6695Saguzovsk 		mutex_enter(&seg_pmem_mtx);
1130*6695Saguzovsk 		ASSERT(seg_pathr_on);
1131*6695Saguzovsk 		ASSERT(seg_pahcur == !hlix);
1132*6695Saguzovsk 		seg_pahcur = hlix;
1133*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
1134*6695Saguzovsk 		if (++hlinks < 2) {
1135*6695Saguzovsk 			hlix = !hlix;
1136*6695Saguzovsk 			goto again;
1137*6695Saguzovsk 		}
1138*6695Saguzovsk 	} else if ((hlinkp = hlnextp) != &seg_pahhead[hlix] &&
1139*6695Saguzovsk 	    seg_pahhead[hlix].p_lnext != hlinkp) {
1140*6695Saguzovsk 		ASSERT(hlinkp != NULL);
1141*6695Saguzovsk 		ASSERT(hlinkp->p_lprev != &seg_pahhead[hlix]);
1142*6695Saguzovsk 		ASSERT(seg_pahhead[hlix].p_lnext != &seg_pahhead[hlix]);
1143*6695Saguzovsk 		ASSERT(seg_pahhead[hlix].p_lprev != &seg_pahhead[hlix]);
11440Sstevel@tonic-gate 
11450Sstevel@tonic-gate 		/*
1146*6695Saguzovsk 		 * Reinsert the header to point to hlinkp
1147*6695Saguzovsk 		 * so that we start from hlinkp bucket next time around.
11480Sstevel@tonic-gate 		 */
1149*6695Saguzovsk 		seg_pahhead[hlix].p_lnext->p_lprev = seg_pahhead[hlix].p_lprev;
1150*6695Saguzovsk 		seg_pahhead[hlix].p_lprev->p_lnext = seg_pahhead[hlix].p_lnext;
1151*6695Saguzovsk 		seg_pahhead[hlix].p_lnext = hlinkp;
1152*6695Saguzovsk 		seg_pahhead[hlix].p_lprev = hlinkp->p_lprev;
1153*6695Saguzovsk 		hlinkp->p_lprev->p_lnext = &seg_pahhead[hlix];
1154*6695Saguzovsk 		hlinkp->p_lprev = &seg_pahhead[hlix];
1155*6695Saguzovsk 	}
1156*6695Saguzovsk 
1157*6695Saguzovsk 	mutex_enter(&seg_pmem_mtx);
1158*6695Saguzovsk 	ASSERT(seg_pathr_on);
1159*6695Saguzovsk 	seg_pathr_on = 0;
1160*6695Saguzovsk 	mutex_exit(&seg_pmem_mtx);
11610Sstevel@tonic-gate 
1162*6695Saguzovsk runcb:
1163*6695Saguzovsk 	/*
1164*6695Saguzovsk 	 * Run the delayed callback list. segments/amps can't go away until
1165*6695Saguzovsk 	 * callback is executed since they must have non 0 softlockcnt. That's
1166*6695Saguzovsk 	 * why we don't need to hold as/seg/amp locks to execute the callback.
1167*6695Saguzovsk 	 */
1168*6695Saguzovsk 	while (delcallb_list != NULL) {
1169*6695Saguzovsk 		pcp = delcallb_list;
1170*6695Saguzovsk 		delcallb_list = pcp->p_hprev;
1171*6695Saguzovsk 		ASSERT(!pcp->p_active);
1172*6695Saguzovsk 		(void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1173*6695Saguzovsk 		    pcp->p_len, pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 1);
1174*6695Saguzovsk 		npages += btop(pcp->p_len);
1175*6695Saguzovsk 		if (!IS_PCP_WIRED(pcp)) {
1176*6695Saguzovsk 			npages_window += btop(pcp->p_len);
1177*6695Saguzovsk 		}
1178*6695Saguzovsk 		kmem_cache_free(seg_pkmcache, pcp);
1179*6695Saguzovsk 	}
1180*6695Saguzovsk 	if (npages) {
1181*6695Saguzovsk 		mutex_enter(&seg_pmem_mtx);
1182*6695Saguzovsk 		ASSERT(seg_plocked >= npages);
1183*6695Saguzovsk 		ASSERT(seg_plocked_window >= npages_window);
1184*6695Saguzovsk 		seg_plocked -= npages;
1185*6695Saguzovsk 		seg_plocked_window -= npages_window;
1186*6695Saguzovsk 		mutex_exit(&seg_pmem_mtx);
1187*6695Saguzovsk 	}
1188*6695Saguzovsk }
1189*6695Saguzovsk 
1190*6695Saguzovsk /*
1191*6695Saguzovsk  * Remove cached entries for segment(s) from the hashtable.  The segments
1192*6695Saguzovsk  * are identified by their pp array.  This is useful for multiple segs cached
1193*6695Saguzovsk  * on behalf of a dummy segment (ISM/DISM) with a common pp array.
1194*6695Saguzovsk  */
1195*6695Saguzovsk void
1196*6695Saguzovsk seg_ppurge_wiredpp(struct page **pp)
1197*6695Saguzovsk {
1198*6695Saguzovsk 	struct seg_pcache *pcp;
1199*6695Saguzovsk 	struct seg_phash_wired *hp;
1200*6695Saguzovsk 	pgcnt_t npages = 0;
1201*6695Saguzovsk 	struct	seg_pcache *delcallb_list = NULL;
1202*6695Saguzovsk 
1203*6695Saguzovsk 	/*
1204*6695Saguzovsk 	 * if the cache is empty, return
1205*6695Saguzovsk 	 */
1206*6695Saguzovsk 	if (seg_plocked == 0) {
1207*6695Saguzovsk 		return;
1208*6695Saguzovsk 	}
1209*6695Saguzovsk 	ASSERT(seg_phashsize_wired != 0);
1210*6695Saguzovsk 
1211*6695Saguzovsk 	for (hp = seg_phashtab_wired;
1212*6695Saguzovsk 	    hp < &seg_phashtab_wired[seg_phashsize_wired]; hp++) {
1213*6695Saguzovsk 		if (hp->p_hnext == (struct seg_pcache *)hp) {
1214*6695Saguzovsk 			continue;
1215*6695Saguzovsk 		}
1216*6695Saguzovsk 		mutex_enter(&hp->p_hmutex);
1217*6695Saguzovsk 		pcp = hp->p_hnext;
1218*6695Saguzovsk 		while (pcp != (struct seg_pcache *)hp) {
1219*6695Saguzovsk 			ASSERT(pcp->p_hashp == (struct seg_phash *)hp);
1220*6695Saguzovsk 			ASSERT(IS_PCP_WIRED(pcp));
12210Sstevel@tonic-gate 			/*
1222*6695Saguzovsk 			 * purge entries which are not active
12230Sstevel@tonic-gate 			 */
1224*6695Saguzovsk 			if (!pcp->p_active && pcp->p_pp == pp) {
1225*6695Saguzovsk 				ASSERT(pcp->p_htag0 != NULL);
1226*6695Saguzovsk 				pcp->p_hprev->p_hnext = pcp->p_hnext;
1227*6695Saguzovsk 				pcp->p_hnext->p_hprev = pcp->p_hprev;
1228*6695Saguzovsk 				pcp->p_hprev = delcallb_list;
1229*6695Saguzovsk 				delcallb_list = pcp;
12300Sstevel@tonic-gate 			}
12310Sstevel@tonic-gate 			pcp = pcp->p_hnext;
12320Sstevel@tonic-gate 		}
12330Sstevel@tonic-gate 		mutex_exit(&hp->p_hmutex);
1234*6695Saguzovsk 		/*
1235*6695Saguzovsk 		 * segments can't go away until the callback is executed since
1236*6695Saguzovsk 		 * they must have a non-zero softlockcnt. That's why we don't
1237*6695Saguzovsk 		 * need to hold as/seg locks to execute the callback.
1238*6695Saguzovsk 		 */
1239*6695Saguzovsk 		while (delcallb_list != NULL) {
1240*6695Saguzovsk 			int done;
1241*6695Saguzovsk 			pcp = delcallb_list;
1242*6695Saguzovsk 			delcallb_list = pcp->p_hprev;
1243*6695Saguzovsk 			ASSERT(!pcp->p_active);
1244*6695Saguzovsk 			done = (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr,
1245*6695Saguzovsk 			    pcp->p_len, pcp->p_pp,
1246*6695Saguzovsk 			    pcp->p_write ? S_WRITE : S_READ, 1);
1247*6695Saguzovsk 			npages += btop(pcp->p_len);
1248*6695Saguzovsk 			ASSERT(IS_PCP_WIRED(pcp));
1249*6695Saguzovsk 			kmem_cache_free(seg_pkmcache, pcp);
1250*6695Saguzovsk 			if (done) {
1251*6695Saguzovsk 				ASSERT(delcallb_list == NULL);
1252*6695Saguzovsk 				goto out;
1253*6695Saguzovsk 			}
1254*6695Saguzovsk 		}
12550Sstevel@tonic-gate 	}
12560Sstevel@tonic-gate 
1257*6695Saguzovsk out:
1258*6695Saguzovsk 	mutex_enter(&seg_pmem_mtx);
1259*6695Saguzovsk 	ASSERT(seg_plocked >= npages);
12600Sstevel@tonic-gate 	seg_plocked -= npages;
1261*6695Saguzovsk 	mutex_exit(&seg_pmem_mtx);
12620Sstevel@tonic-gate }
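
/*
 * Illustrative usage sketch (added for clarity; not part of the original
 * source, and the variable names below are hypothetical): a shared-memory
 * segment driver that cached wired pagelock entries for several segments
 * against one common pp array would purge those entries before freeing the
 * array, e.g.:
 *
 *	struct page	**shared_ppa;	(common pp array of the ISM/DISM amp)
 *	pgcnt_t		npages;		(number of entries in shared_ppa)
 *
 *	seg_ppurge_wiredpp(shared_ppa);
 *	kmem_free(shared_ppa, npages * sizeof (struct page *));
 */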
12630Sstevel@tonic-gate 
12640Sstevel@tonic-gate /*
12650Sstevel@tonic-gate  * purge all entries for a given segment. Since we
12660Sstevel@tonic-gate  * call back into the segment driver directly for page
12670Sstevel@tonic-gate  * reclaim, the caller needs to hold the right locks.
12680Sstevel@tonic-gate  */
12690Sstevel@tonic-gate void
1270*6695Saguzovsk seg_ppurge(struct seg *seg, struct anon_map *amp, uint_t flags)
12710Sstevel@tonic-gate {
12720Sstevel@tonic-gate 	struct seg_pcache *delcallb_list = NULL;
12730Sstevel@tonic-gate 	struct seg_pcache *pcp;
12740Sstevel@tonic-gate 	struct seg_phash *hp;
12750Sstevel@tonic-gate 	pgcnt_t npages = 0;
1276*6695Saguzovsk 	void *htag0;
12770Sstevel@tonic-gate 
1278*6695Saguzovsk 	if (seg_plocked == 0) {
12790Sstevel@tonic-gate 		return;
12800Sstevel@tonic-gate 	}
1281*6695Saguzovsk 	ASSERT(seg_phashsize_win != 0);
1282*6695Saguzovsk 
1283*6695Saguzovsk 	/*
1284*6695Saguzovsk 	 * If amp is not NULL, use amp as the lookup tag; otherwise use
1285*6695Saguzovsk 	 * seg as the lookup tag.
1286*6695Saguzovsk 	 */
1287*6695Saguzovsk 	htag0 = (amp == NULL ? (void *)seg : (void *)amp);
1288*6695Saguzovsk 	ASSERT(htag0 != NULL);
1289*6695Saguzovsk 	if (IS_PFLAGS_WIRED(flags)) {
1290*6695Saguzovsk 		hp = P_HASHBP(seg, htag0, 0, flags);
1291*6695Saguzovsk 		mutex_enter(&hp->p_hmutex);
1292*6695Saguzovsk 		pcp = hp->p_hnext;
1293*6695Saguzovsk 		while (pcp != (struct seg_pcache *)hp) {
1294*6695Saguzovsk 			ASSERT(pcp->p_hashp == hp);
1295*6695Saguzovsk 			ASSERT(IS_PCP_WIRED(pcp));
1296*6695Saguzovsk 			if (pcp->p_htag0 == htag0) {
1297*6695Saguzovsk 				if (pcp->p_active) {
1298*6695Saguzovsk 					break;
1299*6695Saguzovsk 				}
1300*6695Saguzovsk 				pcp->p_hprev->p_hnext = pcp->p_hnext;
1301*6695Saguzovsk 				pcp->p_hnext->p_hprev = pcp->p_hprev;
1302*6695Saguzovsk 				pcp->p_hprev = delcallb_list;
1303*6695Saguzovsk 				delcallb_list = pcp;
1304*6695Saguzovsk 			}
1305*6695Saguzovsk 			pcp = pcp->p_hnext;
1306*6695Saguzovsk 		}
1307*6695Saguzovsk 		mutex_exit(&hp->p_hmutex);
1308*6695Saguzovsk 	} else {
1309*6695Saguzovsk 		pcache_link_t *plinkp;
1310*6695Saguzovsk 		pcache_link_t *pheadp;
1311*6695Saguzovsk 		kmutex_t *pmtx;
1312*6695Saguzovsk 
1313*6695Saguzovsk 		if (amp == NULL) {
1314*6695Saguzovsk 			ASSERT(seg != NULL);
1315*6695Saguzovsk 			pheadp = &seg->s_phead;
1316*6695Saguzovsk 			pmtx = &seg->s_pmtx;
1317*6695Saguzovsk 		} else {
1318*6695Saguzovsk 			pheadp = &amp->a_phead;
1319*6695Saguzovsk 			pmtx = &amp->a_pmtx;
1320*6695Saguzovsk 		}
1321*6695Saguzovsk 		mutex_enter(pmtx);
1322*6695Saguzovsk 		while ((plinkp = pheadp->p_lnext) != pheadp) {
1323*6695Saguzovsk 			pcp = plink2pcache(plinkp);
1324*6695Saguzovsk 			ASSERT(!IS_PCP_WIRED(pcp));
1325*6695Saguzovsk 			ASSERT(pcp->p_htag0 == htag0);
1326*6695Saguzovsk 			hp = pcp->p_hashp;
1327*6695Saguzovsk 			mutex_enter(&hp->p_hmutex);
13280Sstevel@tonic-gate 			if (pcp->p_active) {
1329*6695Saguzovsk 				mutex_exit(&hp->p_hmutex);
13300Sstevel@tonic-gate 				break;
13310Sstevel@tonic-gate 			}
1332*6695Saguzovsk 			ASSERT(plinkp->p_lprev == pheadp);
1333*6695Saguzovsk 			pheadp->p_lnext = plinkp->p_lnext;
1334*6695Saguzovsk 			plinkp->p_lnext->p_lprev = pheadp;
13350Sstevel@tonic-gate 			pcp->p_hprev->p_hnext = pcp->p_hnext;
13360Sstevel@tonic-gate 			pcp->p_hnext->p_hprev = pcp->p_hprev;
13370Sstevel@tonic-gate 			pcp->p_hprev = delcallb_list;
13380Sstevel@tonic-gate 			delcallb_list = pcp;
1339*6695Saguzovsk 			if (hp->p_hnext == (struct seg_pcache *)hp) {
1340*6695Saguzovsk 				seg_premove_abuck(hp, 0);
1341*6695Saguzovsk 			}
1342*6695Saguzovsk 			mutex_exit(&hp->p_hmutex);
13430Sstevel@tonic-gate 		}
1344*6695Saguzovsk 		mutex_exit(pmtx);
13450Sstevel@tonic-gate 	}
13460Sstevel@tonic-gate 	while (delcallb_list != NULL) {
13470Sstevel@tonic-gate 		pcp = delcallb_list;
13480Sstevel@tonic-gate 		delcallb_list = pcp->p_hprev;
1349*6695Saguzovsk 		ASSERT(!pcp->p_active);
1350*6695Saguzovsk 		(void) (*pcp->p_callback)(pcp->p_htag0, pcp->p_addr, pcp->p_len,
1351*6695Saguzovsk 		    pcp->p_pp, pcp->p_write ? S_WRITE : S_READ, 0);
1352*6695Saguzovsk 		npages += btop(pcp->p_len);
1353*6695Saguzovsk 		kmem_cache_free(seg_pkmcache, pcp);
13540Sstevel@tonic-gate 	}
1355*6695Saguzovsk 	mutex_enter(&seg_pmem_mtx);
1356*6695Saguzovsk 	ASSERT(seg_plocked >= npages);
13570Sstevel@tonic-gate 	seg_plocked -= npages;
1358*6695Saguzovsk 	if (!IS_PFLAGS_WIRED(flags)) {
1359*6695Saguzovsk 		ASSERT(seg_plocked_window >= npages);
1360*6695Saguzovsk 		seg_plocked_window -= npages;
1361*6695Saguzovsk 	}
1362*6695Saguzovsk 	mutex_exit(&seg_pmem_mtx);
13630Sstevel@tonic-gate }
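
/*
 * Illustrative usage sketch (added for clarity; not part of the original
 * source): a segment driver typically flushes its cached pagelock entries
 * before tearing down a segment or its anon map, e.g. from its unmap/free
 * entry points:
 *
 *	seg_ppurge(seg, NULL, 0);	(purge non-wired entries hashed by seg)
 *	seg_ppurge(NULL, amp, 0);	(purge non-wired entries hashed by amp)
 *
 * With the wired flag clear the non-wired path is taken, and both
 * seg_plocked and seg_plocked_window are decremented once the delayed
 * callbacks have run.
 */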
13640Sstevel@tonic-gate 
13650Sstevel@tonic-gate static void seg_pinit_mem_config(void);
13660Sstevel@tonic-gate 
13670Sstevel@tonic-gate /*
13680Sstevel@tonic-gate  * set up the pagelock cache
13690Sstevel@tonic-gate  */
13700Sstevel@tonic-gate static void
13710Sstevel@tonic-gate seg_pinit(void)
13720Sstevel@tonic-gate {
13730Sstevel@tonic-gate 	struct seg_phash *hp;
1374*6695Saguzovsk 	ulong_t i;
1375*6695Saguzovsk 	pgcnt_t physmegs;
1376*6695Saguzovsk 
1377*6695Saguzovsk 	seg_plocked = 0;
1378*6695Saguzovsk 	seg_plocked_window = 0;
1379*6695Saguzovsk 
1380*6695Saguzovsk 	if (segpcache_enabled == 0) {
1381*6695Saguzovsk 		seg_phashsize_win = 0;
1382*6695Saguzovsk 		seg_phashsize_wired = 0;
1383*6695Saguzovsk 		seg_pdisabled = 1;
1384*6695Saguzovsk 		return;
1385*6695Saguzovsk 	}
13860Sstevel@tonic-gate 
1387*6695Saguzovsk 	seg_pdisabled = 0;
1388*6695Saguzovsk 	seg_pkmcache = kmem_cache_create("seg_pcache",
1389*6695Saguzovsk 	    sizeof (struct seg_pcache), 0, NULL, NULL, NULL, NULL, NULL, 0);
1390*6695Saguzovsk 	if (segpcache_pcp_maxage_ticks <= 0) {
1391*6695Saguzovsk 		segpcache_pcp_maxage_ticks = segpcache_pcp_maxage_sec * hz;
1392*6695Saguzovsk 	}
1393*6695Saguzovsk 	seg_pmax_pcpage = segpcache_pcp_maxage_ticks;
1394*6695Saguzovsk 	seg_pathr_empty_ahb = 0;
1395*6695Saguzovsk 	seg_pathr_full_ahb = 0;
1396*6695Saguzovsk 	seg_pshrink_shift = segpcache_shrink_shift;
1397*6695Saguzovsk 	seg_pmaxapurge_npages = btop(segpcache_maxapurge_bytes);
13980Sstevel@tonic-gate 
1399*6695Saguzovsk 	mutex_init(&seg_pcache_mtx, NULL, MUTEX_DEFAULT, NULL);
1400*6695Saguzovsk 	mutex_init(&seg_pmem_mtx, NULL, MUTEX_DEFAULT, NULL);
1401*6695Saguzovsk 	mutex_init(&seg_pasync_mtx, NULL, MUTEX_DEFAULT, NULL);
1402*6695Saguzovsk 	cv_init(&seg_pasync_cv, NULL, CV_DEFAULT, NULL);
1403*6695Saguzovsk 
1404*6695Saguzovsk 	physmegs = physmem >> (20 - PAGESHIFT);
14050Sstevel@tonic-gate 
1406*6695Saguzovsk 	/*
1407*6695Saguzovsk 	 * If segpcache_hashsize_win was not set in /etc/system or has an
1408*6695Saguzovsk 	 * absurd value, set it to a default.
1409*6695Saguzovsk 	 */
1410*6695Saguzovsk 	if (segpcache_hashsize_win == 0 || segpcache_hashsize_win > physmem) {
1411*6695Saguzovsk 		/*
1412*6695Saguzovsk 		 * Create one bucket per 32K (or at least per 8 pages) of
1413*6695Saguzovsk 		 * available memory.
1414*6695Saguzovsk 		 */
1415*6695Saguzovsk 		pgcnt_t pages_per_bucket = MAX(btop(32 * 1024), 8);
1416*6695Saguzovsk 		segpcache_hashsize_win = MAX(1024, physmem / pages_per_bucket);
1417*6695Saguzovsk 	}
1418*6695Saguzovsk 	if (!ISP2(segpcache_hashsize_win)) {
1419*6695Saguzovsk 		ulong_t rndfac = ~(1UL <<
1420*6695Saguzovsk 		    (highbit(segpcache_hashsize_win) - 1));
1421*6695Saguzovsk 		rndfac &= segpcache_hashsize_win;
1422*6695Saguzovsk 		segpcache_hashsize_win += rndfac;
1423*6695Saguzovsk 		segpcache_hashsize_win = 1 <<
1424*6695Saguzovsk 		    (highbit(segpcache_hashsize_win) - 1);
1425*6695Saguzovsk 	}
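	/*
	 * Worked example (added for clarity): if segpcache_hashsize_win is
	 * tuned to 3000, the largest power of two below it is 2048 and the
	 * remainder is 952; since 952 < 1024 the value rounds down to 2048.
	 * A setting of 3100 (remainder 1052 >= 1024) rounds up to 4096, so
	 * the block above rounds to the nearest power of two.
	 */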
1426*6695Saguzovsk 	seg_phashsize_win = segpcache_hashsize_win;
1427*6695Saguzovsk 	seg_phashtab_win = kmem_zalloc(
1428*6695Saguzovsk 	    seg_phashsize_win * sizeof (struct seg_phash),
1429*6695Saguzovsk 	    KM_SLEEP);
1430*6695Saguzovsk 	for (i = 0; i < seg_phashsize_win; i++) {
1431*6695Saguzovsk 		hp = &seg_phashtab_win[i];
1432*6695Saguzovsk 		hp->p_hnext = (struct seg_pcache *)hp;
1433*6695Saguzovsk 		hp->p_hprev = (struct seg_pcache *)hp;
1434*6695Saguzovsk 		mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1435*6695Saguzovsk 	}
14360Sstevel@tonic-gate 
1437*6695Saguzovsk 	seg_pahcur = 0;
1438*6695Saguzovsk 	seg_pathr_on = 0;
1439*6695Saguzovsk 	seg_pahhead[0].p_lnext = &seg_pahhead[0];
1440*6695Saguzovsk 	seg_pahhead[0].p_lprev = &seg_pahhead[0];
1441*6695Saguzovsk 	seg_pahhead[1].p_lnext = &seg_pahhead[1];
1442*6695Saguzovsk 	seg_pahhead[1].p_lprev = &seg_pahhead[1];
1443*6695Saguzovsk 
1444*6695Saguzovsk 	/*
1445*6695Saguzovsk 	 * If segpcache_hashsize_wired was not set in /etc/system or has an
1446*6695Saguzovsk 	 * absurd value, set it to a default.
1447*6695Saguzovsk 	 */
1448*6695Saguzovsk 	if (segpcache_hashsize_wired == 0 ||
1449*6695Saguzovsk 	    segpcache_hashsize_wired > physmem / 4) {
1450*6695Saguzovsk 		/*
1451*6695Saguzovsk 		 * Choose segpcache_hashsize_wired based on physmem.
1452*6695Saguzovsk 		 * Create one bucket per 128K of memory, up to 256K buckets.
1453*6695Saguzovsk 		 */
1454*6695Saguzovsk 		if (physmegs < 20 * 1024) {
1455*6695Saguzovsk 			segpcache_hashsize_wired = MAX(1024, physmegs << 3);
1456*6695Saguzovsk 		} else {
1457*6695Saguzovsk 			segpcache_hashsize_wired = 256 * 1024;
14580Sstevel@tonic-gate 		}
14590Sstevel@tonic-gate 	}
1460*6695Saguzovsk 	if (!ISP2(segpcache_hashsize_wired)) {
1461*6695Saguzovsk 		segpcache_hashsize_wired = 1 <<
1462*6695Saguzovsk 		    highbit(segpcache_hashsize_wired);
1463*6695Saguzovsk 	}
1464*6695Saguzovsk 	seg_phashsize_wired = segpcache_hashsize_wired;
1465*6695Saguzovsk 	seg_phashtab_wired = kmem_zalloc(
1466*6695Saguzovsk 	    seg_phashsize_wired * sizeof (struct seg_phash_wired), KM_SLEEP);
1467*6695Saguzovsk 	for (i = 0; i < seg_phashsize_wired; i++) {
1468*6695Saguzovsk 		hp = (struct seg_phash *)&seg_phashtab_wired[i];
1469*6695Saguzovsk 		hp->p_hnext = (struct seg_pcache *)hp;
1470*6695Saguzovsk 		hp->p_hprev = (struct seg_pcache *)hp;
1471*6695Saguzovsk 		mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
1472*6695Saguzovsk 	}
14730Sstevel@tonic-gate 
1474*6695Saguzovsk 	if (segpcache_maxwindow == 0) {
1475*6695Saguzovsk 		if (physmegs < 64) {
1476*6695Saguzovsk 			/* 3% of memory */
1477*6695Saguzovsk 			segpcache_maxwindow = availrmem >> 5;
1478*6695Saguzovsk 		} else if (physmegs < 512) {
1479*6695Saguzovsk 			/* 12% of memory */
1480*6695Saguzovsk 			segpcache_maxwindow = availrmem >> 3;
1481*6695Saguzovsk 		} else if (physmegs < 1024) {
1482*6695Saguzovsk 			/* 25% of memory */
1483*6695Saguzovsk 			segpcache_maxwindow = availrmem >> 2;
1484*6695Saguzovsk 		} else if (physmegs < 2048) {
1485*6695Saguzovsk 			/* 50% of memory */
1486*6695Saguzovsk 			segpcache_maxwindow = availrmem >> 1;
1487*6695Saguzovsk 		} else {
1488*6695Saguzovsk 			/* no limit */
1489*6695Saguzovsk 			segpcache_maxwindow = (pgcnt_t)-1;
1490*6695Saguzovsk 		}
1491*6695Saguzovsk 	}
1492*6695Saguzovsk 	seg_pmaxwindow = segpcache_maxwindow;
14930Sstevel@tonic-gate 	seg_pinit_mem_config();
14940Sstevel@tonic-gate }
14950Sstevel@tonic-gate 
14960Sstevel@tonic-gate /*
14970Sstevel@tonic-gate  * called by pageout if memory is low
14980Sstevel@tonic-gate  */
14990Sstevel@tonic-gate void
15000Sstevel@tonic-gate seg_preap(void)
15010Sstevel@tonic-gate {
15020Sstevel@tonic-gate 	/*
1503*6695Saguzovsk 	 * if the cache is off or empty, return
15040Sstevel@tonic-gate 	 */
1505*6695Saguzovsk 	if (seg_plocked_window == 0) {
15060Sstevel@tonic-gate 		return;
15070Sstevel@tonic-gate 	}
1508*6695Saguzovsk 	ASSERT(seg_phashsize_win != 0);
1509*6695Saguzovsk 
1510*6695Saguzovsk 	/*
1511*6695Saguzovsk 	 * If the cache is disabled (e.g. somebody is already
1512*6695Saguzovsk 	 * purging pcache), just return.
1513*6695Saguzovsk 	 */
1514*6695Saguzovsk 	if (seg_pdisabled) {
1515*6695Saguzovsk 		return;
1516*6695Saguzovsk 	}
1517*6695Saguzovsk 
1518*6695Saguzovsk 	cv_signal(&seg_pasync_cv);
15190Sstevel@tonic-gate }
15200Sstevel@tonic-gate 
15210Sstevel@tonic-gate /*
15220Sstevel@tonic-gate  * run as a background thread and reclaim pagelock
15230Sstevel@tonic-gate  * pages that have not been used recently
15240Sstevel@tonic-gate  */
15250Sstevel@tonic-gate void
15260Sstevel@tonic-gate seg_pasync_thread(void)
15270Sstevel@tonic-gate {
15280Sstevel@tonic-gate 	callb_cpr_t cpr_info;
15290Sstevel@tonic-gate 
1530*6695Saguzovsk 	if (seg_phashsize_win == 0) {
1531*6695Saguzovsk 		thread_exit();
1532*6695Saguzovsk 		/*NOTREACHED*/
15330Sstevel@tonic-gate 	}
15340Sstevel@tonic-gate 
1535*6695Saguzovsk 	seg_pasync_thr = curthread;
1536*6695Saguzovsk 
1537*6695Saguzovsk 	CALLB_CPR_INIT(&cpr_info, &seg_pasync_mtx,
1538*6695Saguzovsk 	    callb_generic_cpr, "seg_pasync");
1539*6695Saguzovsk 
1540*6695Saguzovsk 	if (segpcache_reap_ticks <= 0) {
1541*6695Saguzovsk 		segpcache_reap_ticks = segpcache_reap_sec * hz;
1542*6695Saguzovsk 	}
15430Sstevel@tonic-gate 
1544*6695Saguzovsk 	mutex_enter(&seg_pasync_mtx);
1545*6695Saguzovsk 	for (;;) {
1546*6695Saguzovsk 		CALLB_CPR_SAFE_BEGIN(&cpr_info);
1547*6695Saguzovsk 		(void) cv_timedwait(&seg_pasync_cv, &seg_pasync_mtx,
1548*6695Saguzovsk 		    lbolt + segpcache_reap_ticks);
1549*6695Saguzovsk 		CALLB_CPR_SAFE_END(&cpr_info, &seg_pasync_mtx);
1550*6695Saguzovsk 		if (seg_pdisabled == 0) {
1551*6695Saguzovsk 			seg_ppurge_async(0);
1552*6695Saguzovsk 		}
15530Sstevel@tonic-gate 	}
15540Sstevel@tonic-gate }
15550Sstevel@tonic-gate 
15560Sstevel@tonic-gate static struct kmem_cache *seg_cache;
15570Sstevel@tonic-gate 
15580Sstevel@tonic-gate /*
15590Sstevel@tonic-gate  * Initialize segment management data structures.
15600Sstevel@tonic-gate  */
15610Sstevel@tonic-gate void
15620Sstevel@tonic-gate seg_init(void)
15630Sstevel@tonic-gate {
15640Sstevel@tonic-gate 	kstat_t *ksp;
15650Sstevel@tonic-gate 
1566*6695Saguzovsk 	seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
1567*6695Saguzovsk 	    0, NULL, NULL, NULL, NULL, NULL, 0);
15680Sstevel@tonic-gate 
15690Sstevel@tonic-gate 	ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
15705928Sjj204856 	    segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
15710Sstevel@tonic-gate 	if (ksp) {
15720Sstevel@tonic-gate 		ksp->ks_data = (void *)segadvstat_ptr;
15730Sstevel@tonic-gate 		kstat_install(ksp);
15740Sstevel@tonic-gate 	}
15750Sstevel@tonic-gate 
15760Sstevel@tonic-gate 	seg_pinit();
15770Sstevel@tonic-gate }
15780Sstevel@tonic-gate 
15790Sstevel@tonic-gate /*
15800Sstevel@tonic-gate  * Allocate a segment to cover [base, base+size]
15810Sstevel@tonic-gate  * and attach it to the specified address space.
15820Sstevel@tonic-gate  */
15830Sstevel@tonic-gate struct seg *
15840Sstevel@tonic-gate seg_alloc(struct as *as, caddr_t base, size_t size)
15850Sstevel@tonic-gate {
15860Sstevel@tonic-gate 	struct seg *new;
15870Sstevel@tonic-gate 	caddr_t segbase;
15880Sstevel@tonic-gate 	size_t segsize;
15890Sstevel@tonic-gate 
15900Sstevel@tonic-gate 	segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
15910Sstevel@tonic-gate 	segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
15920Sstevel@tonic-gate 	    (uintptr_t)segbase;
15930Sstevel@tonic-gate 
15940Sstevel@tonic-gate 	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
15950Sstevel@tonic-gate 		return ((struct seg *)NULL);	/* bad virtual addr range */
15960Sstevel@tonic-gate 
15970Sstevel@tonic-gate 	if (as != &kas &&
15980Sstevel@tonic-gate 	    valid_usr_range(segbase, segsize, 0, as,
15990Sstevel@tonic-gate 	    as->a_userlimit) != RANGE_OKAY)
16000Sstevel@tonic-gate 		return ((struct seg *)NULL);	/* bad virtual addr range */
16010Sstevel@tonic-gate 
16020Sstevel@tonic-gate 	new = kmem_cache_alloc(seg_cache, KM_SLEEP);
16030Sstevel@tonic-gate 	new->s_ops = NULL;
16040Sstevel@tonic-gate 	new->s_data = NULL;
16050Sstevel@tonic-gate 	new->s_szc = 0;
16060Sstevel@tonic-gate 	new->s_flags = 0;
1607*6695Saguzovsk 	mutex_init(&new->s_pmtx, NULL, MUTEX_DEFAULT, NULL);
1608*6695Saguzovsk 	new->s_phead.p_lnext = &new->s_phead;
1609*6695Saguzovsk 	new->s_phead.p_lprev = &new->s_phead;
16100Sstevel@tonic-gate 	if (seg_attach(as, segbase, segsize, new) < 0) {
16110Sstevel@tonic-gate 		kmem_cache_free(seg_cache, new);
16120Sstevel@tonic-gate 		return ((struct seg *)NULL);
16130Sstevel@tonic-gate 	}
16140Sstevel@tonic-gate 	/* caller must fill in ops, data */
16150Sstevel@tonic-gate 	return (new);
16160Sstevel@tonic-gate }
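
/*
 * Illustrative usage sketch (added for clarity; not part of the original
 * source; mydrv_create() and its arguments are hypothetical): a caller
 * allocates the segment, lets the segment driver's create routine fill in
 * s_ops and s_data, and falls back to seg_free() if that setup fails before
 * a mapping is established:
 *
 *	struct seg *seg;
 *
 *	seg = seg_alloc(as, addr, len);
 *	if (seg == NULL)
 *		return (ENOMEM);	(bad va range or overlapping segment)
 *	if (mydrv_create(seg, args) != 0) {
 *		seg_free(seg);		(no mapping was established)
 *		return (EIO);
 *	}
 *	(on success mydrv_create() has set seg->s_ops and seg->s_data)
 */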
16170Sstevel@tonic-gate 
16180Sstevel@tonic-gate /*
16190Sstevel@tonic-gate  * Attach a segment to the address space.  Used by seg_alloc()
16200Sstevel@tonic-gate  * and for kernel startup to attach to static segments.
16210Sstevel@tonic-gate  */
16220Sstevel@tonic-gate int
16230Sstevel@tonic-gate seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
16240Sstevel@tonic-gate {
16250Sstevel@tonic-gate 	seg->s_as = as;
16260Sstevel@tonic-gate 	seg->s_base = base;
16270Sstevel@tonic-gate 	seg->s_size = size;
16280Sstevel@tonic-gate 
16290Sstevel@tonic-gate 	/*
16300Sstevel@tonic-gate 	 * as_addseg() will add the segment at the appropriate point
16310Sstevel@tonic-gate 	 * in the list. It will return -1 if there is overlap with
16320Sstevel@tonic-gate 	 * an already existing segment.
16330Sstevel@tonic-gate 	 */
16340Sstevel@tonic-gate 	return (as_addseg(as, seg));
16350Sstevel@tonic-gate }
16360Sstevel@tonic-gate 
16370Sstevel@tonic-gate /*
16380Sstevel@tonic-gate  * Unmap a segment and free it from its associated address space.
16390Sstevel@tonic-gate  * This should be called by anybody who's finished with a whole segment's
16400Sstevel@tonic-gate  * mapping.  Just calls SEGOP_UNMAP() on the whole mapping.  It is the
16410Sstevel@tonic-gate  * responsibility of the segment driver to unlink the segment
16420Sstevel@tonic-gate  * from the address space, and to free public and private data structures
16430Sstevel@tonic-gate  * associated with the segment.  (This is typically done by a call to
16440Sstevel@tonic-gate  * seg_free()).
16450Sstevel@tonic-gate  */
16460Sstevel@tonic-gate void
16470Sstevel@tonic-gate seg_unmap(struct seg *seg)
16480Sstevel@tonic-gate {
16490Sstevel@tonic-gate #ifdef DEBUG
16500Sstevel@tonic-gate 	int ret;
16510Sstevel@tonic-gate #endif /* DEBUG */
16520Sstevel@tonic-gate 
16530Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
16540Sstevel@tonic-gate 
16550Sstevel@tonic-gate 	/* Shouldn't have called seg_unmap if mapping isn't yet established */
16560Sstevel@tonic-gate 	ASSERT(seg->s_data != NULL);
16570Sstevel@tonic-gate 
16580Sstevel@tonic-gate 	/* Unmap the whole mapping */
16590Sstevel@tonic-gate #ifdef DEBUG
16600Sstevel@tonic-gate 	ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
16610Sstevel@tonic-gate 	ASSERT(ret == 0);
16620Sstevel@tonic-gate #else
16630Sstevel@tonic-gate 	SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
16640Sstevel@tonic-gate #endif /* DEBUG */
16650Sstevel@tonic-gate }
16660Sstevel@tonic-gate 
16670Sstevel@tonic-gate /*
16680Sstevel@tonic-gate  * Free the segment from its associated as. This should only be called
16690Sstevel@tonic-gate  * if a mapping to the segment has not yet been established (e.g., if
16700Sstevel@tonic-gate  * an error occurs in the middle of doing an as_map when the segment
16710Sstevel@tonic-gate  * has already been partially set up) or if it has already been deleted
16720Sstevel@tonic-gate  * (e.g., from a segment driver unmap routine if the unmap applies to the
16730Sstevel@tonic-gate  * entire segment). If the mapping is currently set up then seg_unmap() should
16740Sstevel@tonic-gate  * be called instead.
16750Sstevel@tonic-gate  */
16760Sstevel@tonic-gate void
16770Sstevel@tonic-gate seg_free(struct seg *seg)
16780Sstevel@tonic-gate {
16790Sstevel@tonic-gate 	register struct as *as = seg->s_as;
16800Sstevel@tonic-gate 	struct seg *tseg = as_removeseg(as, seg);
16810Sstevel@tonic-gate 
16820Sstevel@tonic-gate 	ASSERT(tseg == seg);
16830Sstevel@tonic-gate 
16840Sstevel@tonic-gate 	/*
16850Sstevel@tonic-gate 	 * If the segment private data field is NULL,
16860Sstevel@tonic-gate 	 * then segment driver is not attached yet.
16870Sstevel@tonic-gate 	 */
16880Sstevel@tonic-gate 	if (seg->s_data != NULL)
16890Sstevel@tonic-gate 		SEGOP_FREE(seg);
16900Sstevel@tonic-gate 
1691*6695Saguzovsk 	mutex_destroy(&seg->s_pmtx);
1692*6695Saguzovsk 	ASSERT(seg->s_phead.p_lnext == &seg->s_phead);
1693*6695Saguzovsk 	ASSERT(seg->s_phead.p_lprev == &seg->s_phead);
16940Sstevel@tonic-gate 	kmem_cache_free(seg_cache, seg);
16950Sstevel@tonic-gate }
16960Sstevel@tonic-gate 
16970Sstevel@tonic-gate /*ARGSUSED*/
16980Sstevel@tonic-gate static void
16990Sstevel@tonic-gate seg_p_mem_config_post_add(
17000Sstevel@tonic-gate 	void *arg,
17010Sstevel@tonic-gate 	pgcnt_t delta_pages)
17020Sstevel@tonic-gate {
17030Sstevel@tonic-gate 	/* Nothing to do. */
17040Sstevel@tonic-gate }
17050Sstevel@tonic-gate 
17063480Sjfrank void
17073480Sjfrank seg_p_enable(void)
17083480Sjfrank {
1709*6695Saguzovsk 	mutex_enter(&seg_pcache_mtx);
1710*6695Saguzovsk 	ASSERT(seg_pdisabled != 0);
1711*6695Saguzovsk 	seg_pdisabled--;
1712*6695Saguzovsk 	mutex_exit(&seg_pcache_mtx);
17133480Sjfrank }
17143480Sjfrank 
17153480Sjfrank /*
17163480Sjfrank  * seg_p_disable - disables seg_pcache, and then attempts to empty the
17173480Sjfrank  * cache.
17183480Sjfrank  * Returns SEGP_SUCCESS if the cache was successfully emptied, or
17193480Sjfrank  * SEGP_FAIL if the cache could not be emptied.
17203480Sjfrank  */
17213480Sjfrank int
17223480Sjfrank seg_p_disable(void)
17233480Sjfrank {
17243480Sjfrank 	pgcnt_t	old_plocked;
17253480Sjfrank 	int stall_count = 0;
17263480Sjfrank 
1727*6695Saguzovsk 	mutex_enter(&seg_pcache_mtx);
1728*6695Saguzovsk 	seg_pdisabled++;
1729*6695Saguzovsk 	ASSERT(seg_pdisabled != 0);
1730*6695Saguzovsk 	mutex_exit(&seg_pcache_mtx);
17313480Sjfrank 
17323480Sjfrank 	/*
17333480Sjfrank 	 * Attempt to empty the cache. Terminate if seg_plocked does not
17343480Sjfrank 	 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
17353480Sjfrank 	 */
17363480Sjfrank 	while (seg_plocked != 0) {
1737*6695Saguzovsk 		ASSERT(seg_phashsize_win != 0);
17383480Sjfrank 		old_plocked = seg_plocked;
1739*6695Saguzovsk 		seg_ppurge_async(1);
17403480Sjfrank 		if (seg_plocked == old_plocked) {
17413480Sjfrank 			if (stall_count++ > SEGP_STALL_THRESHOLD) {
17423480Sjfrank 				return (SEGP_FAIL);
17433480Sjfrank 			}
17443480Sjfrank 		} else
17453480Sjfrank 			stall_count = 0;
17463480Sjfrank 		if (seg_plocked != 0)
17473480Sjfrank 			delay(hz/SEGP_PREDEL_DELAY_FACTOR);
17483480Sjfrank 	}
17493480Sjfrank 	return (SEGP_SUCCESS);
17503480Sjfrank }
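
/*
 * Illustrative usage sketch (added for clarity; not part of the original
 * source): because seg_pdisabled is a counter, every seg_p_disable() call
 * must eventually be balanced by seg_p_enable(), even when the cache could
 * not be emptied.  The memory-delete callbacks below follow this pattern:
 *
 *	if (seg_p_disable() != SEGP_SUCCESS)
 *		cmn_err(CE_NOTE, "!couldn't empty the pagelock cache");
 *	... do work that must not race with new pagelock caching ...
 *	seg_p_enable();
 */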
17513480Sjfrank 
17520Sstevel@tonic-gate /*
17530Sstevel@tonic-gate  * Attempt to purge seg_pcache.  May need to return before this has
17540Sstevel@tonic-gate  * completed to allow other pre_del callbacks to unlock pages. This is
17550Sstevel@tonic-gate  * ok because:
1756*6695Saguzovsk  *	1) The seg_pdisabled flag has been set so at least we won't
17570Sstevel@tonic-gate  *	cache any more locks, and the locks we couldn't purge
17580Sstevel@tonic-gate  *	will not be held if they do get released by a subsequent
17590Sstevel@tonic-gate  *	pre-delete callback.
17600Sstevel@tonic-gate  *
17610Sstevel@tonic-gate  *	2) The rest of the memory delete thread processing does not
17620Sstevel@tonic-gate  *	depend on the changes made in this pre-delete callback. No
17630Sstevel@tonic-gate  *	panics will result, the worst that will happen is that the
17640Sstevel@tonic-gate  *	DR code will timeout and cancel the delete.
17650Sstevel@tonic-gate  */
17660Sstevel@tonic-gate /*ARGSUSED*/
17670Sstevel@tonic-gate static int
17680Sstevel@tonic-gate seg_p_mem_config_pre_del(
17690Sstevel@tonic-gate 	void *arg,
17700Sstevel@tonic-gate 	pgcnt_t delta_pages)
17710Sstevel@tonic-gate {
1772*6695Saguzovsk 	if (seg_phashsize_win == 0) {
1773*6695Saguzovsk 		return (0);
1774*6695Saguzovsk 	}
17753480Sjfrank 	if (seg_p_disable() != SEGP_SUCCESS)
17763480Sjfrank 		cmn_err(CE_NOTE,
17773480Sjfrank 		    "!Pre-delete couldn't purge pagelock cache - continuing");
17780Sstevel@tonic-gate 	return (0);
17790Sstevel@tonic-gate }
17800Sstevel@tonic-gate 
17810Sstevel@tonic-gate /*ARGSUSED*/
17820Sstevel@tonic-gate static void
17830Sstevel@tonic-gate seg_p_mem_config_post_del(
17840Sstevel@tonic-gate 	void *arg,
17850Sstevel@tonic-gate 	pgcnt_t delta_pages,
17860Sstevel@tonic-gate 	int cancelled)
17870Sstevel@tonic-gate {
1788*6695Saguzovsk 	if (seg_phashsize_win == 0) {
1789*6695Saguzovsk 		return;
1790*6695Saguzovsk 	}
17913480Sjfrank 	seg_p_enable();
17920Sstevel@tonic-gate }
17930Sstevel@tonic-gate 
17940Sstevel@tonic-gate static kphysm_setup_vector_t seg_p_mem_config_vec = {
17950Sstevel@tonic-gate 	KPHYSM_SETUP_VECTOR_VERSION,
17960Sstevel@tonic-gate 	seg_p_mem_config_post_add,
17970Sstevel@tonic-gate 	seg_p_mem_config_pre_del,
17980Sstevel@tonic-gate 	seg_p_mem_config_post_del,
17990Sstevel@tonic-gate };
18000Sstevel@tonic-gate 
18010Sstevel@tonic-gate static void
18020Sstevel@tonic-gate seg_pinit_mem_config(void)
18030Sstevel@tonic-gate {
18040Sstevel@tonic-gate 	int ret;
18050Sstevel@tonic-gate 
18060Sstevel@tonic-gate 	ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
18070Sstevel@tonic-gate 	/*
18080Sstevel@tonic-gate 	 * Want to catch this in the debug kernel. At run time, if the
18090Sstevel@tonic-gate 	 * callbacks don't get run, all will be OK as the disable just makes
18100Sstevel@tonic-gate 	 * it more likely that the pages can be collected.
18110Sstevel@tonic-gate 	 */
18120Sstevel@tonic-gate 	ASSERT(ret == 0);
18130Sstevel@tonic-gate }
18143247Sgjelinek 
18153247Sgjelinek /*
18163247Sgjelinek  * Verify that segment is not a shared anonymous segment which reserves
18173247Sgjelinek  * swap.  zone.max-swap accounting (zone->zone_max_swap) cannot be transferred
18183247Sgjelinek  * from one zone to another if any segments are shared.  This is because the
18193247Sgjelinek  * last process to exit will credit the swap reservation.  This could lead
18203247Sgjelinek  * to the swap being reserved by one zone, and credited to another.
18213247Sgjelinek  */
18223247Sgjelinek boolean_t
18233247Sgjelinek seg_can_change_zones(struct seg *seg)
18243247Sgjelinek {
18253247Sgjelinek 	struct segvn_data *svd;
18263247Sgjelinek 
18273247Sgjelinek 	if (seg->s_ops == &segspt_shmops)
18283247Sgjelinek 		return (B_FALSE);
18293247Sgjelinek 
18303247Sgjelinek 	if (seg->s_ops == &segvn_ops) {
18313247Sgjelinek 		svd = (struct segvn_data *)seg->s_data;
18323247Sgjelinek 		if (svd->type == MAP_SHARED &&
18333247Sgjelinek 		    svd->amp != NULL &&
18343247Sgjelinek 		    svd->amp->swresv > 0)
18353247Sgjelinek 			return (B_FALSE);
18363247Sgjelinek 	}
18373247Sgjelinek 	return (B_TRUE);
18383247Sgjelinek }
18393247Sgjelinek 
18403247Sgjelinek /*
18413247Sgjelinek  * Return swap reserved by a segment backing a private mapping.
18423247Sgjelinek  */
18433247Sgjelinek size_t
18443247Sgjelinek seg_swresv(struct seg *seg)
18453247Sgjelinek {
18463247Sgjelinek 	struct segvn_data *svd;
18473247Sgjelinek 	size_t swap = 0;
18483247Sgjelinek 
18493247Sgjelinek 	if (seg->s_ops == &segvn_ops) {
18503247Sgjelinek 		svd = (struct segvn_data *)seg->s_data;
18513247Sgjelinek 		if (svd->type == MAP_PRIVATE && svd->swresv > 0)
18523247Sgjelinek 			swap = svd->swresv;
18533247Sgjelinek 	}
18543247Sgjelinek 	return (swap);
18553247Sgjelinek }
1856