/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - segment management.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>

/*
 * kstats for segment advise
 */
segadvstat_t segadvstat = {
	{ "MADV_FREE_hit",	KSTAT_DATA_ULONG },
	{ "MADV_FREE_miss",	KSTAT_DATA_ULONG },
};

kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);

/* #define	PDEBUG */
#if defined(PDEBUG) || defined(lint) || defined(__lint)
int pdebug = 0;
#else
#define	pdebug		0
#endif	/* PDEBUG */

#define	PPRINTF				if (pdebug) printf
#define	PPRINT(x)			PPRINTF(x)
#define	PPRINT1(x, a)			PPRINTF(x, a)
#define	PPRINT2(x, a, b)		PPRINTF(x, a, b)
#define	PPRINT3(x, a, b, c)		PPRINTF(x, a, b, c)
#define	PPRINT4(x, a, b, c, d)		PPRINTF(x, a, b, c, d)
#define	PPRINT5(x, a, b, c, d, e)	PPRINTF(x, a, b, c, d, e)

#define	P_HASHMASK		(p_hashsize - 1)
#define	P_BASESHIFT		6

/*
 * entry in the segment page cache
 */
struct seg_pcache {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int		p_active;	/* active count */
	int		p_ref;		/* ref bit */
	size_t		p_len;		/* segment length */
	caddr_t		p_addr;		/* base address */
	struct seg 	*p_seg;		/* segment */
	struct page	**p_pp;		/* pp shadow list */
	enum seg_rw	p_rw;		/* rw */
	uint_t		p_flags;	/* bit flags */
	int		(*p_callback)(struct seg *, caddr_t, size_t,
			    struct page **, enum seg_rw);
};

struct seg_phash {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int p_qlen;			/* Q length */
	kmutex_t p_hmutex;		/* protects hash bucket */
};
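
/*
 * Note that the first two members of struct seg_phash intentionally
 * mirror the link fields of struct seg_pcache: each bucket head is
 * cast to (struct seg_pcache *) and serves as the sentinel of its
 * circular doubly-linked list (see seg_pinit() and the list walks
 * below).
 */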

static int seg_preap_time = 20;	/* reclaim every 20 secs */
static int seg_pmaxqlen = 5;	/* max Q length in hash list */
static int seg_ppcount = 5;	/* max # of purges per reclaim interval */
static int seg_plazy = 1;	/* if 1, pages are cached after pageunlock */
static pgcnt_t seg_pwindow;	/* max # of pages that can be cached */
static pgcnt_t seg_plocked;	/* # of pages which are cached by pagelock */
static pgcnt_t seg_plocked_window; /* # pages from window */
int seg_preapahead;

static uint_t seg_pdisable = 0;	/* if not 0, caching temporarily disabled */

static int seg_pupdate_active = 1;	/* background reclaim thread */
static clock_t seg_preap_interval;	/* reap interval in ticks */

static kmutex_t seg_pcache;	/* protects the whole pagelock cache */
static kmutex_t seg_pmem;	/* protects window counter */
static ksema_t seg_psaync_sem;	/* sema for reclaim thread */
static struct seg_phash *p_hashtab;
static int p_hashsize = 0;

#define	p_hash(seg) \
	(P_HASHMASK & \
	((uintptr_t)(seg) >> P_BASESHIFT))
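
/*
 * p_hash() discards the low P_BASESHIFT (6) bits of the seg pointer,
 * which are likely to be identical across kmem-allocated seg
 * structures, then masks with P_HASHMASK.  This assumes p_hashsize is
 * a power of two (all values chosen in seg_pinit() are).  Illustrative
 * example: with p_hashsize == 1024, a seg at 0x3000a040 hashes to
 * bucket ((0x3000a040 >> 6) & 0x3ff) == 0x281.
 */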

#define	p_match(pcp, seg, addr, len, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)

#define	p_match_pp(pcp, seg, addr, len, pp, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_pp == (pp) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)


/*
 * look up an address range in the pagelock cache. On a hit, return
 * the shadow list and bump the active count.
 */
struct page **
seg_plookup(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	/*
	 * Skip the pagelock cache while DR is in progress or
	 * seg_pcache is off.
	 */
	if (seg_pdisable || seg_plazy == 0) {
		return (NULL);
	}

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match(pcp, seg, addr, len, rw)) {
			pcp->p_active++;
			mutex_exit(&hp->p_hmutex);

			PPRINT5("seg_plookup hit: seg %p, addr %p, "
			    "len %lx, count %d, pplist %p \n",
			    (void *)seg, (void *)addr, len, pcp->p_active,
			    (void *)pcp->p_pp);

			return (pcp->p_pp);
		}
	}
	mutex_exit(&hp->p_hmutex);

	PPRINT("seg_plookup miss:\n");

	return (NULL);
}
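
/*
 * Typical pagelock life cycle in a segment driver (illustrative
 * sketch only; the my_* names are hypothetical):
 *
 *	pplist = seg_plookup(seg, addr, len, rw);
 *	if (pplist == NULL) {
 *		if (seg_pinsert_check(seg, len, flags) == SEGP_SUCCESS) {
 *			pplist = my_build_shadow_list(seg, addr, len);
 *			(void) seg_pinsert(seg, addr, len, pplist, rw,
 *			    flags, my_reclaim);
 *		}
 *	}
 *	... use pplist ...
 *	seg_pinactive(seg, addr, len, pplist, rw, my_reclaim);
 */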

/*
 * mark an address range inactive. If the cache is off or the address
 * range is not in the cache, we call the segment driver to reclaim
 * the pages. Otherwise just decrement the active count and set the
 * ref bit.
 */
void
seg_pinactive(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		(void) (*callback)(seg, addr, len, pp, rw);
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match_pp(pcp, seg, addr, len, pp, rw)) {
			pcp->p_active--;
			ASSERT(pcp->p_active >= 0);
			if (pcp->p_active == 0 && seg_pdisable) {
				int npages;

				ASSERT(callback == pcp->p_callback);
				/* free the entry */
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;
				mutex_exit(&hp->p_hmutex);
				npages = pcp->p_len >> PAGESHIFT;
				mutex_enter(&seg_pmem);
				seg_plocked -= npages;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					seg_plocked_window -= npages;
				}
				mutex_exit(&seg_pmem);
				kmem_free(pcp, sizeof (struct seg_pcache));
				goto out;
			}
			pcp->p_ref = 1;
			mutex_exit(&hp->p_hmutex);
			return;
		}
	}
	mutex_exit(&hp->p_hmutex);
out:
	(void) (*callback)(seg, addr, len, pp, rw);
}
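
/*
 * Note that the seg_pdisable path above unlinks and frees the cache
 * entry and then falls through to the driver callback at "out", so an
 * entry released while caching is disabled (e.g. during DR) is
 * reclaimed immediately rather than lingering in the cache.
 */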

/*
 * seg_pinsert_check() is used by segment drivers to predict whether
 * a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
 */

int
seg_pinsert_check(struct seg *seg, size_t len, uint_t flags)
{
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		pgcnt_t npages;

		npages = len >> PAGESHIFT;
		if ((seg_plocked_window + npages) > seg_pwindow) {
			return (SEGP_FAIL);
		}
	}
	return (SEGP_SUCCESS);
}
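
/*
 * seg_pinsert_check() reads p_qlen and the window counters without
 * taking any locks, so its answer is only a hint: a subsequent
 * seg_pinsert() can still fail.  Callers use it purely to avoid
 * building a shadow list that would then be thrown away.
 */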


/*
 * insert an address range with its shadow list into the pagelock
 * cache. If the cache is off, caching is temporarily disabled, or the
 * allowed 'window' would be exceeded, return SEGP_FAIL; otherwise
 * return SEGP_SUCCESS.
 */
int
seg_pinsert(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, uint_t flags, int (*callback)(struct seg *, caddr_t,
    size_t, struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	npages = len >> PAGESHIFT;
	mutex_enter(&seg_pmem);
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		seg_plocked_window += npages;
		if (seg_plocked_window > seg_pwindow) {
			seg_plocked_window -= npages;
			mutex_exit(&seg_pmem);
			return (SEGP_FAIL);
		}
	}
	seg_plocked += npages;
	mutex_exit(&seg_pmem);

	pcp = kmem_alloc(sizeof (struct seg_pcache), KM_SLEEP);
	pcp->p_seg = seg;
	pcp->p_addr = addr;
	pcp->p_len = len;
	pcp->p_pp = pp;
	pcp->p_rw = rw;
	pcp->p_callback = callback;
	pcp->p_active = 1;
	pcp->p_flags = flags;

	PPRINT4("seg_pinsert: seg %p, addr %p, len %lx, pplist %p\n",
	    (void *)seg, (void *)addr, len, (void *)pp);

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	hp->p_qlen++;
	pcp->p_hnext = hp->p_hnext;
	pcp->p_hprev = (struct seg_pcache *)hp;
	hp->p_hnext->p_hprev = pcp;
	hp->p_hnext = pcp;
	mutex_exit(&hp->p_hmutex);
	return (SEGP_SUCCESS);
}
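
/*
 * New entries are linked at the head of their bucket.  The window
 * accounting above optimistically adds npages under seg_pmem before
 * checking the limit and backs the addition out on failure, so two
 * racing inserts cannot push seg_plocked_window past seg_pwindow.
 */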

/*
 * purge all entries from the pagelock cache if not active
 * and not recently used. Drop all locks and call through
 * the address space into the segment driver to reclaim
 * the pages. This makes sure we get the address space
 * and segment driver locking right.
 */
static void
seg_ppurge_all(int force)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	int purge_count = 0;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {
		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;

		/*
		 * While 'force' is set, seg_pasync_thread is not
		 * throttled.  This is to speed up flushing of seg_pcache
		 * in preparation for DR.
		 *
		 * In the normal case, when 'force' is not set, we throttle
		 * seg_pasync_thread so that we don't spend all of our
		 * time purging the cache.
		 */
		while ((pcp != (struct seg_pcache *)hp) &&
				(force || (purge_count <= seg_ppcount))) {

			/*
			 * purge entries which are not active and
			 * have not been used recently and
			 * have the SEGP_ASYNC_FLUSH flag.
			 *
			 * In the 'force' case, we ignore the
			 * SEGP_ASYNC_FLUSH flag.
			 */
			if (!(pcp->p_flags & SEGP_ASYNC_FLUSH))
				pcp->p_ref = 1;
			if (force)
				pcp->p_ref = 0;
			if (!pcp->p_ref && !pcp->p_active) {
				struct as *as = pcp->p_seg->s_as;

				/*
				 * try to get the readers lock on the address
				 * space before taking out the cache element.
				 * This ensures as_pagereclaim() can actually
				 * call through the address space and free
				 * the pages. If we don't get the lock, just
				 * skip this entry. The pages will be reclaimed
				 * by the segment driver at unmap time.
				 */
				if (AS_LOCK_TRYENTER(as, &as->a_lock,
				    RW_READER)) {
					hp->p_qlen--;
					pcp->p_hprev->p_hnext = pcp->p_hnext;
					pcp->p_hnext->p_hprev = pcp->p_hprev;
					pcp->p_hprev = delcallb_list;
					delcallb_list = pcp;
					purge_count++;
				}
			} else {
				pcp->p_ref = 0;
			}
			pcp = pcp->p_hnext;
		}
		mutex_exit(&hp->p_hmutex);
		if (!force && purge_count > seg_ppcount)
			break;
	}

	/*
	 * run the delayed callback list. We don't want to hold the
	 * cache lock during a call through the address space.
	 */
	while (delcallb_list != NULL) {
		struct as *as;

		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;
		as = pcp->p_seg->s_as;

		PPRINT4("seg_ppurge_all: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)pcp->p_seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		as_pagereclaim(as, pcp->p_pp, pcp->p_addr,
		    pcp->p_len, pcp->p_rw);
		AS_LOCK_EXIT(as, &as->a_lock);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
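
/*
 * The purge is two-phase: entries are unlinked from their bucket under
 * p_hmutex and chained through p_hprev onto delcallb_list, then the
 * reclaim callbacks run with no cache locks held.  For SEGP_ASYNC_FLUSH
 * entries the p_ref handling gives each entry a second chance,
 * clock-style: the first unforced scan clears p_ref, and only an entry
 * found with p_ref still clear (and inactive) on a later scan is
 * purged.
 */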

/*
 * Remove hashtable entries (and their cached pages) for one or more
 * segments.  The segments are identified by the given client's
 * callback function.  This is useful when multiple segs are cached on
 * behalf of a dummy segment (ISM/DISM) with a common callback
 * function.  The client's callback function may return a status
 * indicating that the last seg's entry has been purged; in that case
 * seg_ppurge_seg() stops searching the hashtable and exits.
 * Otherwise all hashtable entries are scanned.
 */
void
seg_ppurge_seg(int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp, *npcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;
	int	done = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	mutex_enter(&seg_pcache);
	seg_pdisable++;
	mutex_exit(&seg_pcache);

	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {

		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;
		while (pcp != (struct seg_pcache *)hp) {

			/*
			 * purge entries which are not active
			 */
			npcp = pcp->p_hnext;
			if (!pcp->p_active && pcp->p_callback == callback) {
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;

				if ((*pcp->p_callback)(pcp->p_seg, pcp->p_addr,
				    pcp->p_len, pcp->p_pp, pcp->p_rw)) {
					done = 1;
				}

				npages += pcp->p_len >> PAGESHIFT;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					npages_window +=
					    pcp->p_len >> PAGESHIFT;
				}
				kmem_free(pcp, sizeof (struct seg_pcache));
			}
			pcp = npcp;
			if (done)
				break;
		}
		mutex_exit(&hp->p_hmutex);
		if (done)
			break;
	}

	mutex_enter(&seg_pcache);
	seg_pdisable--;
	mutex_exit(&seg_pcache);

	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
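
/*
 * Bumping seg_pdisable for the duration of the scan keeps
 * seg_plookup() and seg_pinsert() from handing out or adding entries
 * while the walk is in progress; since seg_pdisable is a counter
 * rather than a flag, concurrent disablers (e.g. a DR pre-delete
 * callback) compose correctly.
 */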

/*
 * purge all entries for a given segment. Since we
 * call back into the segment driver directly for page
 * reclaim, the caller needs to hold the right locks.
 */
void
seg_ppurge(struct seg *seg)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	if (seg_plazy == 0) {
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	pcp = hp->p_hnext;
	while (pcp != (struct seg_pcache *)hp) {
		if (pcp->p_seg == seg) {
			if (pcp->p_active) {
				break;
			}
			hp->p_qlen--;
			pcp->p_hprev->p_hnext = pcp->p_hnext;
			pcp->p_hnext->p_hprev = pcp->p_hprev;
			pcp->p_hprev = delcallb_list;
			delcallb_list = pcp;
		}
		pcp = pcp->p_hnext;
	}
	mutex_exit(&hp->p_hmutex);
	while (delcallb_list != NULL) {
		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;

		PPRINT4("seg_ppurge: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		ASSERT(seg == pcp->p_seg);
		(void) (*pcp->p_callback)(seg, pcp->p_addr,
		    pcp->p_len, pcp->p_pp, pcp->p_rw);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}
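
/*
 * Note that the bucket walk above stops at the first entry for this
 * segment that is still active, leaving that entry and any later ones
 * in place; callers are expected to have quiesced pagelock activity
 * on the segment before purging it.
 */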

static void seg_pinit_mem_config(void);

/*
 * setup the pagelock cache
 */
static void
seg_pinit(void)
{
	struct seg_phash *hp;
	int i;
	uint_t physmegs;

	sema_init(&seg_psaync_sem, 0, NULL, SEMA_DEFAULT, NULL);

	mutex_enter(&seg_pcache);
	if (p_hashtab == NULL) {
		physmegs = physmem >> (20 - PAGESHIFT);

		/* If p_hashsize was not set in /etc/system ... */
		if (p_hashsize == 0) {
			/*
			 * Choose p_hashsize based on physmem.
			 */
			if (physmegs < 64) {
				p_hashsize = 64;
			} else if (physmegs < 1024) {
				p_hashsize = 1024;
			} else if (physmegs < 10 * 1024) {
				p_hashsize = 8192;
			} else if (physmegs < 20 * 1024) {
				p_hashsize = 2 * 8192;
				seg_pmaxqlen = 16;
			} else {
				p_hashsize = 128 * 1024;
				seg_pmaxqlen = 128;
			}
		}

		p_hashtab = kmem_zalloc(
			p_hashsize * sizeof (struct seg_phash), KM_SLEEP);
		for (i = 0; i < p_hashsize; i++) {
			hp = (struct seg_phash *)&p_hashtab[i];
			hp->p_hnext = (struct seg_pcache *)hp;
			hp->p_hprev = (struct seg_pcache *)hp;
			mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
		}
		if (seg_pwindow == 0) {
			if (physmegs < 24) {
				/* don't use cache */
				seg_plazy = 0;
			} else if (physmegs < 64) {
				seg_pwindow = physmem >> 5; /* 3% of memory */
			} else if (physmegs < 10 * 1024) {
				seg_pwindow = physmem >> 3; /* 12% of memory */
			} else {
				seg_pwindow = physmem >> 1;
			}
		}
	}
	mutex_exit(&seg_pcache);

	seg_pinit_mem_config();
}
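
/*
 * Worked example of the sizing above (illustrative): on a machine with
 * 512MB of memory, physmegs == 512, so p_hashsize defaults to 1024
 * buckets and seg_pwindow to physmem >> 3, i.e. roughly 12.5% of
 * physical pages may be cached; below 24MB the cache is disabled
 * entirely (seg_plazy = 0).
 */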

/*
 * called by pageout if memory is low
 */
void
seg_preap(void)
{
	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plocked == 0 || seg_plazy == 0) {
		return;
	}
	sema_v(&seg_psaync_sem);
}

static void seg_pupdate(void *);

/*
 * run as a background thread and reclaim pagelock
 * pages which have not been used recently
 */
void
seg_pasync_thread(void)
{
	callb_cpr_t cpr_info;
	kmutex_t pasync_lock;	/* just for CPR stuff */

	mutex_init(&pasync_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &pasync_lock,
		callb_generic_cpr, "seg_pasync");

	if (seg_preap_interval == 0) {
		seg_preap_interval = seg_preap_time * hz;
	} else {
		seg_preap_interval *= hz;
	}
	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, NULL, seg_preap_interval);
	}

	for (;;) {
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		mutex_exit(&pasync_lock);
		sema_p(&seg_psaync_sem);
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pasync_lock);
		mutex_exit(&pasync_lock);

		seg_ppurge_all(0);
	}
}

static void
seg_pupdate(void *dummy)
{
	sema_v(&seg_psaync_sem);

	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, dummy, seg_preap_interval);
	}
}
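
/*
 * The reclaim machinery is thus driven by a semaphore with two
 * producers: seg_pupdate(), a self-rearming timeout that fires every
 * seg_preap_interval ticks (seg_preap_time seconds by default), and
 * seg_preap(), which pageout calls when memory is low.  The consumer,
 * seg_pasync_thread(), wakes once per post and runs an unforced
 * (throttled) seg_ppurge_all() pass.
 */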

static struct kmem_cache *seg_cache;

/*
 * Initialize segment management data structures.
 */
void
seg_init(void)
{
	kstat_t *ksp;

	seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
		0, NULL, NULL, NULL, NULL, NULL, 0);

	ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
		segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *)segadvstat_ptr;
		kstat_install(ksp);
	}

	seg_pinit();
}

/*
 * Allocate a segment to cover [base, base+size)
 * and attach it to the specified address space.
 */
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
	struct seg *new;
	caddr_t segbase;
	size_t segsize;

	segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
	segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
	    (uintptr_t)segbase;

	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
		return ((struct seg *)NULL);	/* bad virtual addr range */

	if (as != &kas &&
	    valid_usr_range(segbase, segsize, 0, as,
	    as->a_userlimit) != RANGE_OKAY)
		return ((struct seg *)NULL);	/* bad virtual addr range */

	new = kmem_cache_alloc(seg_cache, KM_SLEEP);
	new->s_ops = NULL;
	new->s_data = NULL;
	new->s_szc = 0;
	new->s_flags = 0;
	if (seg_attach(as, segbase, segsize, new) < 0) {
		kmem_cache_free(seg_cache, new);
		return ((struct seg *)NULL);
	}
	/* caller must fill in ops, data */
	return (new);
}
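
/*
 * Illustrative caller sketch (hypothetical driver create routine;
 * the mydrv_* names are assumptions, not part of this file):
 *
 *	struct seg *seg = seg_alloc(as, addr, len);
 *	if (seg == NULL)
 *		return (ENOMEM);
 *	seg->s_ops = &mydrv_segops;
 *	seg->s_data = mydrv_data;
 *
 * seg_alloc() page-aligns the range, validates it against the address
 * space and attaches via seg_attach(); only s_ops and s_data are left
 * for the caller to fill in.
 */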

/*
 * Attach a segment to the address space.  Used by seg_alloc()
 * and for kernel startup to attach to static segments.
 */
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
	seg->s_as = as;
	seg->s_base = base;
	seg->s_size = size;

	/*
	 * as_addseg() will add the segment at the appropriate point
	 * in the list. It will return -1 if there is overlap with
	 * an already existing segment.
	 */
	return (as_addseg(as, seg));
}

/*
 * Unmap a segment and free it from its associated address space.
 * This should be called by anybody who's finished with a whole segment's
 * mapping.  Just calls SEGOP_UNMAP() on the whole mapping.  It is the
 * responsibility of the segment driver to unlink the segment
 * from the address space, and to free public and private data structures
 * associated with the segment.  (This is typically done by a call to
 * seg_free()).
 */
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
	int ret;
#endif /* DEBUG */

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	/* Shouldn't have called seg_unmap if mapping isn't yet established */
	ASSERT(seg->s_data != NULL);

	/* Unmap the whole mapping */
#ifdef DEBUG
	ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
	ASSERT(ret == 0);
#else
	SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
#endif /* DEBUG */
}

/*
 * Free the segment from its associated as. This should only be called
 * if a mapping to the segment has not yet been established (e.g., if
 * an error occurs in the middle of doing an as_map when the segment
 * has already been partially set up) or if it has already been deleted
 * (e.g., from a segment driver unmap routine if the unmap applies to the
 * entire segment). If the mapping is currently set up then seg_unmap() should
 * be called instead.
 */
void
seg_free(struct seg *seg)
{
	register struct as *as = seg->s_as;
	struct seg *tseg = as_removeseg(as, seg);

	ASSERT(tseg == seg);

	/*
	 * If the segment private data field is NULL,
	 * then the segment driver is not attached yet.
	 */
	if (seg->s_data != NULL)
		SEGOP_FREE(seg);

	kmem_cache_free(seg_cache, seg);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
}

/*
 * Attempt to purge seg_pcache.  May need to return before this has
 * completed to allow other pre_del callbacks to unlock pages. This is
 * ok because:
 *	1) The seg_pdisable flag has been set so at least we won't
 *	cache any more locks and the locks we couldn't purge
 *	will not be held if they do get released by a subsequent
 *	pre-delete callback.
 *
 *	2) The rest of the memory delete thread processing does not
 *	depend on the changes made in this pre-delete callback. No
 *	panics will result, the worst that will happen is that the
 *	DR code will time out and cancel the delete.
 */
/*ARGSUSED*/
static int
seg_p_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	pgcnt_t	old_plocked;
	int stall_count = 0;

	mutex_enter(&seg_pcache);
	seg_pdisable++;
	ASSERT(seg_pdisable != 0);
	mutex_exit(&seg_pcache);

	/*
	 * Attempt to empty the cache. Terminate if seg_plocked does not
	 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
	 */
	while (seg_plocked != 0) {
		old_plocked = seg_plocked;
		seg_ppurge_all(1);
		if (seg_plocked == old_plocked) {
			if (stall_count++ > SEGP_STALL_THRESHOLD) {
				cmn_err(CE_NOTE, "!Pre-delete couldn't purge"
					" pagelock cache - continuing");
				break;
			}
		} else
			stall_count = 0;
		if (seg_plocked != 0)
			delay(hz/SEGP_PREDEL_DELAY_FACTOR);
	}
	return (0);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	mutex_enter(&seg_pcache);
	ASSERT(seg_pdisable != 0);
	seg_pdisable--;
	mutex_exit(&seg_pcache);
}

static kphysm_setup_vector_t seg_p_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	seg_p_mem_config_post_add,
	seg_p_mem_config_pre_del,
	seg_p_mem_config_post_del,
};

static void
seg_pinit_mem_config(void)
{
	int ret;

	ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
	/*
	 * Want to catch this in the debug kernel. At run time, if the
	 * callbacks don't get run, all will be OK, as the disable just
	 * makes it more likely that the pages can be collected.
	 */
	ASSERT(ret == 0);
}