xref: /onnv-gate/usr/src/uts/common/vm/vm_seg.c (revision 3247)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - segment management.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/callb.h>
#include <sys/mem_config.h>
#include <sys/mman.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_spt.h>
#include <vm/seg_vn.h>
/*
 * kstats for segment advise
 */
segadvstat_t segadvstat = {
	{ "MADV_FREE_hit",	KSTAT_DATA_ULONG },
	{ "MADV_FREE_miss",	KSTAT_DATA_ULONG },
};

kstat_named_t *segadvstat_ptr = (kstat_named_t *)&segadvstat;
uint_t segadvstat_ndata = sizeof (segadvstat) / sizeof (kstat_named_t);

/* #define	PDEBUG */
#if defined(PDEBUG) || defined(lint) || defined(__lint)
int pdebug = 0;
#else
#define	pdebug		0
#endif	/* PDEBUG */

#define	PPRINTF				if (pdebug) printf
#define	PPRINT(x)			PPRINTF(x)
#define	PPRINT1(x, a)			PPRINTF(x, a)
#define	PPRINT2(x, a, b)		PPRINTF(x, a, b)
#define	PPRINT3(x, a, b, c)		PPRINTF(x, a, b, c)
#define	PPRINT4(x, a, b, c, d)		PPRINTF(x, a, b, c, d)
#define	PPRINT5(x, a, b, c, d, e)	PPRINTF(x, a, b, c, d, e)

#define	P_HASHMASK		(p_hashsize - 1)
#define	P_BASESHIFT		6

/*
 * entry in the segment page cache
 */
struct seg_pcache {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int		p_active;	/* active count */
	int		p_ref;		/* ref bit */
	size_t		p_len;		/* segment length */
	caddr_t		p_addr;		/* base address */
	struct seg	*p_seg;		/* segment */
	struct page	**p_pp;		/* pp shadow list */
	enum seg_rw	p_rw;		/* rw */
	uint_t		p_flags;	/* bit flags */
	int		(*p_callback)(struct seg *, caddr_t, size_t,
			    struct page **, enum seg_rw);
};

struct seg_phash {
	struct seg_pcache *p_hnext;	/* list for hashed blocks */
	struct seg_pcache *p_hprev;
	int p_qlen;			/* Q length */
	kmutex_t p_hmutex;		/* protects hash bucket */
};
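
/*
 * Each hash bucket header also serves as the sentinel of its doubly
 * linked list: an empty bucket has p_hnext and p_hprev pointing back at
 * the bucket itself (cast to struct seg_pcache *), so list walks stop
 * when they come back around to the header.
 */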

static int seg_preap_time = 20;	/* reclaim every 20 secs */
static int seg_pmaxqlen = 5;	/* max Q length in hash list */
static int seg_ppcount = 5;	/* max # of purges per reclaim interval */
static int seg_plazy = 1;	/* if 1, pages are cached after pageunlock */
static pgcnt_t seg_pwindow;	/* max # of pages that can be cached */
static pgcnt_t seg_plocked;	/* # of pages which are cached by pagelock */
static pgcnt_t seg_plocked_window; /* # pages from window */
int seg_preapahead;

static uint_t seg_pdisable = 0;	/* if not 0, caching temporarily disabled */

static int seg_pupdate_active = 1;	/* background reclaim thread */
static clock_t seg_preap_interval;	/* reap interval in ticks */

static kmutex_t seg_pcache;	/* protects the whole pagelock cache */
static kmutex_t seg_pmem;	/* protects window counter */
static ksema_t seg_psaync_sem;	/* sema for reclaim thread */
static struct seg_phash *p_hashtab;
static int p_hashsize = 0;

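/*
 * Hash bucket selection: shift off the low P_BASESHIFT bits of the seg
 * pointer (they carry little variation for kmem-allocated segments) and
 * mask with P_HASHMASK.
 */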
#define	p_hash(seg) \
	(P_HASHMASK & \
	((uintptr_t)(seg) >> P_BASESHIFT))

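/*
 * Full match of a cache entry against a pagelock request, without and
 * with the shadow page list.
 */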
#define	p_match(pcp, seg, addr, len, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)

#define	p_match_pp(pcp, seg, addr, len, pp, rw) \
	(((pcp)->p_seg == (seg) && \
	(pcp)->p_addr == (addr) && \
	(pcp)->p_pp == (pp) && \
	(pcp)->p_rw == (rw) && \
	(pcp)->p_len == (len)) ? 1 : 0)


/*
 * lookup an address range in pagelock cache. Return shadow list
 * and bump up active count.
 */
struct page **
seg_plookup(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	/*
	 * Skip the pagelock cache while DR is in progress or
	 * seg_pcache is off.
	 */
	if (seg_pdisable || seg_plazy == 0) {
		return (NULL);
	}

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match(pcp, seg, addr, len, rw)) {
			pcp->p_active++;
			mutex_exit(&hp->p_hmutex);

			PPRINT5("seg_plookup hit: seg %p, addr %p, "
			    "len %lx, count %d, pplist %p \n",
			    (void *)seg, (void *)addr, len, pcp->p_active,
			    (void *)pcp->p_pp);

			return (pcp->p_pp);
		}
	}
	mutex_exit(&hp->p_hmutex);

	PPRINT("seg_plookup miss:\n");

	return (NULL);
}

/*
 * mark address range inactive. If the cache is off or the address
 * range is not in the cache we call the segment driver to reclaim
 * the pages. Otherwise just decrement active count and set ref bit.
 */
void
seg_pinactive(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		(void) (*callback)(seg, addr, len, pp, rw);
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	for (pcp = hp->p_hnext; pcp != (struct seg_pcache *)hp;
	    pcp = pcp->p_hnext) {
		if (p_match_pp(pcp, seg, addr, len, pp, rw)) {
			pcp->p_active--;
			ASSERT(pcp->p_active >= 0);
			if (pcp->p_active == 0 && seg_pdisable) {
				int npages;

				ASSERT(callback == pcp->p_callback);
				/* free the entry */
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;
				mutex_exit(&hp->p_hmutex);
				npages = pcp->p_len >> PAGESHIFT;
				mutex_enter(&seg_pmem);
				seg_plocked -= npages;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					seg_plocked_window -= npages;
				}
				mutex_exit(&seg_pmem);
				kmem_free(pcp, sizeof (struct seg_pcache));
				goto out;
			}
			pcp->p_ref = 1;
			mutex_exit(&hp->p_hmutex);
			return;
		}
	}
	mutex_exit(&hp->p_hmutex);
out:
	(void) (*callback)(seg, addr, len, pp, rw);
}

/*
 * The seg_pinsert_check() is used by segment drivers to predict whether
 * a call to seg_pinsert will fail and thereby avoid wasteful pre-processing.
 */

int
seg_pinsert_check(struct seg *seg, size_t len, uint_t flags)
{
	struct seg_phash *hp;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		pgcnt_t npages;

		npages = len >> PAGESHIFT;
		if ((seg_plocked_window + npages) > seg_pwindow) {
			return (SEGP_FAIL);
		}
	}
	return (SEGP_SUCCESS);
}


/*
 * insert address range with shadow list into pagelock cache. If
 * the cache is off or caching is temporarily disabled or the allowed
 * 'window' is exceeded - return SEGP_FAIL. Otherwise return
 * SEGP_SUCCESS.
 */
int
seg_pinsert(struct seg *seg, caddr_t addr, size_t len, struct page **pp,
    enum seg_rw rw, uint_t flags, int (*callback)(struct seg *, caddr_t,
    size_t, struct page **, enum seg_rw))
{
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages;

	if (seg_plazy == 0) {
		return (SEGP_FAIL);
	}
	if (seg_pdisable != 0) {
		return (SEGP_FAIL);
	}
	ASSERT((len & PAGEOFFSET) == 0);
	hp = &p_hashtab[p_hash(seg)];
	if (hp->p_qlen > seg_pmaxqlen && (flags & SEGP_FORCE_WIRED) == 0) {
		return (SEGP_FAIL);
	}
	npages = len >> PAGESHIFT;
	mutex_enter(&seg_pmem);
	/*
	 * If the SEGP_FORCE_WIRED flag is set,
	 * we skip the check for seg_pwindow.
	 */
	if ((flags & SEGP_FORCE_WIRED) == 0) {
		seg_plocked_window += npages;
		if (seg_plocked_window > seg_pwindow) {
			seg_plocked_window -= npages;
			mutex_exit(&seg_pmem);
			return (SEGP_FAIL);
		}
	}
	seg_plocked += npages;
	mutex_exit(&seg_pmem);

	pcp = kmem_alloc(sizeof (struct seg_pcache), KM_SLEEP);
	pcp->p_seg = seg;
	pcp->p_addr = addr;
	pcp->p_len = len;
	pcp->p_pp = pp;
	pcp->p_rw = rw;
	pcp->p_callback = callback;
	pcp->p_active = 1;
	pcp->p_flags = flags;

	PPRINT4("seg_pinsert: seg %p, addr %p, len %lx, pplist %p\n",
	    (void *)seg, (void *)addr, len, (void *)pp);

	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	hp->p_qlen++;
	pcp->p_hnext = hp->p_hnext;
	pcp->p_hprev = (struct seg_pcache *)hp;
	hp->p_hnext->p_hprev = pcp;
	hp->p_hnext = pcp;
	mutex_exit(&hp->p_hmutex);
	return (SEGP_SUCCESS);
}
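
/*
 * A rough sketch of how a segment driver's pagelock code is expected to
 * use this cache (the driver-side helper names below are hypothetical):
 *
 *	On L_PAGELOCK:
 *		if ((pplist = seg_plookup(seg, addr, len, rw)) != NULL)
 *			return pplist;			(cache hit)
 *		build the shadow page list, then
 *		(void) seg_pinsert(seg, addr, len, pplist, rw, flags,
 *		    driver_reclaim_callback);
 *	On L_PAGEUNLOCK:
 *		seg_pinactive(seg, addr, len, pplist, rw,
 *		    driver_reclaim_callback);
 *
 * seg_pinsert_check() can be called before building the shadow list to
 * avoid that work when an insert is going to fail anyway.
 */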

/*
 * purge all entries from the pagelock cache if not active
 * and not recently used. Drop all locks and call through
 * the address space into the segment driver to reclaim
 * the pages. This makes sure we get the address space
 * and segment driver locking right.
 */
static void
seg_ppurge_all(int force)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	int purge_count = 0;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {
		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;

		/*
		 * While 'force' is set, seg_pasync_thread is not
		 * throttled.  This is to speed up flushing of seg_pcache
		 * in preparation for DR.
		 *
		 * In the normal case, when 'force' is not set, we throttle
		 * seg_pasync_thread so that we don't spend all the
		 * time in purging the cache.
		 */
		while ((pcp != (struct seg_pcache *)hp) &&
				(force || (purge_count <= seg_ppcount))) {

			/*
			 * purge entries which are not active and
			 * have not been used recently and
			 * have the SEGP_ASYNC_FLUSH flag.
			 *
			 * In the 'force' case, we ignore the
			 * SEGP_ASYNC_FLUSH flag.
			 */
			if (!(pcp->p_flags & SEGP_ASYNC_FLUSH))
				pcp->p_ref = 1;
			if (force)
				pcp->p_ref = 0;
			if (!pcp->p_ref && !pcp->p_active) {
				struct as *as = pcp->p_seg->s_as;

				/*
				 * try to get the readers lock on the address
				 * space before taking out the cache element.
				 * This ensures as_pagereclaim() can actually
				 * call through the address space and free
				 * the pages. If we don't get the lock, just
				 * skip this entry. The pages will be reclaimed
				 * by the segment driver at unmap time.
				 */
				if (AS_LOCK_TRYENTER(as, &as->a_lock,
				    RW_READER)) {
					hp->p_qlen--;
					pcp->p_hprev->p_hnext = pcp->p_hnext;
					pcp->p_hnext->p_hprev = pcp->p_hprev;
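					/*
					 * Reuse p_hprev to thread the entry
					 * onto the delayed callback list.
					 */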
					pcp->p_hprev = delcallb_list;
					delcallb_list = pcp;
					purge_count++;
				}
			} else {
				pcp->p_ref = 0;
			}
			pcp = pcp->p_hnext;
		}
		mutex_exit(&hp->p_hmutex);
		if (!force && purge_count > seg_ppcount)
			break;
	}

	/*
	 * run the delayed callback list. We don't want to hold the
	 * cache lock during a call through the address space.
	 */
	while (delcallb_list != NULL) {
		struct as *as;

		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;
		as = pcp->p_seg->s_as;

		PPRINT4("seg_ppurge_all: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)pcp->p_seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		as_pagereclaim(as, pcp->p_pp, pcp->p_addr,
		    pcp->p_len, pcp->p_rw);
		AS_LOCK_EXIT(as, &as->a_lock);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}

/*
 * Remove cached pages for a segment's (or segments') entries from the
 * hash table.  The segments are identified by a given client's callback
 * function.
 * This is useful for multiple segs cached on behalf of a
 * dummy segment (ISM/DISM) with a common callback function.
 * The client's callback function may return a status indicating
 * that the last seg's entry has been purged. In such a case
 * seg_ppurge_seg() stops searching the hash table and exits.
 * Otherwise all hash table entries are scanned.
 */
void
seg_ppurge_seg(int (*callback)(struct seg *, caddr_t, size_t,
    struct page **, enum seg_rw))
{
	struct seg_pcache *pcp, *npcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;
	int	done = 0;

	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plazy == 0 || seg_plocked == 0) {
		return;
	}
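	/* temporarily disable the cache while we walk the hash table */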
	mutex_enter(&seg_pcache);
	seg_pdisable++;
	mutex_exit(&seg_pcache);

	for (hp = p_hashtab; hp < &p_hashtab[p_hashsize]; hp++) {

		mutex_enter(&hp->p_hmutex);
		pcp = hp->p_hnext;
		while (pcp != (struct seg_pcache *)hp) {

			/*
			 * purge entries which are not active
			 */
			npcp = pcp->p_hnext;
			if (!pcp->p_active && pcp->p_callback == callback) {
				hp->p_qlen--;
				pcp->p_hprev->p_hnext = pcp->p_hnext;
				pcp->p_hnext->p_hprev = pcp->p_hprev;

				if ((*pcp->p_callback)(pcp->p_seg, pcp->p_addr,
				    pcp->p_len, pcp->p_pp, pcp->p_rw)) {
					done = 1;
				}

				npages += pcp->p_len >> PAGESHIFT;
				if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
					npages_window +=
					    pcp->p_len >> PAGESHIFT;
				}
				kmem_free(pcp, sizeof (struct seg_pcache));
			}
			pcp = npcp;
			if (done)
				break;
		}
		mutex_exit(&hp->p_hmutex);
		if (done)
			break;
	}

	mutex_enter(&seg_pcache);
	seg_pdisable--;
	mutex_exit(&seg_pcache);

	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}

/*
 * purge all entries for a given segment. Since we
 * call back into the segment driver directly for page
 * reclaim the caller needs to hold the right locks.
 */
void
seg_ppurge(struct seg *seg)
{
	struct seg_pcache *delcallb_list = NULL;
	struct seg_pcache *pcp;
	struct seg_phash *hp;
	pgcnt_t npages = 0;
	pgcnt_t npages_window = 0;

	if (seg_plazy == 0) {
		return;
	}
	hp = &p_hashtab[p_hash(seg)];
	mutex_enter(&hp->p_hmutex);
	pcp = hp->p_hnext;
	while (pcp != (struct seg_pcache *)hp) {
		if (pcp->p_seg == seg) {
			if (pcp->p_active) {
				break;
			}
			hp->p_qlen--;
			pcp->p_hprev->p_hnext = pcp->p_hnext;
			pcp->p_hnext->p_hprev = pcp->p_hprev;
			pcp->p_hprev = delcallb_list;
			delcallb_list = pcp;
		}
		pcp = pcp->p_hnext;
	}
	mutex_exit(&hp->p_hmutex);
	while (delcallb_list != NULL) {
		pcp = delcallb_list;
		delcallb_list = pcp->p_hprev;

		PPRINT4("seg_ppurge: purge seg %p, addr %p, len %lx, "
		    "pplist %p\n", (void *)seg, (void *)pcp->p_addr,
		    pcp->p_len, (void *)pcp->p_pp);

		ASSERT(seg == pcp->p_seg);
		(void) (*pcp->p_callback)(seg, pcp->p_addr,
		    pcp->p_len, pcp->p_pp, pcp->p_rw);
		npages += pcp->p_len >> PAGESHIFT;
		if ((pcp->p_flags & SEGP_FORCE_WIRED) == 0) {
			npages_window += pcp->p_len >> PAGESHIFT;
		}
		kmem_free(pcp, sizeof (struct seg_pcache));
	}
	mutex_enter(&seg_pmem);
	seg_plocked -= npages;
	seg_plocked_window -= npages_window;
	mutex_exit(&seg_pmem);
}

static void seg_pinit_mem_config(void);

/*
 * set up the pagelock cache
 */
static void
seg_pinit(void)
{
	struct seg_phash *hp;
	int i;
	uint_t physmegs;

	sema_init(&seg_psaync_sem, 0, NULL, SEMA_DEFAULT, NULL);

	mutex_enter(&seg_pcache);
	if (p_hashtab == NULL) {
		physmegs = physmem >> (20 - PAGESHIFT);

		/* If p_hashsize was not set in /etc/system ... */
		if (p_hashsize == 0) {
			/*
			 * Choose p_hashsize based on physmem.
			 */
			if (physmegs < 64) {
				p_hashsize = 64;
			} else if (physmegs < 1024) {
				p_hashsize = 1024;
			} else if (physmegs < 10 * 1024) {
				p_hashsize = 8192;
			} else if (physmegs < 20 * 1024) {
				p_hashsize = 2 * 8192;
				seg_pmaxqlen = 16;
			} else {
				p_hashsize = 128 * 1024;
				seg_pmaxqlen = 128;
			}
		}

		p_hashtab = kmem_zalloc(
			p_hashsize * sizeof (struct seg_phash), KM_SLEEP);
		for (i = 0; i < p_hashsize; i++) {
			hp = (struct seg_phash *)&p_hashtab[i];
			hp->p_hnext = (struct seg_pcache *)hp;
			hp->p_hprev = (struct seg_pcache *)hp;
			mutex_init(&hp->p_hmutex, NULL, MUTEX_DEFAULT, NULL);
		}
		if (seg_pwindow == 0) {
			if (physmegs < 24) {
				/* don't use cache */
				seg_plazy = 0;
			} else if (physmegs < 64) {
				seg_pwindow = physmem >> 5; /* 3% of memory */
			} else if (physmegs < 10 * 1024) {
				seg_pwindow = physmem >> 3; /* 12% of memory */
			} else {
				seg_pwindow = physmem >> 1;
			}
		}
	}
	mutex_exit(&seg_pcache);

	seg_pinit_mem_config();
}

/*
 * called by pageout if memory is low
 */
void
seg_preap(void)
{
	/*
	 * if the cache is off or empty, return
	 */
	if (seg_plocked == 0 || seg_plazy == 0) {
		return;
	}
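	/* wake up the async reclaim thread */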
	sema_v(&seg_psaync_sem);
}

static void seg_pupdate(void *);

/*
 * run as a background thread and reclaim pagelock
 * pages which have not been used recently
 */
void
seg_pasync_thread(void)
{
	callb_cpr_t cpr_info;
	kmutex_t pasync_lock;	/* just for CPR stuff */

	mutex_init(&pasync_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &pasync_lock,
		callb_generic_cpr, "seg_pasync");

	if (seg_preap_interval == 0) {
		seg_preap_interval = seg_preap_time * hz;
	} else {
		seg_preap_interval *= hz;
	}
	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, NULL, seg_preap_interval);
	}

	for (;;) {
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_BEGIN(&cpr_info);
		mutex_exit(&pasync_lock);
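		/* wait for a wakeup from seg_pupdate() or seg_preap() */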
		sema_p(&seg_psaync_sem);
		mutex_enter(&pasync_lock);
		CALLB_CPR_SAFE_END(&cpr_info, &pasync_lock);
		mutex_exit(&pasync_lock);

		seg_ppurge_all(0);
	}
}

static void
seg_pupdate(void *dummy)
{
	sema_v(&seg_psaync_sem);

	if (seg_plazy && seg_pupdate_active) {
		(void) timeout(seg_pupdate, dummy, seg_preap_interval);
	}
}

static struct kmem_cache *seg_cache;

/*
 * Initialize segment management data structures.
 */
void
seg_init(void)
{
	kstat_t *ksp;

	seg_cache = kmem_cache_create("seg_cache", sizeof (struct seg),
		0, NULL, NULL, NULL, NULL, NULL, 0);

	ksp = kstat_create("unix", 0, "segadvstat", "vm", KSTAT_TYPE_NAMED,
		segadvstat_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp) {
		ksp->ks_data = (void *)segadvstat_ptr;
		kstat_install(ksp);
	}

	seg_pinit();
}

/*
 * Allocate a segment to cover [base, base+size]
 * and attach it to the specified address space.
 */
struct seg *
seg_alloc(struct as *as, caddr_t base, size_t size)
{
	struct seg *new;
	caddr_t segbase;
	size_t segsize;

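	/* round the base down and the size up to page boundaries */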
	segbase = (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK);
	segsize = (((uintptr_t)(base + size) + PAGEOFFSET) & PAGEMASK) -
	    (uintptr_t)segbase;

	if (!valid_va_range(&segbase, &segsize, segsize, AH_LO))
		return ((struct seg *)NULL);	/* bad virtual addr range */

	if (as != &kas &&
	    valid_usr_range(segbase, segsize, 0, as,
	    as->a_userlimit) != RANGE_OKAY)
		return ((struct seg *)NULL);	/* bad virtual addr range */

	new = kmem_cache_alloc(seg_cache, KM_SLEEP);
	new->s_ops = NULL;
	new->s_data = NULL;
	new->s_szc = 0;
	new->s_flags = 0;
	if (seg_attach(as, segbase, segsize, new) < 0) {
		kmem_cache_free(seg_cache, new);
		return ((struct seg *)NULL);
	}
	/* caller must fill in ops, data */
	return (new);
}

/*
 * Attach a segment to the address space.  Used by seg_alloc()
 * and for kernel startup to attach to static segments.
 */
int
seg_attach(struct as *as, caddr_t base, size_t size, struct seg *seg)
{
	seg->s_as = as;
	seg->s_base = base;
	seg->s_size = size;

	/*
	 * as_addseg() will add the segment at the appropriate point
	 * in the list. It will return -1 if there is overlap with
	 * an already existing segment.
	 */
	return (as_addseg(as, seg));
}

/*
 * Unmap a segment and free it from its associated address space.
 * This should be called by anybody who's finished with a whole segment's
 * mapping.  Just calls SEGOP_UNMAP() on the whole mapping.  It is the
 * responsibility of the segment driver to unlink the segment
 * from the address space, and to free public and private data structures
 * associated with the segment.  (This is typically done by a call to
 * seg_free()).
 */
void
seg_unmap(struct seg *seg)
{
#ifdef DEBUG
	int ret;
#endif /* DEBUG */

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	/* Shouldn't have called seg_unmap if mapping isn't yet established */
	ASSERT(seg->s_data != NULL);

	/* Unmap the whole mapping */
#ifdef DEBUG
	ret = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
	ASSERT(ret == 0);
#else
	SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
#endif /* DEBUG */
}

/*
 * Free the segment from its associated as. This should only be called
 * if a mapping to the segment has not yet been established (e.g., if
 * an error occurs in the middle of doing an as_map when the segment
 * has already been partially set up) or if it has already been deleted
 * (e.g., from a segment driver unmap routine if the unmap applies to the
 * entire segment). If the mapping is currently set up then seg_unmap() should
 * be called instead.
 */
void
seg_free(struct seg *seg)
{
	register struct as *as = seg->s_as;
	struct seg *tseg = as_removeseg(as, seg);

	ASSERT(tseg == seg);

	/*
	 * If the segment private data field is NULL,
	 * then segment driver is not attached yet.
	 */
	if (seg->s_data != NULL)
		SEGOP_FREE(seg);

	kmem_cache_free(seg_cache, seg);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
}

/*
 * Attempt to purge seg_pcache.  May need to return before this has
 * completed to allow other pre_del callbacks to unlock pages. This is
 * ok because:
 *	1) The seg_pdisable flag has been set so at least we won't
 *	cache any more locks and the locks we couldn't purge
 *	will not be held if they do get released by a subsequent
 *	pre-delete callback.
 *
 *	2) The rest of the memory delete thread processing does not
 *	depend on the changes made in this pre-delete callback. No
 *	panics will result; the worst that will happen is that the
 *	DR code will time out and cancel the delete.
 */
/*ARGSUSED*/
static int
seg_p_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	pgcnt_t	old_plocked;
	int stall_count = 0;

	mutex_enter(&seg_pcache);
	seg_pdisable++;
	ASSERT(seg_pdisable != 0);
	mutex_exit(&seg_pcache);

	/*
	 * Attempt to empty the cache. Terminate if seg_plocked does not
	 * diminish with SEGP_STALL_THRESHOLD consecutive attempts.
	 */
	while (seg_plocked != 0) {
		old_plocked = seg_plocked;
		seg_ppurge_all(1);
		if (seg_plocked == old_plocked) {
			if (stall_count++ > SEGP_STALL_THRESHOLD) {
				cmn_err(CE_NOTE, "!Pre-delete couldn't purge"
					" pagelock cache - continuing");
				break;
			}
		} else
			stall_count = 0;
		if (seg_plocked != 0)
			delay(hz/SEGP_PREDEL_DELAY_FACTOR);
	}
	return (0);
}

/*ARGSUSED*/
static void
seg_p_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	mutex_enter(&seg_pcache);
	ASSERT(seg_pdisable != 0);
	seg_pdisable--;
	mutex_exit(&seg_pcache);
}

static kphysm_setup_vector_t seg_p_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	seg_p_mem_config_post_add,
	seg_p_mem_config_pre_del,
	seg_p_mem_config_post_del,
};

static void
seg_pinit_mem_config(void)
{
	int ret;

	ret = kphysm_setup_func_register(&seg_p_mem_config_vec, (void *)NULL);
	/*
	 * Want to catch this in the debug kernel. At run time, if the
	 * callbacks don't get run all will be OK as the disable just makes
	 * it more likely that the pages can be collected.
	 */
	ASSERT(ret == 0);
}

extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;

/*
 * Verify that the segment is not a shared anonymous segment which reserves
 * swap.  zone.max-swap accounting (zone->zone_max_swap) cannot be transferred
 * from one zone to another if any segments are shared.  This is because the
 * last process to exit will credit the swap reservation.  This could lead
 * to the swap being reserved by one zone, and credited to another.
 */
boolean_t
seg_can_change_zones(struct seg *seg)
{
	struct segvn_data *svd;

	if (seg->s_ops == &segspt_shmops)
		return (B_FALSE);

	if (seg->s_ops == &segvn_ops) {
		svd = (struct segvn_data *)seg->s_data;
		if (svd->type == MAP_SHARED &&
		    svd->amp != NULL &&
		    svd->amp->swresv > 0)
			return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * Return swap reserved by a segment backing a private mapping.
 */
size_t
seg_swresv(struct seg *seg)
{
	struct segvn_data *svd;
	size_t swap = 0;

	if (seg->s_ops == &segvn_ops) {
		svd = (struct segvn_data *)seg->s_data;
		if (svd->type == MAP_PRIVATE && svd->swresv > 0)
			swap = svd->swresv;
	}
	return (swap);
}