/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/user.h>
#include <sys/mman.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/tuneable.h>
#include <vm/hat.h>
#include <vm/seg.h>
#include <vm/as.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <sys/buf.h>
#include <sys/swap.h>
#include <sys/atomic.h>
#include <vm/seg_spt.h>
#include <sys/debug.h>
#include <sys/vtrace.h>
#include <sys/shm.h>
#include <sys/lgrp.h>
#include <sys/vmsystm.h>

#include <sys/tnf_probe.h>

#define	SEGSPTADDR	(caddr_t)0x0

/*
 * # pages used for spt
 */
static size_t	spt_used;

/*
 * segspt_minfree is the memory left for the system after ISM
 * has locked its pages; it is set to 5% of availrmem in
 * sptcreate() when ISM is created.  ISM should not use more
 * than ~90% of availrmem; if it does, the performance of the
 * system may decrease.  Machines with large memories may be
 * able to use more memory for ISM, so we set the default
 * segspt_minfree to 5% (which gives ISM at most 95% of
 * availrmem).  If somebody wants even more memory for ISM
 * (risking hanging the system) they can patch segspt_minfree
 * to a smaller number.
 */
pgcnt_t segspt_minfree = 0;
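
/*
 * Illustrative example (added commentary, not in the original source): with
 * availrmem of 2,000,000 pages, sptcreate() below sets segspt_minfree to
 * availrmem/20 = 100,000 pages reserved for the rest of the system, leaving
 * ISM at most 1,900,000 pages (95% of availrmem).
 */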

static int segspt_create(struct seg *seg, caddr_t argsp);
static int segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_free(struct seg *seg);
static void segspt_free_pages(struct seg *seg, caddr_t addr, size_t len);
static lgrp_mem_policy_info_t *segspt_getpolicy(struct seg *seg, caddr_t addr);

static void
segspt_badop()
{
	panic("segspt_badop called");
	/*NOTREACHED*/
}

#define	SEGSPT_BADOP(t)	(t(*)())segspt_badop
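
/*
 * Added commentary: SEGSPT_BADOP(t) casts segspt_badop to a pointer to a
 * function returning type t, so it can fill the seg_ops slots below that
 * must never be invoked on the underlying spt segment; any such call panics.
 */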

struct seg_ops segspt_ops = {
	SEGSPT_BADOP(int),		/* dup */
	segspt_unmap,
	segspt_free,
	SEGSPT_BADOP(int),		/* fault */
	SEGSPT_BADOP(faultcode_t),	/* faulta */
	SEGSPT_BADOP(int),		/* setprot */
	SEGSPT_BADOP(int),		/* checkprot */
	SEGSPT_BADOP(int),		/* kluster */
	SEGSPT_BADOP(size_t),		/* swapout */
	SEGSPT_BADOP(int),		/* sync */
	SEGSPT_BADOP(size_t),		/* incore */
	SEGSPT_BADOP(int),		/* lockop */
	SEGSPT_BADOP(int),		/* getprot */
	SEGSPT_BADOP(u_offset_t),	/* getoffset */
	SEGSPT_BADOP(int),		/* gettype */
	SEGSPT_BADOP(int),		/* getvp */
	SEGSPT_BADOP(int),		/* advise */
	SEGSPT_BADOP(void),		/* dump */
	SEGSPT_BADOP(int),		/* pagelock */
	SEGSPT_BADOP(int),		/* setpgsz */
	SEGSPT_BADOP(int),		/* getmemid */
	segspt_getpolicy,		/* getpolicy */
};
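
/*
 * Added commentary: segspt_ops is installed on the spt segment that
 * segspt_create() maps into the dummy address space allocated by
 * sptcreate().  The process-visible shared memory segments that attach
 * to it use segspt_shmops, defined below, which implements the full set
 * of segment operations.
 */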

static int segspt_shmdup(struct seg *seg, struct seg *newseg);
static int segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize);
static void segspt_shmfree(struct seg *seg);
static faultcode_t segspt_shmfault(struct hat *hat, struct seg *seg,
		caddr_t addr, size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segspt_shmfaulta(struct seg *seg, caddr_t addr);
static int segspt_shmsetprot(register struct seg *seg, register caddr_t addr,
			register size_t len, register uint_t prot);
static int segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size,
			uint_t prot);
static int	segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta);
static size_t	segspt_shmswapout(struct seg *seg);
static size_t segspt_shmincore(struct seg *seg, caddr_t addr, size_t len,
			register char *vec);
static int segspt_shmsync(struct seg *seg, register caddr_t addr, size_t len,
			int attr, uint_t flags);
static int segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
			int attr, int op, ulong_t *lockmap, size_t pos);
static int segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t *protv);
static u_offset_t segspt_shmgetoffset(struct seg *seg, caddr_t addr);
static int segspt_shmgettype(struct seg *seg, caddr_t addr);
static int segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static int segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len,
			uint_t behav);
static void segspt_shmdump(struct seg *seg);
static int segspt_shmpagelock(struct seg *, caddr_t, size_t,
			struct page ***, enum lock_type, enum seg_rw);
static int segspt_shmsetpgsz(struct seg *, caddr_t, size_t, uint_t);
static int segspt_shmgetmemid(struct seg *, caddr_t, memid_t *);
static lgrp_mem_policy_info_t *segspt_shmgetpolicy(struct seg *, caddr_t);

struct seg_ops segspt_shmops = {
	segspt_shmdup,
	segspt_shmunmap,
	segspt_shmfree,
	segspt_shmfault,
	segspt_shmfaulta,
	segspt_shmsetprot,
	segspt_shmcheckprot,
	segspt_shmkluster,
	segspt_shmswapout,
	segspt_shmsync,
	segspt_shmincore,
	segspt_shmlockop,
	segspt_shmgetprot,
	segspt_shmgetoffset,
	segspt_shmgettype,
	segspt_shmgetvp,
	segspt_shmadvise,	/* advise */
	segspt_shmdump,
	segspt_shmpagelock,
	segspt_shmsetpgsz,
	segspt_shmgetmemid,
	segspt_shmgetpolicy,
};

static void segspt_purge(struct seg *seg);
static int segspt_reclaim(struct seg *, caddr_t, size_t, struct page **,
		enum seg_rw);
static int spt_anon_getpages(struct seg *seg, caddr_t addr, size_t len,
		page_t **ppa);


/*ARGSUSED*/
int
sptcreate(size_t size, struct seg **sptseg, struct anon_map *amp,
    uint_t prot, uint_t flags, uint_t share_szc)
{
	int 	err;
	struct  as	*newas;
	struct	segspt_crargs sptcargs;

#ifdef DEBUG
	TNF_PROBE_1(sptcreate, "spt", /* CSTYLED */,
	    tnf_ulong, size, size);
#endif
	if (segspt_minfree == 0)	/* leave min. 5% of availrmem */
		segspt_minfree = availrmem/20;	/* for the system */

	if (!hat_supported(HAT_SHARED_PT, (void *)0))
		return (EINVAL);

	/*
	 * get a new as for this shared memory segment
	 */
	newas = as_alloc();
	sptcargs.amp = amp;
	sptcargs.prot = prot;
	sptcargs.flags = flags;
	sptcargs.szc = share_szc;

	/*
	 * create a shared page table (spt) segment
	 */

	if (err = as_map(newas, SEGSPTADDR, size, segspt_create, &sptcargs)) {
		as_free(newas);
		return (err);
	}
	*sptseg = sptcargs.seg_spt;
	return (0);
}

void
sptdestroy(struct as *as, struct anon_map *amp)
{

#ifdef DEBUG
	TNF_PROBE_0(sptdestroy, "spt", /* CSTYLED */);
#endif
	(void) as_unmap(as, SEGSPTADDR, amp->size);
	as_free(as);
}
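
/*
 * Usage sketch (added commentary; the call sites are assumed, they live
 * outside this file): the shared memory driver calls
 * sptcreate(size, &sptseg, amp, prot, flags, szc) once when an ISM/DISM
 * segment is set up, keeps the returned spt segment, and later calls
 * sptdestroy(sptseg->s_as, amp) to unmap it and free the dummy address space.
 */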

/*
 * called from seg_free().
 * free (i.e., unlock, unmap, return to free list)
 *  all the pages in the given seg.
 */
void
segspt_free(struct seg	*seg)
{
	struct spt_data *sptd = (struct spt_data *)seg->s_data;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	if (sptd != NULL) {
		if (sptd->spt_realsize)
			segspt_free_pages(seg, seg->s_base, sptd->spt_realsize);

		if (sptd->spt_ppa_lckcnt)
			kmem_free(sptd->spt_ppa_lckcnt,
				sizeof (*sptd->spt_ppa_lckcnt)
				* btopr(sptd->spt_amp->size));
		kmem_free(sptd->spt_vp, sizeof (*sptd->spt_vp));
		mutex_destroy(&sptd->spt_lock);
		kmem_free(sptd, sizeof (*sptd));
	}
}

/*ARGSUSED*/
static int
segspt_shmsync(struct seg *seg, caddr_t addr, size_t len, int attr,
	uint_t flags)
{
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	return (0);
}

/*ARGSUSED*/
static size_t
segspt_shmincore(struct seg *seg, caddr_t addr, size_t len, char *vec)
{
	caddr_t	eo_seg;
	pgcnt_t	npages;
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg;
	struct spt_data *sptd;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
#ifdef lint
	seg = seg;
#endif
	sptseg = shmd->shm_sptseg;
	sptd = sptseg->s_data;

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		eo_seg = addr + len;
		while (addr < eo_seg) {
			/* page exists, and it's locked. */
			*vec++ = SEG_PAGE_INCORE | SEG_PAGE_LOCKED |
				SEG_PAGE_ANON;
			addr += PAGESIZE;
		}
		return (len);
	} else {
		struct  anon_map *amp = shmd->shm_amp;
		struct  anon	*ap;
		page_t		*pp;
		pgcnt_t 	anon_index;
		struct vnode 	*vp;
		u_offset_t 	off;
		ulong_t		i;
		int		ret;
		anon_sync_obj_t	cookie;

		addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
		anon_index = seg_page(seg, addr);
		npages = btopr(len);
		if (anon_index + npages > btopr(shmd->shm_amp->size)) {
			return (EINVAL);
		}
		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
		for (i = 0; i < npages; i++, anon_index++) {
			ret = 0;
			anon_array_enter(amp, anon_index, &cookie);
			ap = anon_get_ptr(amp->ahp, anon_index);
			if (ap != NULL) {
				swap_xlate(ap, &vp, &off);
				anon_array_exit(&cookie);
				pp = page_lookup_nowait(vp, off, SE_SHARED);
				if (pp != NULL) {
					ret |= SEG_PAGE_INCORE | SEG_PAGE_ANON;
					page_unlock(pp);
				}
			} else {
				anon_array_exit(&cookie);
			}
			if (shmd->shm_vpage[anon_index] & DISM_PG_LOCKED) {
				ret |= SEG_PAGE_LOCKED;
			}
			*vec++ = (char)ret;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);
		return (len);
	}
}

static int
segspt_unmap(struct seg *seg, caddr_t raddr, size_t ssize)
{
	size_t share_size;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * seg.s_size may have been rounded up to the largest page size
	 * in shmat().
	 * XXX This should be cleaned up. sptdestroy should take a length
	 * argument, the same as sptcreate's. Then this rounding would not
	 * be needed (or could be done in shm.c). Only the check for the
	 * full segment would be needed.
	 *
	 * XXX -- shouldn't raddr == 0 always? These tests don't seem
	 * to be useful at all.
	 */
	share_size = page_get_pagesize(seg->s_szc);
	ssize = P2ROUNDUP(ssize, share_size);

	if (raddr == seg->s_base && ssize == seg->s_size) {
		seg_free(seg);
		return (0);
	} else
		return (EINVAL);
}

int
segspt_create(struct seg *seg, caddr_t argsp)
{
	int		err;
	caddr_t		addr = seg->s_base;
	struct spt_data *sptd;
	struct 	segspt_crargs *sptcargs = (struct segspt_crargs *)argsp;
	struct anon_map *amp = sptcargs->amp;
	struct	cred	*cred = CRED();
	ulong_t		i, j, anon_index = 0;
	pgcnt_t		npages = btopr(amp->size);
	struct vnode	*vp;
	page_t		**ppa;
	uint_t		hat_flags;

	/*
	 * We are holding the a_lock on the underlying dummy as,
	 * so we can make calls to the HAT layer.
	 */
	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

#ifdef DEBUG
	TNF_PROBE_2(segspt_create, "spt", /* CSTYLED */,
	    tnf_opaque, addr, addr,
	    tnf_ulong, len, seg->s_size);
#endif
	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if (err = anon_swap_adjust(npages))
			return (err);
	}
	err = ENOMEM;

	if ((sptd = kmem_zalloc(sizeof (*sptd), KM_NOSLEEP)) == NULL)
		goto out1;

	if ((sptcargs->flags & SHM_PAGEABLE) == 0) {
		if ((ppa = kmem_zalloc(((sizeof (page_t *)) * npages),
		    KM_NOSLEEP)) == NULL)
			goto out2;
	}

	mutex_init(&sptd->spt_lock, NULL, MUTEX_DEFAULT, NULL);

	if ((vp = kmem_zalloc(sizeof (*vp), KM_NOSLEEP)) == NULL)
		goto out3;

	seg->s_ops = &segspt_ops;
	sptd->spt_vp = vp;
	sptd->spt_amp = amp;
	sptd->spt_prot = sptcargs->prot;
	sptd->spt_flags = sptcargs->flags;
	seg->s_data = (caddr_t)sptd;
	sptd->spt_ppa = NULL;
	sptd->spt_ppa_lckcnt = NULL;
	seg->s_szc = sptcargs->szc;

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	amp->a_szc = seg->s_szc;
	ANON_LOCK_EXIT(&amp->a_rwlock);

	/*
	 * Set policy to affect initial allocation of pages in
	 * anon_map_createpages()
	 */
	(void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, amp, anon_index,
	    NULL, 0, ptob(npages));

	if (sptcargs->flags & SHM_PAGEABLE) {
		size_t  share_sz;
		pgcnt_t new_npgs, more_pgs;
		struct anon_hdr *nahp;

		share_sz = page_get_pagesize(seg->s_szc);
		if (!IS_P2ALIGNED(amp->size, share_sz)) {
			/*
			 * We round the size of the anon array up to a 4M
			 * boundary because we always create 4M of pages
			 * when locking and faulting pages in, and this way
			 * we don't have to check all the corner cases,
			 * e.g. whether there is enough space to allocate
			 * a 4M page.
			 */
			new_npgs = btop(P2ROUNDUP(amp->size, share_sz));
			more_pgs = new_npgs - npages;

			if (anon_resv(ptob(more_pgs)) == 0) {
				err = ENOMEM;
				goto out4;
			}
			nahp = anon_create(new_npgs, ANON_SLEEP);
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
			(void) anon_copy_ptr(amp->ahp, 0, nahp, 0, npages,
			    ANON_SLEEP);
			anon_release(amp->ahp, npages);
			amp->ahp = nahp;
			amp->swresv = amp->size = ptob(new_npgs);
			ANON_LOCK_EXIT(&amp->a_rwlock);
			npages = new_npgs;
		}

		sptd->spt_ppa_lckcnt = kmem_zalloc(npages *
		    sizeof (*sptd->spt_ppa_lckcnt), KM_SLEEP);
		sptd->spt_pcachecnt = 0;
		sptd->spt_realsize = ptob(npages);
		sptcargs->seg_spt = seg;
		return (0);
	}

	/*
	 * get array of pages for each anon slot in amp
	 */
	if ((err = anon_map_createpages(amp, anon_index, ptob(npages), ppa,
	    seg, addr, S_CREATE, cred)) != 0)
		goto out4;

	/*
	 * addr is initial address corresponding to the first page on ppa list
	 */
	for (i = 0; i < npages; i++) {
		/* attempt to lock all pages */
		if (!page_pp_lock(ppa[i], 0, 1)) {
			/*
			 * if unable to lock any page, unlock all
			 * of them and return error
			 */
			for (j = 0; j < i; j++)
				page_pp_unlock(ppa[j], 0, 1);
			for (i = 0; i < npages; i++) {
				page_unlock(ppa[i]);
			}
			err = ENOMEM;
			goto out4;
		}
	}

	/*
	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
	 * for the entire life of the segment, for example platforms
	 * that do not support Dynamic Reconfiguration.
	 */
	hat_flags = HAT_LOAD_SHARE;
	if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, NULL))
		hat_flags |= HAT_LOAD_LOCK;

	hat_memload_array(seg->s_as->a_hat, addr, ptob(npages),
	    ppa, sptd->spt_prot, hat_flags);

	/*
	 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
	 * we will leave the pages locked SE_SHARED for the life
	 * of the ISM segment. This will prevent any calls to
	 * hat_pageunload() on this ISM segment for those platforms.
	 */
	if (!(hat_flags & HAT_LOAD_LOCK)) {
		/*
		 * On platforms that support HAT_DYNAMIC_ISM_UNMAP,
		 * we no longer need to hold the SE_SHARED lock on the pages,
		 * since L_PAGELOCK and F_SOFTLOCK calls will grab the
		 * SE_SHARED lock on the pages as necessary.
		 */
		for (i = 0; i < npages; i++)
			page_unlock(ppa[i]);
	}
	sptd->spt_pcachecnt = 0;
	kmem_free(ppa, ((sizeof (page_t *)) * npages));
	sptd->spt_realsize = ptob(npages);
	atomic_add_long(&spt_used, npages);
	sptcargs->seg_spt = seg;
	return (0);

out4:
	seg->s_data = NULL;
	kmem_free(vp, sizeof (*vp));
out3:
	mutex_destroy(&sptd->spt_lock);
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		kmem_free(ppa, (sizeof (*ppa) * npages));
out2:
	kmem_free(sptd, sizeof (*sptd));
out1:
	if ((sptcargs->flags & SHM_PAGEABLE) == 0)
		anon_swap_restore(npages);
	return (err);
}

/*ARGSUSED*/
void
segspt_free_pages(struct seg *seg, caddr_t addr, size_t len)
{
	struct page 	*pp;
	struct spt_data *sptd = (struct spt_data *)seg->s_data;
	pgcnt_t		npages;
	ulong_t		anon_idx;
	struct anon_map *amp;
	struct anon 	*ap;
	struct vnode 	*vp;
	u_offset_t 	off;
	uint_t		hat_flags;
	int		root = 0;
	pgcnt_t		pgs, curnpgs = 0;
	page_t		*rootpp;

	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));

	len = P2ROUNDUP(len, PAGESIZE);

	npages = btop(len);

	hat_flags = HAT_UNLOAD_UNLOCK;
	if ((hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) ||
	    (sptd->spt_flags & SHM_PAGEABLE)) {
		hat_flags = HAT_UNLOAD;
	}

	hat_unload(seg->s_as->a_hat, addr, len, hat_flags);

	amp = sptd->spt_amp;
	if (sptd->spt_flags & SHM_PAGEABLE)
		npages = btop(amp->size);

	ASSERT(amp);
	for (anon_idx = 0; anon_idx < npages; anon_idx++) {
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
				panic("segspt_free_pages: null app");
				/*NOTREACHED*/
			}
		} else {
			if ((ap = anon_get_next_ptr(amp->ahp, &anon_idx))
			    == NULL)
				continue;
		}
		ASSERT(ANON_ISBUSY(anon_get_slot(amp->ahp, anon_idx)) == 0);
		swap_xlate(ap, &vp, &off);

		/*
		 * If this platform supports HAT_DYNAMIC_ISM_UNMAP,
		 * the pages will not be holding the SE_SHARED lock at
		 * this point.
		 *
		 * On platforms that do not support HAT_DYNAMIC_ISM_UNMAP,
		 * the pages are still held SE_SHARED locked from the
		 * original segspt_create().
		 *
		 * Our goal is to get the SE_EXCL lock on each page, remove
		 * the permanent lock on it and invalidate the page.
		 */
		if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
			if (hat_flags == HAT_UNLOAD)
				pp = page_lookup(vp, off, SE_EXCL);
			else {
				if ((pp = page_find(vp, off)) == NULL) {
					panic("segspt_free_pages: "
					    "page not locked");
					/*NOTREACHED*/
				}
				if (!page_tryupgrade(pp)) {
					page_unlock(pp);
					pp = page_lookup(vp, off, SE_EXCL);
				}
			}
			if (pp == NULL) {
				panic("segspt_free_pages: "
				    "page not in the system");
				/*NOTREACHED*/
			}
			page_pp_unlock(pp, 0, 1);
		} else {
			if ((pp = page_lookup(vp, off, SE_EXCL)) == NULL)
				continue;
			page_pp_unlock(pp, 0, 0);
		}
		/*
		 * It's logical to invalidate the pages here as in most cases
		 * these were created by segspt.
		 */
		if (pp->p_szc != 0) {
			/*
			 * For DISM swap is released in shm_rm_amp.
			 */
			if ((sptd->spt_flags & SHM_PAGEABLE) == 0 &&
			    ap->an_pvp != NULL) {
				panic("segspt_free_pages: pvp non NULL");
				/*NOTREACHED*/
			}
			if (root == 0) {
				ASSERT(curnpgs == 0);
				root = 1;
				rootpp = pp;
				pgs = curnpgs = page_get_pagecnt(pp->p_szc);
				ASSERT(pgs > 1);
				ASSERT(IS_P2ALIGNED(pgs, pgs));
				ASSERT(!(page_pptonum(pp) & (pgs - 1)));
				curnpgs--;
			} else if ((page_pptonum(pp) & (pgs - 1)) == pgs - 1) {
				ASSERT(curnpgs == 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - 1));
				page_destroy_pages(rootpp);
				root = 0;
				curnpgs = 0;
			} else {
				ASSERT(curnpgs > 1);
				ASSERT(page_pptonum(pp) ==
				    page_pptonum(rootpp) + (pgs - curnpgs));
				curnpgs--;
			}
		} else {
			if (root != 0 || curnpgs != 0) {
				panic("segspt_free_pages: bad large page");
				/*NOTREACHED*/
			}
			/*LINTED: constant in conditional context */
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		}
	}

	if (root != 0 || curnpgs != 0) {
		panic("segspt_free_pages: bad large page");
		/*NOTREACHED*/
	}

	/*
	 * mark that pages have been released
	 */
	sptd->spt_realsize = 0;

	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
		atomic_add_long(&spt_used, -npages);
		anon_swap_restore(npages);
	}
}

/*
 * Get memory allocation policy info for specified address in given segment
 */
static lgrp_mem_policy_info_t *
segspt_getpolicy(struct seg *seg, caddr_t addr)
{
	struct anon_map		*amp;
	ulong_t			anon_index;
	lgrp_mem_policy_info_t	*policy_info;
	struct spt_data		*spt_data;

	ASSERT(seg != NULL);

	/*
	 * Get anon_map from segspt
	 *
	 * Assume that no lock needs to be held on anon_map, since
	 * it should be protected by its reference count which must be
	 * nonzero for an existing segment
	 * Need to grab readers lock on policy tree though
	 */
	spt_data = (struct spt_data *)seg->s_data;
	if (spt_data == NULL)
		return (NULL);
	amp = spt_data->spt_amp;
	ASSERT(amp->refcnt != 0);

	/*
	 * Get policy info
	 *
	 * Assume starting anon index of 0
	 */
	anon_index = seg_page(seg, addr);
	policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);

	return (policy_info);
}

/*
 * DISM only.
 * Return locked pages over a given range.
 *
 * We will cache all DISM locked pages and save the pplist for the
 * entire segment in the ppa field of the underlying DISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
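
/*
 * Added commentary on the lock_type protocol handled below: L_PAGEUNLOCK
 * drops the active reference on the cached pplist via seg_pinactive() (and
 * purges the cache synchronously if an unmap is waiting); L_PAGERECLAIM
 * calls segspt_reclaim() directly to unlock and free the cached list; any
 * other type is treated as L_PAGELOCK, which looks up the cached ppa array
 * or builds and inserts a new one.
 */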
/*ARGSUSED*/
static int
segspt_dismpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	struct  shm_data *shmd = (struct shm_data *)seg->s_data;
	struct  seg	*sptseg = shmd->shm_sptseg;
	struct  spt_data *sptd = sptseg->s_data;
	pgcnt_t pg_idx, npages, tot_npages, npgs;
	struct  page **pplist, **pl, **ppa, *pp;
	struct  anon_map *amp;
	spgcnt_t	an_idx;
	int 	ret = ENOTSUP;
	uint_t	pl_built = 0;
	struct  anon *ap;
	struct  vnode *vp;
	u_offset_t off;
	pgcnt_t claim_availrmem = 0;
	uint_t	szc;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * We want to lock/unlock the entire ISM segment. Therefore,
	 * we will be using the underlying sptseg and its base address
	 * and length for the caching arguments.
	 */
	ASSERT(sptseg);
	ASSERT(sptd);

	pg_idx = seg_page(seg, addr);
	npages = btopr(len);

	/*
	 * check if the request is larger than number of pages covered
	 * by amp
	 */
	if (pg_idx + npages > btopr(sptd->spt_amp->size)) {
		*ppp = NULL;
		return (ENOTSUP);
	}

	if (type == L_PAGEUNLOCK) {
		ASSERT(sptd->spt_ppa != NULL);

		seg_pinactive(seg, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, sptd->spt_prot, segspt_reclaim);

		/*
		 * If someone is blocked while unmapping, we purge
		 * segment page cache and thus reclaim pplist synchronously
		 * without waiting for seg_pasync_thread. This speeds up
		 * unmapping in cases where munmap(2) is called, while
		 * raw async i/o is still in progress or where a thread
		 * exits on data fault in a multithreaded application.
		 */
		if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
			segspt_purge(seg);
		}
		return (0);
	} else if (type == L_PAGERECLAIM) {
		ASSERT(sptd->spt_ppa != NULL);
		(void) segspt_reclaim(seg, seg->s_base, sptd->spt_amp->size,
		    sptd->spt_ppa, sptd->spt_prot);
		return (0);
	}

	if (sptd->spt_flags & DISM_PPA_CHANGED) {
		segspt_purge(seg);
		/*
		 * for DISM the ppa needs to be rebuilt since
		 * the number of locked pages could have changed
		 */
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * First try to find pages in segment page cache, without
	 * holding the segment lock.
	 */
	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_prot);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa != NULL);
		ASSERT(sptd->spt_ppa == pplist);
		ppa = sptd->spt_ppa;
		for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
			if (ppa[an_idx] == NULL) {
				seg_pinactive(seg, seg->s_base,
				    sptd->spt_amp->size, ppa,
				    sptd->spt_prot, segspt_reclaim);
				*ppp = NULL;
				return (ENOTSUP);
			}
			if ((szc = ppa[an_idx]->p_szc) != 0) {
				npgs = page_get_pagecnt(szc);
				an_idx = P2ROUNDUP(an_idx + 1, npgs);
			} else {
				an_idx++;
			}
		}
		/*
		 * Since we cache the entire DISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. pg_idx.
		 */
		*ppp = &(sptd->spt_ppa[pg_idx]);
		return (0);
	}

	/* The L_PAGELOCK case... */
	mutex_enter(&sptd->spt_lock);
	/*
	 * try to find pages in segment page cache with mutex
	 */
	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
	    sptd->spt_prot);
	if (pplist != NULL) {
		ASSERT(sptd->spt_ppa != NULL);
		ASSERT(sptd->spt_ppa == pplist);
		ppa = sptd->spt_ppa;
		for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
			if (ppa[an_idx] == NULL) {
				mutex_exit(&sptd->spt_lock);
				seg_pinactive(seg, seg->s_base,
				    sptd->spt_amp->size, ppa,
				    sptd->spt_prot, segspt_reclaim);
				*ppp = NULL;
				return (ENOTSUP);
			}
			if ((szc = ppa[an_idx]->p_szc) != 0) {
				npgs = page_get_pagecnt(szc);
				an_idx = P2ROUNDUP(an_idx + 1, npgs);
			} else {
				an_idx++;
			}
		}
		/*
		 * Since we cache the entire DISM segment, we want to
		 * set ppp to point to the first slot that corresponds
		 * to the requested addr, i.e. pg_idx.
		 */
		mutex_exit(&sptd->spt_lock);
		*ppp = &(sptd->spt_ppa[pg_idx]);
		return (0);
	}
	if (seg_pinsert_check(seg, sptd->spt_amp->size, SEGP_FORCE_WIRED) ==
	    SEGP_FAIL) {
		mutex_exit(&sptd->spt_lock);
		*ppp = NULL;
		return (ENOTSUP);
	}

	/*
	 * No need to worry about protections because DISM pages are always rw.
	 */
	pl = pplist = NULL;
	amp = sptd->spt_amp;

	/*
	 * Do we need to build the ppa array?
	 */
	if (sptd->spt_ppa == NULL) {
		pgcnt_t lpg_cnt = 0;

		pl_built = 1;
		tot_npages = btopr(sptd->spt_amp->size);

		ASSERT(sptd->spt_pcachecnt == 0);
		pplist = kmem_zalloc(sizeof (page_t *) * tot_npages, KM_SLEEP);
		pl = pplist;

		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
		for (an_idx = 0; an_idx < tot_npages; ) {
			ap = anon_get_ptr(amp->ahp, an_idx);
			/*
			 * Cache only mlocked pages. For large pages, if one
			 * (constituent) page is mlocked then all pages of
			 * that large page are cached as well. This allows
			 * quick lookups in the ppa array.
			 */
			if ((ap != NULL) && (lpg_cnt != 0 ||
			    (sptd->spt_ppa_lckcnt[an_idx] != 0))) {

				swap_xlate(ap, &vp, &off);
				pp = page_lookup(vp, off, SE_SHARED);
				ASSERT(pp != NULL);
				if (lpg_cnt == 0) {
					npgs = page_get_pagecnt(pp->p_szc);
					if (!IS_P2ALIGNED(an_idx, npgs)) {
						an_idx = P2ALIGN(an_idx, npgs);
						page_unlock(pp);
						continue;
					}
				}
				if (++lpg_cnt == npgs)
					lpg_cnt = 0;

				/*
				 * availrmem is decremented only
				 * for unlocked pages
				 */
				if (sptd->spt_ppa_lckcnt[an_idx] == 0)
					claim_availrmem++;
				pplist[an_idx] = pp;
			}
			an_idx++;
		}
		ANON_LOCK_EXIT(&amp->a_rwlock);

		mutex_enter(&freemem_lock);
		if (availrmem < tune.t_minarmem + claim_availrmem) {
			mutex_exit(&freemem_lock);
			ret = FC_MAKE_ERR(ENOMEM);
			claim_availrmem = 0;
			goto insert_fail;
		} else {
			availrmem -= claim_availrmem;
		}
		mutex_exit(&freemem_lock);

		sptd->spt_ppa = pl;
	} else {
		/*
		 * We already have a valid ppa[].
		 */
		pl = sptd->spt_ppa;
	}

	ASSERT(pl != NULL);

	ret = seg_pinsert(seg, seg->s_base, sptd->spt_amp->size,
	    pl, sptd->spt_prot, SEGP_FORCE_WIRED | SEGP_ASYNC_FLUSH,
	    segspt_reclaim);
	if (ret == SEGP_FAIL) {
		/*
		 * seg_pinsert failed. We return
		 * ENOTSUP, so that the as_pagelock() code will
		 * then try the slower F_SOFTLOCK path.
		 */
		sptd->spt_ppa = NULL;
		ret = ENOTSUP;
		goto insert_fail;
	}

	/*
	 * In either case, we increment softlockcnt on the 'real' segment.
	 */
	sptd->spt_pcachecnt++;
	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), 1);

	ppa = sptd->spt_ppa;
	for (an_idx = pg_idx; an_idx < pg_idx + npages; ) {
		if (ppa[an_idx] == NULL) {
			mutex_exit(&sptd->spt_lock);
			seg_pinactive(seg, seg->s_base, sptd->spt_amp->size,
			    pl, sptd->spt_prot, segspt_reclaim);
			*ppp = NULL;
			return (ENOTSUP);
		}
		if ((szc = ppa[an_idx]->p_szc) != 0) {
			npgs = page_get_pagecnt(szc);
			an_idx = P2ROUNDUP(an_idx + 1, npgs);
		} else {
			an_idx++;
		}
	}
	/*
	 * We can now drop the sptd->spt_lock since the ppa[]
	 * exists and we have incremented spt_pcachecnt.
	 */
	mutex_exit(&sptd->spt_lock);

	/*
	 * Since we cache the entire segment, we want to
	 * set ppp to point to the first slot that corresponds
	 * to the requested addr, i.e. pg_idx.
	 */
	*ppp = &(sptd->spt_ppa[pg_idx]);
	return (ret);

insert_fail:
	/*
	 * We will only reach this code if we tried and failed.
	 *
	 * And we can drop the lock on the dummy seg, once we've failed
	 * to set up a new ppa[].
	 */
	mutex_exit(&sptd->spt_lock);

	if (pl_built) {
		mutex_enter(&freemem_lock);
		availrmem += claim_availrmem;
		mutex_exit(&freemem_lock);

		/*
		 * We created pl and we need to destroy it.
		 */
		pplist = pl;
		for (an_idx = 0; an_idx < tot_npages; an_idx++) {
			if (pplist[an_idx] != NULL)
				page_unlock(pplist[an_idx]);
		}
		kmem_free(pl, sizeof (page_t *) * tot_npages);
	}

	if (shmd->shm_softlockcnt <= 0) {
		if (AS_ISUNMAPWAIT(seg->s_as)) {
			mutex_enter(&seg->s_as->a_contents);
			if (AS_ISUNMAPWAIT(seg->s_as)) {
				AS_CLRUNMAPWAIT(seg->s_as);
				cv_broadcast(&seg->s_as->a_cv);
			}
			mutex_exit(&seg->s_as->a_contents);
		}
	}
	*ppp = NULL;
	return (ret);
}



/*
 * Return locked pages over a given range.
 *
 * We will cache the entire ISM segment and save the pplist for the
 * entire segment in the ppa field of the underlying ISM segment structure.
 * Later, during a call to segspt_reclaim() we will use this ppa array
 * to page_unlock() all of the pages and then we will free this ppa list.
 */
/*ARGSUSED*/
static int
segspt_shmpagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	struct shm_data *shmd = (struct shm_data *)seg->s_data;
	struct seg	*sptseg = shmd->shm_sptseg;
	struct spt_data *sptd = sptseg->s_data;
	pgcnt_t np, page_index, npages;
	caddr_t a, spt_base;
	struct page **pplist, **pl, *pp;
	struct anon_map *amp;
	ulong_t anon_index;
	int ret = ENOTSUP;
	uint_t	pl_built = 0;
	struct anon *ap;
	struct vnode *vp;
	u_offset_t off;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * We want to lock/unlock the entire ISM segment. Therefore,
	 * we will be using the underlying sptseg and its base address
	 * and length for the caching arguments.
	 */
	ASSERT(sptseg);
	ASSERT(sptd);

	if (sptd->spt_flags & SHM_PAGEABLE) {
		return (segspt_dismpagelock(seg, addr, len, ppp, type, rw));
	}

	page_index = seg_page(seg, addr);
	npages = btopr(len);

	/*
	 * check if the request is larger than number of pages covered
	 * by amp
	 */
	if (page_index + npages > btopr(sptd->spt_amp->size)) {
		*ppp = NULL;
		return (ENOTSUP);
	}
1119*0Sstevel@tonic-gate 
1120*0Sstevel@tonic-gate 	if (type == L_PAGEUNLOCK) {
1121*0Sstevel@tonic-gate 
1122*0Sstevel@tonic-gate 		ASSERT(sptd->spt_ppa != NULL);
1123*0Sstevel@tonic-gate 
1124*0Sstevel@tonic-gate 		seg_pinactive(seg, seg->s_base, sptd->spt_amp->size,
1125*0Sstevel@tonic-gate 		    sptd->spt_ppa, sptd->spt_prot, segspt_reclaim);
1126*0Sstevel@tonic-gate 
1127*0Sstevel@tonic-gate 		/*
1128*0Sstevel@tonic-gate 		 * If someone is blocked while unmapping, we purge
1129*0Sstevel@tonic-gate 		 * segment page cache and thus reclaim pplist synchronously
1130*0Sstevel@tonic-gate 		 * without waiting for seg_pasync_thread. This speeds up
1131*0Sstevel@tonic-gate 		 * unmapping in cases where munmap(2) is called while
1132*0Sstevel@tonic-gate 		 * raw async i/o is still in progress, or where a thread
1133*0Sstevel@tonic-gate 		 * exits on a data fault in a multithreaded application.
1134*0Sstevel@tonic-gate 		 */
1135*0Sstevel@tonic-gate 		if (AS_ISUNMAPWAIT(seg->s_as) && (shmd->shm_softlockcnt > 0)) {
1136*0Sstevel@tonic-gate 			segspt_purge(seg);
1137*0Sstevel@tonic-gate 		}
1138*0Sstevel@tonic-gate 		return (0);
1139*0Sstevel@tonic-gate 	} else if (type == L_PAGERECLAIM) {
1140*0Sstevel@tonic-gate 		ASSERT(sptd->spt_ppa != NULL);
1141*0Sstevel@tonic-gate 
1142*0Sstevel@tonic-gate 		(void) segspt_reclaim(seg, seg->s_base, sptd->spt_amp->size,
1143*0Sstevel@tonic-gate 		    sptd->spt_ppa, sptd->spt_prot);
1144*0Sstevel@tonic-gate 		return (0);
1145*0Sstevel@tonic-gate 	}
1146*0Sstevel@tonic-gate 
1147*0Sstevel@tonic-gate 	/*
1148*0Sstevel@tonic-gate 	 * First try to find pages in segment page cache, without
1149*0Sstevel@tonic-gate 	 * holding the segment lock.
1150*0Sstevel@tonic-gate 	 */
1151*0Sstevel@tonic-gate 	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
1152*0Sstevel@tonic-gate 	    sptd->spt_prot);
1153*0Sstevel@tonic-gate 	if (pplist != NULL) {
1154*0Sstevel@tonic-gate 		ASSERT(sptd->spt_ppa == pplist);
1155*0Sstevel@tonic-gate 		ASSERT(sptd->spt_ppa[page_index]);
1156*0Sstevel@tonic-gate 		/*
1157*0Sstevel@tonic-gate 		 * Since we cache the entire ISM segment, we want to
1158*0Sstevel@tonic-gate 		 * set ppp to point to the first slot that corresponds
1159*0Sstevel@tonic-gate 		 * to the requested addr, i.e. page_index.
1160*0Sstevel@tonic-gate 		 */
1161*0Sstevel@tonic-gate 		*ppp = &(sptd->spt_ppa[page_index]);
1162*0Sstevel@tonic-gate 		return (0);
1163*0Sstevel@tonic-gate 	}
1164*0Sstevel@tonic-gate 
1165*0Sstevel@tonic-gate 	/* The L_PAGELOCK case... */
1166*0Sstevel@tonic-gate 	mutex_enter(&sptd->spt_lock);
1167*0Sstevel@tonic-gate 
1168*0Sstevel@tonic-gate 	/*
1169*0Sstevel@tonic-gate 	 * try to find pages in segment page cache
1170*0Sstevel@tonic-gate 	 */
1171*0Sstevel@tonic-gate 	pplist = seg_plookup(seg, seg->s_base, sptd->spt_amp->size,
1172*0Sstevel@tonic-gate 	    sptd->spt_prot);
1173*0Sstevel@tonic-gate 	if (pplist != NULL) {
1174*0Sstevel@tonic-gate 		ASSERT(sptd->spt_ppa == pplist);
1175*0Sstevel@tonic-gate 		/*
1176*0Sstevel@tonic-gate 		 * Since we cache the entire segment, we want to
1177*0Sstevel@tonic-gate 		 * set ppp to point to the first slot that corresponds
1178*0Sstevel@tonic-gate 		 * to the requested addr, i.e. page_index.
1179*0Sstevel@tonic-gate 		 */
1180*0Sstevel@tonic-gate 		mutex_exit(&sptd->spt_lock);
1181*0Sstevel@tonic-gate 		*ppp = &(sptd->spt_ppa[page_index]);
1182*0Sstevel@tonic-gate 		return (0);
1183*0Sstevel@tonic-gate 	}
1184*0Sstevel@tonic-gate 
1185*0Sstevel@tonic-gate 	if (seg_pinsert_check(seg, sptd->spt_amp->size, SEGP_FORCE_WIRED) ==
1186*0Sstevel@tonic-gate 	    SEGP_FAIL) {
1187*0Sstevel@tonic-gate 		mutex_exit(&sptd->spt_lock);
1188*0Sstevel@tonic-gate 		*ppp = NULL;
1189*0Sstevel@tonic-gate 		return (ENOTSUP);
1190*0Sstevel@tonic-gate 	}
1191*0Sstevel@tonic-gate 
1192*0Sstevel@tonic-gate 	/*
1193*0Sstevel@tonic-gate 	 * No need to worry about protections because ISM pages
1194*0Sstevel@tonic-gate 	 * are always rw.
1195*0Sstevel@tonic-gate 	 */
1196*0Sstevel@tonic-gate 	pl = pplist = NULL;
1197*0Sstevel@tonic-gate 
1198*0Sstevel@tonic-gate 	/*
1199*0Sstevel@tonic-gate 	 * Do we need to build the ppa array?
1200*0Sstevel@tonic-gate 	 */
1201*0Sstevel@tonic-gate 	if (sptd->spt_ppa == NULL) {
1202*0Sstevel@tonic-gate 		ASSERT(sptd->spt_ppa == pplist);
1203*0Sstevel@tonic-gate 
1204*0Sstevel@tonic-gate 		spt_base = sptseg->s_base;
1205*0Sstevel@tonic-gate 		pl_built = 1;
1206*0Sstevel@tonic-gate 
1207*0Sstevel@tonic-gate 		/*
1208*0Sstevel@tonic-gate 		 * availrmem is decremented once during anon_swap_adjust()
1209*0Sstevel@tonic-gate 		 * and is incremented during the anon_unresv(), which is
1210*0Sstevel@tonic-gate 		 * called from shm_rm_amp() when the segment is destroyed.
1211*0Sstevel@tonic-gate 		 */
1212*0Sstevel@tonic-gate 		amp = sptd->spt_amp;
1213*0Sstevel@tonic-gate 		ASSERT(amp != NULL);
1214*0Sstevel@tonic-gate 
1215*0Sstevel@tonic-gate 		/* pcachecnt is protected by sptd->spt_lock */
1216*0Sstevel@tonic-gate 		ASSERT(sptd->spt_pcachecnt == 0);
1217*0Sstevel@tonic-gate 		pplist = kmem_zalloc(sizeof (page_t *)
1218*0Sstevel@tonic-gate 		    * btopr(sptd->spt_amp->size), KM_SLEEP);
1219*0Sstevel@tonic-gate 		pl = pplist;
1220*0Sstevel@tonic-gate 
1221*0Sstevel@tonic-gate 		anon_index = seg_page(sptseg, spt_base);
1222*0Sstevel@tonic-gate 
1223*0Sstevel@tonic-gate 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
1224*0Sstevel@tonic-gate 		for (a = spt_base; a < (spt_base + sptd->spt_amp->size);
1225*0Sstevel@tonic-gate 		    a += PAGESIZE, anon_index++, pplist++) {
1226*0Sstevel@tonic-gate 			ap = anon_get_ptr(amp->ahp, anon_index);
1227*0Sstevel@tonic-gate 			ASSERT(ap != NULL);
1228*0Sstevel@tonic-gate 			swap_xlate(ap, &vp, &off);
1229*0Sstevel@tonic-gate 			pp = page_lookup(vp, off, SE_SHARED);
1230*0Sstevel@tonic-gate 			ASSERT(pp != NULL);
1231*0Sstevel@tonic-gate 			*pplist = pp;
1232*0Sstevel@tonic-gate 		}
1233*0Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
1234*0Sstevel@tonic-gate 
1235*0Sstevel@tonic-gate 		if (a < (spt_base + sptd->spt_amp->size)) {
1236*0Sstevel@tonic-gate 			ret = ENOTSUP;
1237*0Sstevel@tonic-gate 			goto insert_fail;
1238*0Sstevel@tonic-gate 		}
1239*0Sstevel@tonic-gate 		sptd->spt_ppa = pl;
1240*0Sstevel@tonic-gate 	} else {
1241*0Sstevel@tonic-gate 		/*
1242*0Sstevel@tonic-gate 		 * We already have a valid ppa[].
1243*0Sstevel@tonic-gate 		 */
1244*0Sstevel@tonic-gate 		pl = sptd->spt_ppa;
1245*0Sstevel@tonic-gate 	}
1246*0Sstevel@tonic-gate 
1247*0Sstevel@tonic-gate 	ASSERT(pl != NULL);
1248*0Sstevel@tonic-gate 
1249*0Sstevel@tonic-gate 	ret = seg_pinsert(seg, seg->s_base, sptd->spt_amp->size,
1250*0Sstevel@tonic-gate 	    pl, sptd->spt_prot, SEGP_FORCE_WIRED, segspt_reclaim);
1251*0Sstevel@tonic-gate 	if (ret == SEGP_FAIL) {
1252*0Sstevel@tonic-gate 		/*
1253*0Sstevel@tonic-gate 		 * seg_pinsert failed. We return
1254*0Sstevel@tonic-gate 		 * ENOTSUP, so that the as_pagelock() code will
1255*0Sstevel@tonic-gate 		 * then try the slower F_SOFTLOCK path.
1256*0Sstevel@tonic-gate 		 */
1257*0Sstevel@tonic-gate 		if (pl_built) {
1258*0Sstevel@tonic-gate 			/*
1259*0Sstevel@tonic-gate 			 * No one else has referenced the ppa[].
1260*0Sstevel@tonic-gate 			 * We created it and we need to destroy it.
1261*0Sstevel@tonic-gate 			 */
1262*0Sstevel@tonic-gate 			sptd->spt_ppa = NULL;
1263*0Sstevel@tonic-gate 		}
1264*0Sstevel@tonic-gate 		ret = ENOTSUP;
1265*0Sstevel@tonic-gate 		goto insert_fail;
1266*0Sstevel@tonic-gate 	}
1267*0Sstevel@tonic-gate 
1268*0Sstevel@tonic-gate 	/*
1269*0Sstevel@tonic-gate 	 * In either case, we increment softlockcnt on the 'real' segment.
1270*0Sstevel@tonic-gate 	 */
1271*0Sstevel@tonic-gate 	sptd->spt_pcachecnt++;
1272*0Sstevel@tonic-gate 	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), 1);
1273*0Sstevel@tonic-gate 
1274*0Sstevel@tonic-gate 	/*
1275*0Sstevel@tonic-gate 	 * We can now drop the sptd->spt_lock since the ppa[]
1276*0Sstevel@tonic-gate 	 * exists and we have incremented pcachecnt.
1277*0Sstevel@tonic-gate 	 */
1278*0Sstevel@tonic-gate 	mutex_exit(&sptd->spt_lock);
1279*0Sstevel@tonic-gate 
1280*0Sstevel@tonic-gate 	/*
1281*0Sstevel@tonic-gate 	 * Since we cache the entire segment, we want to
1282*0Sstevel@tonic-gate 	 * set ppp to point to the first slot that corresponds
1283*0Sstevel@tonic-gate 	 * to the requested addr, i.e. page_index.
1284*0Sstevel@tonic-gate 	 */
1285*0Sstevel@tonic-gate 	*ppp = &(sptd->spt_ppa[page_index]);
1286*0Sstevel@tonic-gate 	return (ret);
1287*0Sstevel@tonic-gate 
1288*0Sstevel@tonic-gate insert_fail:
1289*0Sstevel@tonic-gate 	/*
1290*0Sstevel@tonic-gate 	 * We reach this code only if we tried and failed.
1291*0Sstevel@tonic-gate 	 *
1292*0Sstevel@tonic-gate 	 * We can drop the lock on the dummy seg now that we have failed
1293*0Sstevel@tonic-gate 	 * to set up a new ppa[].
1294*0Sstevel@tonic-gate 	 */
1295*0Sstevel@tonic-gate 	mutex_exit(&sptd->spt_lock);
1296*0Sstevel@tonic-gate 
1297*0Sstevel@tonic-gate 	if (pl_built) {
1298*0Sstevel@tonic-gate 		/*
1299*0Sstevel@tonic-gate 		 * We created pl and we need to destroy it.
1300*0Sstevel@tonic-gate 		 */
1301*0Sstevel@tonic-gate 		pplist = pl;
1302*0Sstevel@tonic-gate 		np = (((uintptr_t)(a - spt_base)) >> PAGESHIFT);
1303*0Sstevel@tonic-gate 		while (np) {
1304*0Sstevel@tonic-gate 			page_unlock(*pplist);
1305*0Sstevel@tonic-gate 			np--;
1306*0Sstevel@tonic-gate 			pplist++;
1307*0Sstevel@tonic-gate 		}
1308*0Sstevel@tonic-gate 		kmem_free(pl, sizeof (page_t *) *
1309*0Sstevel@tonic-gate 				btopr(sptd->spt_amp->size));
1310*0Sstevel@tonic-gate 	}
1311*0Sstevel@tonic-gate 	if (shmd->shm_softlockcnt <= 0) {
1312*0Sstevel@tonic-gate 		if (AS_ISUNMAPWAIT(seg->s_as)) {
1313*0Sstevel@tonic-gate 			mutex_enter(&seg->s_as->a_contents);
1314*0Sstevel@tonic-gate 			if (AS_ISUNMAPWAIT(seg->s_as)) {
1315*0Sstevel@tonic-gate 				AS_CLRUNMAPWAIT(seg->s_as);
1316*0Sstevel@tonic-gate 				cv_broadcast(&seg->s_as->a_cv);
1317*0Sstevel@tonic-gate 			}
1318*0Sstevel@tonic-gate 			mutex_exit(&seg->s_as->a_contents);
1319*0Sstevel@tonic-gate 		}
1320*0Sstevel@tonic-gate 	}
1321*0Sstevel@tonic-gate 	*ppp = NULL;
1322*0Sstevel@tonic-gate 	return (ret);
1323*0Sstevel@tonic-gate }
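
/*
 * Illustrative sketch, not part of this driver: the pagelock entry points
 * above are normally reached through as_pagelock()/as_pageunlock(), for
 * example from the raw I/O path.  Assuming those interfaces, a caller
 * looks roughly like this (all names below are local to the sketch):
 *
 *	struct page **plist;
 *
 *	if (as_pagelock(as, &plist, addr, len, S_WRITE) == 0) {
 *		... pages are SE_SHARED locked, do the I/O ...
 *		as_pageunlock(as, plist, addr, len, S_WRITE);
 *	}
 *	... else fall back to F_SOFTLOCK faulting (e.g. on ENOTSUP) ...
 *
 * An as_pageunlock() over such a range ends up in the L_PAGEUNLOCK case
 * above, while segspt_purge() and seg_pasync_thread reclaim cached
 * pplists through segspt_reclaim().
 */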
1324*0Sstevel@tonic-gate 
1325*0Sstevel@tonic-gate /*
1326*0Sstevel@tonic-gate  * purge any cached pages in the I/O page cache
1327*0Sstevel@tonic-gate  */
1328*0Sstevel@tonic-gate static void
1329*0Sstevel@tonic-gate segspt_purge(struct seg *seg)
1330*0Sstevel@tonic-gate {
1331*0Sstevel@tonic-gate 	seg_ppurge(seg);
1332*0Sstevel@tonic-gate }
1333*0Sstevel@tonic-gate 
1334*0Sstevel@tonic-gate static int
1335*0Sstevel@tonic-gate segspt_reclaim(struct seg *seg, caddr_t addr, size_t len, struct page **pplist,
1336*0Sstevel@tonic-gate 	enum seg_rw rw)
1337*0Sstevel@tonic-gate {
1338*0Sstevel@tonic-gate 	struct	shm_data *shmd = (struct shm_data *)seg->s_data;
1339*0Sstevel@tonic-gate 	struct	seg	*sptseg;
1340*0Sstevel@tonic-gate 	struct	spt_data *sptd;
1341*0Sstevel@tonic-gate 	pgcnt_t npages, i, free_availrmem = 0;
1342*0Sstevel@tonic-gate 	int	done = 0;
1343*0Sstevel@tonic-gate 
1344*0Sstevel@tonic-gate #ifdef lint
1345*0Sstevel@tonic-gate 	addr = addr;
1346*0Sstevel@tonic-gate #endif
1347*0Sstevel@tonic-gate 	sptseg = shmd->shm_sptseg;
1348*0Sstevel@tonic-gate 	sptd = sptseg->s_data;
1349*0Sstevel@tonic-gate 	npages = (len >> PAGESHIFT);
1350*0Sstevel@tonic-gate 	ASSERT(npages);
1351*0Sstevel@tonic-gate 	ASSERT(sptd->spt_pcachecnt != 0);
1352*0Sstevel@tonic-gate 	ASSERT(sptd->spt_ppa == pplist);
1353*0Sstevel@tonic-gate 	ASSERT(npages == btopr(sptd->spt_amp->size));
1354*0Sstevel@tonic-gate 
1355*0Sstevel@tonic-gate 	/*
1356*0Sstevel@tonic-gate 	 * Acquire the lock on the dummy seg and destroy the
1357*0Sstevel@tonic-gate 	 * ppa array IF this is the last pcachecnt.
1358*0Sstevel@tonic-gate 	 */
1359*0Sstevel@tonic-gate 	mutex_enter(&sptd->spt_lock);
1360*0Sstevel@tonic-gate 	if (--sptd->spt_pcachecnt == 0) {
1361*0Sstevel@tonic-gate 		for (i = 0; i < npages; i++) {
1362*0Sstevel@tonic-gate 			if (pplist[i] == NULL) {
1363*0Sstevel@tonic-gate 				continue;
1364*0Sstevel@tonic-gate 			}
1365*0Sstevel@tonic-gate 			if (rw == S_WRITE) {
1366*0Sstevel@tonic-gate 				hat_setrefmod(pplist[i]);
1367*0Sstevel@tonic-gate 			} else {
1368*0Sstevel@tonic-gate 				hat_setref(pplist[i]);
1369*0Sstevel@tonic-gate 			}
1370*0Sstevel@tonic-gate 			if ((sptd->spt_flags & SHM_PAGEABLE) &&
1371*0Sstevel@tonic-gate 				(sptd->spt_ppa_lckcnt[i] == 0))
1372*0Sstevel@tonic-gate 				free_availrmem++;
1373*0Sstevel@tonic-gate 			page_unlock(pplist[i]);
1374*0Sstevel@tonic-gate 		}
1375*0Sstevel@tonic-gate 		if (sptd->spt_flags & SHM_PAGEABLE) {
1376*0Sstevel@tonic-gate 			mutex_enter(&freemem_lock);
1377*0Sstevel@tonic-gate 			availrmem += free_availrmem;
1378*0Sstevel@tonic-gate 			mutex_exit(&freemem_lock);
1379*0Sstevel@tonic-gate 		}
1380*0Sstevel@tonic-gate 		/*
1381*0Sstevel@tonic-gate 		 * Since we want to cache/uncache the entire ISM segment,
1382*0Sstevel@tonic-gate 		 * we will track the pplist in a segspt-specific field
1383*0Sstevel@tonic-gate 		 * ppa, which is initialized at the time we add an entry to
1384*0Sstevel@tonic-gate 		 * the cache.
1385*0Sstevel@tonic-gate 		 */
1386*0Sstevel@tonic-gate 		ASSERT(sptd->spt_pcachecnt == 0);
1387*0Sstevel@tonic-gate 		kmem_free(pplist, sizeof (page_t *) * npages);
1388*0Sstevel@tonic-gate 		sptd->spt_ppa = NULL;
1389*0Sstevel@tonic-gate 		sptd->spt_flags &= ~DISM_PPA_CHANGED;
1390*0Sstevel@tonic-gate 		done = 1;
1391*0Sstevel@tonic-gate 	}
1392*0Sstevel@tonic-gate 	mutex_exit(&sptd->spt_lock);
1393*0Sstevel@tonic-gate 	/*
1394*0Sstevel@tonic-gate 	 * Now decrement softlockcnt.
1395*0Sstevel@tonic-gate 	 */
1396*0Sstevel@tonic-gate 	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -1);
1397*0Sstevel@tonic-gate 
1398*0Sstevel@tonic-gate 	if (shmd->shm_softlockcnt <= 0) {
1399*0Sstevel@tonic-gate 		if (AS_ISUNMAPWAIT(seg->s_as)) {
1400*0Sstevel@tonic-gate 			mutex_enter(&seg->s_as->a_contents);
1401*0Sstevel@tonic-gate 			if (AS_ISUNMAPWAIT(seg->s_as)) {
1402*0Sstevel@tonic-gate 				AS_CLRUNMAPWAIT(seg->s_as);
1403*0Sstevel@tonic-gate 				cv_broadcast(&seg->s_as->a_cv);
1404*0Sstevel@tonic-gate 			}
1405*0Sstevel@tonic-gate 			mutex_exit(&seg->s_as->a_contents);
1406*0Sstevel@tonic-gate 		}
1407*0Sstevel@tonic-gate 	}
1408*0Sstevel@tonic-gate 	return (done);
1409*0Sstevel@tonic-gate }
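
/*
 * A minimal sketch of the callback contract segspt_reclaim() assumes: the
 * segment page cache stores the (seg, base, size, pplist, prot, callback)
 * tuple registered with seg_pinsert() above and, when an entry is retired
 * (via seg_pinactive(), seg_ppurge() or seg_pasync_thread), invokes
 *
 *	(*callback)(seg, seg->s_base, sptd->spt_amp->size,
 *	    sptd->spt_ppa, rw);
 *
 * The return value of segspt_reclaim() simply reports whether this call
 * actually tore the ppa[] down: 1 only when the last pcachecnt reference
 * went away, 0 otherwise.
 */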
1410*0Sstevel@tonic-gate 
1411*0Sstevel@tonic-gate /*
1412*0Sstevel@tonic-gate  * Do a F_SOFTUNLOCK call over the range requested.
1413*0Sstevel@tonic-gate  * The range must have already been F_SOFTLOCK'ed.
1414*0Sstevel@tonic-gate  *
1415*0Sstevel@tonic-gate  * The calls to acquire and release the anon map lock mutex were
1416*0Sstevel@tonic-gate  * removed in order to avoid a deadly embrace during a DR
1417*0Sstevel@tonic-gate  * memory delete operation.  (E.g., DR blocks while waiting for an
1418*0Sstevel@tonic-gate  * exclusive lock on a page that is being used for kaio; the
1419*0Sstevel@tonic-gate  * thread that will complete the kaio and call segspt_softunlock
1420*0Sstevel@tonic-gate  * blocks on the anon map lock; another thread holding the anon
1421*0Sstevel@tonic-gate  * map lock blocks on another page lock via the segspt_shmfault
1422*0Sstevel@tonic-gate  * -> page_lookup -> page_lookup_create -> page_lock_es code flow.)
1423*0Sstevel@tonic-gate  *
1424*0Sstevel@tonic-gate  * The appropriateness of the removal is based upon the following:
1425*0Sstevel@tonic-gate  * 1. If we are holding a segment's reader lock and the page is held
1426*0Sstevel@tonic-gate  * shared, then the corresponding element in anonmap which points to
1427*0Sstevel@tonic-gate  * anon struct cannot change and there is no need to acquire the
1428*0Sstevel@tonic-gate  * anonymous map lock.
1429*0Sstevel@tonic-gate  * 2. Threads in segspt_softunlock have a reader lock on the segment
1430*0Sstevel@tonic-gate  * and already have the shared page lock, so we are guaranteed that
1431*0Sstevel@tonic-gate  * the anon map slot cannot change and therefore can call anon_get_ptr()
1432*0Sstevel@tonic-gate  * without grabbing the anonymous map lock.
1433*0Sstevel@tonic-gate  * 3. Threads that softlock a shared page break copy-on-write, even if
1434*0Sstevel@tonic-gate  * it's a read.  Thus COW faults can be ignored with respect to soft
1435*0Sstevel@tonic-gate  * unlocking, since the breaking of COW means that the anon slot(s) will
1436*0Sstevel@tonic-gate  * not be shared.
1437*0Sstevel@tonic-gate  */
1438*0Sstevel@tonic-gate static void
1439*0Sstevel@tonic-gate segspt_softunlock(struct seg *seg, caddr_t sptseg_addr,
1440*0Sstevel@tonic-gate 	size_t len, enum seg_rw rw)
1441*0Sstevel@tonic-gate {
1442*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
1443*0Sstevel@tonic-gate 	struct seg	*sptseg;
1444*0Sstevel@tonic-gate 	struct spt_data *sptd;
1445*0Sstevel@tonic-gate 	page_t *pp;
1446*0Sstevel@tonic-gate 	caddr_t adr;
1447*0Sstevel@tonic-gate 	struct vnode *vp;
1448*0Sstevel@tonic-gate 	u_offset_t offset;
1449*0Sstevel@tonic-gate 	ulong_t anon_index;
1450*0Sstevel@tonic-gate 	struct anon_map *amp;		/* XXX - for locknest */
1451*0Sstevel@tonic-gate 	struct anon *ap = NULL;
1452*0Sstevel@tonic-gate 	pgcnt_t npages;
1453*0Sstevel@tonic-gate 
1454*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1455*0Sstevel@tonic-gate 
1456*0Sstevel@tonic-gate 	sptseg = shmd->shm_sptseg;
1457*0Sstevel@tonic-gate 	sptd = sptseg->s_data;
1458*0Sstevel@tonic-gate 
1459*0Sstevel@tonic-gate 	/*
1460*0Sstevel@tonic-gate 	 * Some platforms assume that ISM mappings are HAT_LOAD_LOCK
1461*0Sstevel@tonic-gate 	 * and therefore their pages are SE_SHARED locked
1462*0Sstevel@tonic-gate 	 * for the entire life of the segment.
1463*0Sstevel@tonic-gate 	 */
1464*0Sstevel@tonic-gate 	if ((!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) &&
1465*0Sstevel@tonic-gate 		((sptd->spt_flags & SHM_PAGEABLE) == 0)) {
1466*0Sstevel@tonic-gate 		goto softlock_decrement;
1467*0Sstevel@tonic-gate 	}
1468*0Sstevel@tonic-gate 
1469*0Sstevel@tonic-gate 	/*
1470*0Sstevel@tonic-gate 	 * Any thread is free to do a page_find and
1471*0Sstevel@tonic-gate 	 * page_unlock() on the pages within this seg.
1472*0Sstevel@tonic-gate 	 *
1473*0Sstevel@tonic-gate 	 * We are already holding the as->a_lock on the user's
1474*0Sstevel@tonic-gate 	 * real segment, but we need to hold the a_lock on the
1475*0Sstevel@tonic-gate 	 * underlying dummy as. This is mostly to satisfy the
1476*0Sstevel@tonic-gate 	 * underlying HAT layer.
1477*0Sstevel@tonic-gate 	 */
1478*0Sstevel@tonic-gate 	AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1479*0Sstevel@tonic-gate 	hat_unlock(sptseg->s_as->a_hat, sptseg_addr, len);
1480*0Sstevel@tonic-gate 	AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1481*0Sstevel@tonic-gate 
1482*0Sstevel@tonic-gate 	amp = sptd->spt_amp;
1483*0Sstevel@tonic-gate 	ASSERT(amp != NULL);
1484*0Sstevel@tonic-gate 	anon_index = seg_page(sptseg, sptseg_addr);
1485*0Sstevel@tonic-gate 
1486*0Sstevel@tonic-gate 	for (adr = sptseg_addr; adr < sptseg_addr + len; adr += PAGESIZE) {
1487*0Sstevel@tonic-gate 		ap = anon_get_ptr(amp->ahp, anon_index++);
1488*0Sstevel@tonic-gate 		ASSERT(ap != NULL);
1489*0Sstevel@tonic-gate 		swap_xlate(ap, &vp, &offset);
1490*0Sstevel@tonic-gate 
1491*0Sstevel@tonic-gate 		/*
1492*0Sstevel@tonic-gate 		 * Use page_find() instead of page_lookup() to
1493*0Sstevel@tonic-gate 		 * find the page since we know that it has a
1494*0Sstevel@tonic-gate 		 * "shared" lock.
1495*0Sstevel@tonic-gate 		 */
1496*0Sstevel@tonic-gate 		pp = page_find(vp, offset);
1497*0Sstevel@tonic-gate 		ASSERT(ap == anon_get_ptr(amp->ahp, anon_index - 1));
1498*0Sstevel@tonic-gate 		if (pp == NULL) {
1499*0Sstevel@tonic-gate 			panic("segspt_softunlock: "
1500*0Sstevel@tonic-gate 			    "addr %p, ap %p, vp %p, off %llx",
1501*0Sstevel@tonic-gate 			    (void *)adr, (void *)ap, (void *)vp, offset);
1502*0Sstevel@tonic-gate 			/*NOTREACHED*/
1503*0Sstevel@tonic-gate 		}
1504*0Sstevel@tonic-gate 
1505*0Sstevel@tonic-gate 		if (rw == S_WRITE) {
1506*0Sstevel@tonic-gate 			hat_setrefmod(pp);
1507*0Sstevel@tonic-gate 		} else if (rw != S_OTHER) {
1508*0Sstevel@tonic-gate 			hat_setref(pp);
1509*0Sstevel@tonic-gate 		}
1510*0Sstevel@tonic-gate 		page_unlock(pp);
1511*0Sstevel@tonic-gate 	}
1512*0Sstevel@tonic-gate 
1513*0Sstevel@tonic-gate softlock_decrement:
1514*0Sstevel@tonic-gate 	npages = btopr(len);
1515*0Sstevel@tonic-gate 	atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), -npages);
1516*0Sstevel@tonic-gate 	if (shmd->shm_softlockcnt == 0) {
1517*0Sstevel@tonic-gate 		/*
1518*0Sstevel@tonic-gate 		 * All SOFTLOCKS are gone. Wake up any waiting
1519*0Sstevel@tonic-gate 		 * unmappers so they can try again to unmap.
1520*0Sstevel@tonic-gate 		 * Check for waiters first without the mutex
1521*0Sstevel@tonic-gate 		 * held so we don't always grab the mutex on
1522*0Sstevel@tonic-gate 		 * softunlocks.
1523*0Sstevel@tonic-gate 		 */
1524*0Sstevel@tonic-gate 		if (AS_ISUNMAPWAIT(seg->s_as)) {
1525*0Sstevel@tonic-gate 			mutex_enter(&seg->s_as->a_contents);
1526*0Sstevel@tonic-gate 			if (AS_ISUNMAPWAIT(seg->s_as)) {
1527*0Sstevel@tonic-gate 				AS_CLRUNMAPWAIT(seg->s_as);
1528*0Sstevel@tonic-gate 				cv_broadcast(&seg->s_as->a_cv);
1529*0Sstevel@tonic-gate 			}
1530*0Sstevel@tonic-gate 			mutex_exit(&seg->s_as->a_contents);
1531*0Sstevel@tonic-gate 		}
1532*0Sstevel@tonic-gate 	}
1533*0Sstevel@tonic-gate }
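
/*
 * Illustrative sketch, not part of this driver: segspt_softunlock() is the
 * tail end of an F_SOFTLOCK/F_SOFTUNLOCK pair driven through as_fault().
 * Assuming the standard as_fault() interface, a kernel caller that needs
 * user pages to stay locked across an operation does roughly:
 *
 *	if (as_fault(as->a_hat, as, addr, len, F_SOFTLOCK, S_WRITE) == 0) {
 *		... pages cannot be unmapped or paged out here ...
 *		(void) as_fault(as->a_hat, as, addr, len,
 *		    F_SOFTUNLOCK, S_WRITE);
 *	}
 *
 * The F_SOFTUNLOCK leg arrives in segspt_shmfault()/segspt_dismfault()
 * below, which translate the user address into the dummy sptseg address
 * and then call segspt_softunlock().
 */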
1534*0Sstevel@tonic-gate 
1535*0Sstevel@tonic-gate int
1536*0Sstevel@tonic-gate segspt_shmattach(struct seg *seg, caddr_t *argsp)
1537*0Sstevel@tonic-gate {
1538*0Sstevel@tonic-gate 	struct shm_data *shmd_arg = (struct shm_data *)argsp;
1539*0Sstevel@tonic-gate 	struct shm_data *shmd;
1540*0Sstevel@tonic-gate 	struct anon_map *shm_amp = shmd_arg->shm_amp;
1541*0Sstevel@tonic-gate 	struct spt_data *sptd;
1542*0Sstevel@tonic-gate 	int error = 0;
1543*0Sstevel@tonic-gate 
1544*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1545*0Sstevel@tonic-gate 
1546*0Sstevel@tonic-gate 	shmd = kmem_zalloc((sizeof (*shmd)), KM_NOSLEEP);
1547*0Sstevel@tonic-gate 	if (shmd == NULL)
1548*0Sstevel@tonic-gate 		return (ENOMEM);
1549*0Sstevel@tonic-gate 
1550*0Sstevel@tonic-gate 	shmd->shm_sptas = shmd_arg->shm_sptas;
1551*0Sstevel@tonic-gate 	shmd->shm_amp = shm_amp;
1552*0Sstevel@tonic-gate 	shmd->shm_sptseg = shmd_arg->shm_sptseg;
1553*0Sstevel@tonic-gate 
1554*0Sstevel@tonic-gate 	(void) lgrp_shm_policy_set(LGRP_MEM_POLICY_DEFAULT, shm_amp, 0,
1555*0Sstevel@tonic-gate 	    NULL, 0, seg->s_size);
1556*0Sstevel@tonic-gate 
1557*0Sstevel@tonic-gate 	seg->s_data = (void *)shmd;
1558*0Sstevel@tonic-gate 	seg->s_ops = &segspt_shmops;
1559*0Sstevel@tonic-gate 	seg->s_szc = shmd->shm_sptseg->s_szc;
1560*0Sstevel@tonic-gate 	sptd = shmd->shm_sptseg->s_data;
1561*0Sstevel@tonic-gate 
1562*0Sstevel@tonic-gate 	if (sptd->spt_flags & SHM_PAGEABLE) {
1563*0Sstevel@tonic-gate 		if ((shmd->shm_vpage = kmem_zalloc(btopr(shm_amp->size),
1564*0Sstevel@tonic-gate 		    KM_NOSLEEP)) == NULL) {
1565*0Sstevel@tonic-gate 			seg->s_data = (void *)NULL;
1566*0Sstevel@tonic-gate 			kmem_free(shmd, (sizeof (*shmd)));
1567*0Sstevel@tonic-gate 			return (ENOMEM);
1568*0Sstevel@tonic-gate 		}
1569*0Sstevel@tonic-gate 		shmd->shm_lckpgs = 0;
1570*0Sstevel@tonic-gate 		if (hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
1571*0Sstevel@tonic-gate 			if ((error = hat_share(seg->s_as->a_hat, seg->s_base,
1572*0Sstevel@tonic-gate 			    shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1573*0Sstevel@tonic-gate 			    seg->s_size, seg->s_szc)) != 0) {
1574*0Sstevel@tonic-gate 				kmem_free(shmd->shm_vpage,
1575*0Sstevel@tonic-gate 					btopr(shm_amp->size));
1576*0Sstevel@tonic-gate 			}
1577*0Sstevel@tonic-gate 		}
1578*0Sstevel@tonic-gate 	} else {
1579*0Sstevel@tonic-gate 		error = hat_share(seg->s_as->a_hat, seg->s_base,
1580*0Sstevel@tonic-gate 				shmd_arg->shm_sptas->a_hat, SEGSPTADDR,
1581*0Sstevel@tonic-gate 				seg->s_size, seg->s_szc);
1582*0Sstevel@tonic-gate 	}
1583*0Sstevel@tonic-gate 	if (error) {
1584*0Sstevel@tonic-gate 		seg->s_szc = 0;
1585*0Sstevel@tonic-gate 		seg->s_data = (void *)NULL;
1586*0Sstevel@tonic-gate 		kmem_free(shmd, (sizeof (*shmd)));
1587*0Sstevel@tonic-gate 	} else {
1588*0Sstevel@tonic-gate 		ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1589*0Sstevel@tonic-gate 		shm_amp->refcnt++;
1590*0Sstevel@tonic-gate 		ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1591*0Sstevel@tonic-gate 	}
1592*0Sstevel@tonic-gate 	return (error);
1593*0Sstevel@tonic-gate }
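
/*
 * Illustrative user-level sketch, not part of this file: segspt_shmattach()
 * is reached through shmat(2).  Assuming the Solaris shmat(2) flags, an
 * application requests ISM or DISM roughly like this:
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *
 *	int id = shmget(IPC_PRIVATE, 64 * 1024 * 1024, IPC_CREAT | 0600);
 *	void *p;
 *
 *	p = shmat(id, NULL, SHM_SHARE_MMU);	for ISM, or
 *	p = shmat(id, NULL, SHM_PAGEABLE);	for DISM (SHM_PAGEABLE here)
 *
 * (One or the other for a given segment.)  The first such attach creates
 * the underlying spt segment via sptcreate(); every attach then goes
 * through segspt_shmattach() to share the spt mappings into the attaching
 * address space.
 */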
1594*0Sstevel@tonic-gate 
1595*0Sstevel@tonic-gate int
1596*0Sstevel@tonic-gate segspt_shmunmap(struct seg *seg, caddr_t raddr, size_t ssize)
1597*0Sstevel@tonic-gate {
1598*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
1599*0Sstevel@tonic-gate 	int reclaim = 1;
1600*0Sstevel@tonic-gate 
1601*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1602*0Sstevel@tonic-gate retry:
1603*0Sstevel@tonic-gate 	if (shmd->shm_softlockcnt > 0) {
1604*0Sstevel@tonic-gate 		if (reclaim == 1) {
1605*0Sstevel@tonic-gate 			segspt_purge(seg);
1606*0Sstevel@tonic-gate 			reclaim = 0;
1607*0Sstevel@tonic-gate 			goto retry;
1608*0Sstevel@tonic-gate 		}
1609*0Sstevel@tonic-gate 		return (EAGAIN);
1610*0Sstevel@tonic-gate 	}
1611*0Sstevel@tonic-gate 
1612*0Sstevel@tonic-gate 	if (ssize != seg->s_size) {
1613*0Sstevel@tonic-gate #ifdef DEBUG
1614*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "Incompatible ssize %lx s_size %lx\n",
1615*0Sstevel@tonic-gate 		    ssize, seg->s_size);
1616*0Sstevel@tonic-gate #endif
1617*0Sstevel@tonic-gate 		return (EINVAL);
1618*0Sstevel@tonic-gate 	}
1619*0Sstevel@tonic-gate 
1620*0Sstevel@tonic-gate 	(void) segspt_shmlockop(seg, raddr, shmd->shm_amp->size, 0, MC_UNLOCK,
1621*0Sstevel@tonic-gate 	    NULL, 0);
1622*0Sstevel@tonic-gate 	hat_unshare(seg->s_as->a_hat, raddr, ssize, seg->s_szc);
1623*0Sstevel@tonic-gate 
1624*0Sstevel@tonic-gate 	seg_free(seg);
1625*0Sstevel@tonic-gate 
1626*0Sstevel@tonic-gate 	return (0);
1627*0Sstevel@tonic-gate }
1628*0Sstevel@tonic-gate 
1629*0Sstevel@tonic-gate void
1630*0Sstevel@tonic-gate segspt_shmfree(struct seg *seg)
1631*0Sstevel@tonic-gate {
1632*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
1633*0Sstevel@tonic-gate 	struct anon_map *shm_amp = shmd->shm_amp;
1634*0Sstevel@tonic-gate 
1635*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1636*0Sstevel@tonic-gate 
1637*0Sstevel@tonic-gate 	(void) segspt_shmlockop(seg, seg->s_base, shm_amp->size, 0,
1638*0Sstevel@tonic-gate 		MC_UNLOCK, NULL, 0);
1639*0Sstevel@tonic-gate 
1640*0Sstevel@tonic-gate 	/*
1641*0Sstevel@tonic-gate 	 * Need to increment refcnt when attaching
1642*0Sstevel@tonic-gate 	 * and decrement when detaching because of dup().
1643*0Sstevel@tonic-gate 	 */
1644*0Sstevel@tonic-gate 	ANON_LOCK_ENTER(&shm_amp->a_rwlock, RW_WRITER);
1645*0Sstevel@tonic-gate 	shm_amp->refcnt--;
1646*0Sstevel@tonic-gate 	ANON_LOCK_EXIT(&shm_amp->a_rwlock);
1647*0Sstevel@tonic-gate 
1648*0Sstevel@tonic-gate 	if (shmd->shm_vpage) {	/* only for DISM */
1649*0Sstevel@tonic-gate 		kmem_free(shmd->shm_vpage, btopr(shm_amp->size));
1650*0Sstevel@tonic-gate 		shmd->shm_vpage = NULL;
1651*0Sstevel@tonic-gate 	}
1652*0Sstevel@tonic-gate 	kmem_free(shmd, sizeof (*shmd));
1653*0Sstevel@tonic-gate }
1654*0Sstevel@tonic-gate 
1655*0Sstevel@tonic-gate /*ARGSUSED*/
1656*0Sstevel@tonic-gate int
1657*0Sstevel@tonic-gate segspt_shmsetprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
1658*0Sstevel@tonic-gate {
1659*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1660*0Sstevel@tonic-gate 
1661*0Sstevel@tonic-gate 	/*
1662*0Sstevel@tonic-gate 	 * A shared page table is more than a shared mapping.
1663*0Sstevel@tonic-gate 	 *  A process sharing the page tables can't change protections
1664*0Sstevel@tonic-gate 	 *  individually because there is only one set of page tables.
1665*0Sstevel@tonic-gate 	 *  This will be allowed once private page tables are
1666*0Sstevel@tonic-gate 	 *  supported.
1667*0Sstevel@tonic-gate 	 */
1668*0Sstevel@tonic-gate /* need to return correct status error? */
1669*0Sstevel@tonic-gate 	return (0);
1670*0Sstevel@tonic-gate }
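
/*
 * An illustrative consequence of the stub above: an mprotect(2) over an
 * ISM mapping reaches this function through as_setprot() and "succeeds"
 * without changing anything, e.g.
 *
 *	void *p = shmat(id, NULL, SHM_SHARE_MMU);
 *	(void) mprotect(p, len, PROT_READ);	returns 0, mapping stays rw
 *
 * because all processes attached to the segment share one set of page
 * tables.
 */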
1671*0Sstevel@tonic-gate 
1672*0Sstevel@tonic-gate 
1673*0Sstevel@tonic-gate faultcode_t
1674*0Sstevel@tonic-gate segspt_dismfault(struct hat *hat, struct seg *seg, caddr_t addr,
1675*0Sstevel@tonic-gate     size_t len, enum fault_type type, enum seg_rw rw)
1676*0Sstevel@tonic-gate {
1677*0Sstevel@tonic-gate 	struct  shm_data 	*shmd = (struct shm_data *)seg->s_data;
1678*0Sstevel@tonic-gate 	struct  seg		*sptseg = shmd->shm_sptseg;
1679*0Sstevel@tonic-gate 	struct  as		*curspt = shmd->shm_sptas;
1680*0Sstevel@tonic-gate 	struct  spt_data 	*sptd = sptseg->s_data;
1681*0Sstevel@tonic-gate 	pgcnt_t npages;
1682*0Sstevel@tonic-gate 	size_t  share_sz, size;
1683*0Sstevel@tonic-gate 	caddr_t segspt_addr, shm_addr;
1684*0Sstevel@tonic-gate 	page_t  **ppa;
1685*0Sstevel@tonic-gate 	int	i;
1686*0Sstevel@tonic-gate 	ulong_t an_idx = 0;
1687*0Sstevel@tonic-gate 	int	err = 0;
1688*0Sstevel@tonic-gate 
1689*0Sstevel@tonic-gate #ifdef lint
1690*0Sstevel@tonic-gate 	hat = hat;
1691*0Sstevel@tonic-gate #endif
1692*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1693*0Sstevel@tonic-gate 
1694*0Sstevel@tonic-gate 	/*
1695*0Sstevel@tonic-gate 	 * Because of the way spt is implemented,
1696*0Sstevel@tonic-gate 	 * the realsize of the segment does not have to be
1697*0Sstevel@tonic-gate 	 * equal to the segment size itself. The segment size is
1698*0Sstevel@tonic-gate 	 * often a multiple of a page size larger than PAGESIZE.
1699*0Sstevel@tonic-gate 	 * The realsize is rounded up to the nearest PAGESIZE
1700*0Sstevel@tonic-gate 	 * based on what the user requested. This is a bit of
1701*0Sstevel@tonic-gate 	 * ugliness that is historical but not easily fixed
1702*0Sstevel@tonic-gate 	 * without re-designing the higher levels of ISM.
1703*0Sstevel@tonic-gate 	 */
1704*0Sstevel@tonic-gate 	ASSERT(addr >= seg->s_base);
1705*0Sstevel@tonic-gate 	if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1706*0Sstevel@tonic-gate 		return (FC_NOMAP);
1707*0Sstevel@tonic-gate 	/*
1708*0Sstevel@tonic-gate 	 * For all of the following cases except F_PROT, we need to
1709*0Sstevel@tonic-gate 	 * make any necessary adjustments to addr and len
1710*0Sstevel@tonic-gate 	 * and get all of the necessary page_t's into an array called ppa[].
1711*0Sstevel@tonic-gate 	 *
1712*0Sstevel@tonic-gate 	 * The code in shmat() forces base addr and len of ISM segment
1713*0Sstevel@tonic-gate 	 * to be aligned to largest page size supported. Therefore,
1714*0Sstevel@tonic-gate 	 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
1715*0Sstevel@tonic-gate 	 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
1716*0Sstevel@tonic-gate 	 * in large pagesize chunks, or else we will screw up the HAT
1717*0Sstevel@tonic-gate 	 * layer by calling hat_memload_array() with differing page sizes
1718*0Sstevel@tonic-gate 	 * over a given virtual range.
1719*0Sstevel@tonic-gate 	 */
1720*0Sstevel@tonic-gate 	share_sz = page_get_pagesize(sptseg->s_szc);
1721*0Sstevel@tonic-gate 	shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
1722*0Sstevel@tonic-gate 	size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_sz);
1723*0Sstevel@tonic-gate 	npages = btopr(size);
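
	/*
	 * Purely illustrative numbers (assuming seg->s_base is share_sz
	 * aligned, which shmat() guarantees): with share_sz = 4M, a fault
	 * at addr = seg->s_base + 0x401000 for len = 0x3000 yields
	 * shm_addr = seg->s_base + 0x400000 and size = P2ROUNDUP(0x4000,
	 * 4M) = 4M, so the whole large page is handled at once and
	 * hat_memload_array() below always sees aligned, uniformly sized
	 * chunks.
	 */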
1724*0Sstevel@tonic-gate 
1725*0Sstevel@tonic-gate 	/*
1726*0Sstevel@tonic-gate 	 * Now we need to convert from addr in segshm to addr in segspt.
1727*0Sstevel@tonic-gate 	 */
1728*0Sstevel@tonic-gate 	an_idx = seg_page(seg, shm_addr);
1729*0Sstevel@tonic-gate 	segspt_addr = sptseg->s_base + ptob(an_idx);
1730*0Sstevel@tonic-gate 
1731*0Sstevel@tonic-gate 	ASSERT((segspt_addr + ptob(npages)) <=
1732*0Sstevel@tonic-gate 		(sptseg->s_base + sptd->spt_realsize));
1733*0Sstevel@tonic-gate 	ASSERT(segspt_addr < (sptseg->s_base + sptseg->s_size));
1734*0Sstevel@tonic-gate 
1735*0Sstevel@tonic-gate 	switch (type) {
1736*0Sstevel@tonic-gate 
1737*0Sstevel@tonic-gate 	case F_SOFTLOCK:
1738*0Sstevel@tonic-gate 
1739*0Sstevel@tonic-gate 		mutex_enter(&freemem_lock);
1740*0Sstevel@tonic-gate 		if (availrmem < tune.t_minarmem + npages) {
1741*0Sstevel@tonic-gate 			mutex_exit(&freemem_lock);
1742*0Sstevel@tonic-gate 			return (FC_MAKE_ERR(ENOMEM));
1743*0Sstevel@tonic-gate 		} else {
1744*0Sstevel@tonic-gate 			availrmem -= npages;
1745*0Sstevel@tonic-gate 		}
1746*0Sstevel@tonic-gate 		mutex_exit(&freemem_lock);
1747*0Sstevel@tonic-gate 		atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
1748*0Sstevel@tonic-gate 		/*
1749*0Sstevel@tonic-gate 		 * Fall through to the F_INVAL case to load up the hat layer
1750*0Sstevel@tonic-gate 		 * entries with the HAT_LOAD_LOCK flag.
1751*0Sstevel@tonic-gate 		 */
1752*0Sstevel@tonic-gate 		/* FALLTHRU */
1753*0Sstevel@tonic-gate 	case F_INVAL:
1754*0Sstevel@tonic-gate 
1755*0Sstevel@tonic-gate 		if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1756*0Sstevel@tonic-gate 			return (FC_NOMAP);
1757*0Sstevel@tonic-gate 
1758*0Sstevel@tonic-gate 		ppa = kmem_zalloc(npages * sizeof (page_t *), KM_SLEEP);
1759*0Sstevel@tonic-gate 
1760*0Sstevel@tonic-gate 		err = spt_anon_getpages(sptseg, segspt_addr, size, ppa);
1761*0Sstevel@tonic-gate 		if (err != 0) {
1762*0Sstevel@tonic-gate 			if (type == F_SOFTLOCK) {
1763*0Sstevel@tonic-gate 				mutex_enter(&freemem_lock);
1764*0Sstevel@tonic-gate 				availrmem += npages;
1765*0Sstevel@tonic-gate 				mutex_exit(&freemem_lock);
1766*0Sstevel@tonic-gate 				atomic_add_long((ulong_t *)(
1767*0Sstevel@tonic-gate 				    &(shmd->shm_softlockcnt)), -npages);
1768*0Sstevel@tonic-gate 			}
1769*0Sstevel@tonic-gate 			goto dism_err;
1770*0Sstevel@tonic-gate 		}
1771*0Sstevel@tonic-gate 		AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
1772*0Sstevel@tonic-gate 		if (type == F_SOFTLOCK) {
1773*0Sstevel@tonic-gate 
1774*0Sstevel@tonic-gate 			/*
1775*0Sstevel@tonic-gate 			 * Load up the translation keeping it
1776*0Sstevel@tonic-gate 			 * locked and don't unlock the page.
1777*0Sstevel@tonic-gate 			 */
1778*0Sstevel@tonic-gate 			hat_memload_array(sptseg->s_as->a_hat, segspt_addr,
1779*0Sstevel@tonic-gate 			    size, ppa, sptd->spt_prot,
1780*0Sstevel@tonic-gate 			    HAT_LOAD_LOCK | HAT_LOAD_SHARE);
1781*0Sstevel@tonic-gate 		} else {
1782*0Sstevel@tonic-gate 			if (hat == seg->s_as->a_hat) {
1783*0Sstevel@tonic-gate 
1784*0Sstevel@tonic-gate 				/*
1785*0Sstevel@tonic-gate 				 * Migrate pages marked for migration
1786*0Sstevel@tonic-gate 				 */
1787*0Sstevel@tonic-gate 				if (lgrp_optimizations())
1788*0Sstevel@tonic-gate 					page_migrate(seg, shm_addr, ppa,
1789*0Sstevel@tonic-gate 					    npages);
1790*0Sstevel@tonic-gate 
1791*0Sstevel@tonic-gate 				/* CPU HAT */
1792*0Sstevel@tonic-gate 				hat_memload_array(sptseg->s_as->a_hat,
1793*0Sstevel@tonic-gate 				    segspt_addr, size, ppa, sptd->spt_prot,
1794*0Sstevel@tonic-gate 				    HAT_LOAD_SHARE);
1795*0Sstevel@tonic-gate 			} else {
1796*0Sstevel@tonic-gate 				/* XHAT. Pass real address */
1797*0Sstevel@tonic-gate 				hat_memload_array(hat, shm_addr,
1798*0Sstevel@tonic-gate 				    size, ppa, sptd->spt_prot, HAT_LOAD_SHARE);
1799*0Sstevel@tonic-gate 			}
1800*0Sstevel@tonic-gate 
1801*0Sstevel@tonic-gate 			/*
1802*0Sstevel@tonic-gate 			 * And now drop the SE_SHARED lock(s).
1803*0Sstevel@tonic-gate 			 */
1804*0Sstevel@tonic-gate 			for (i = 0; i < npages; i++)
1805*0Sstevel@tonic-gate 				page_unlock(ppa[i]);
1806*0Sstevel@tonic-gate 		}
1807*0Sstevel@tonic-gate 
1808*0Sstevel@tonic-gate 		if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
1809*0Sstevel@tonic-gate 			if (hat_share(seg->s_as->a_hat, shm_addr,
1810*0Sstevel@tonic-gate 			    curspt->a_hat, segspt_addr, ptob(npages),
1811*0Sstevel@tonic-gate 			    seg->s_szc) != 0) {
1812*0Sstevel@tonic-gate 				panic("hat_share err in DISM fault");
1813*0Sstevel@tonic-gate 				/* NOTREACHED */
1814*0Sstevel@tonic-gate 			}
1815*0Sstevel@tonic-gate 		}
1816*0Sstevel@tonic-gate 		AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
1817*0Sstevel@tonic-gate dism_err:
1818*0Sstevel@tonic-gate 		kmem_free(ppa, npages * sizeof (page_t *));
1819*0Sstevel@tonic-gate 		return (err);
1820*0Sstevel@tonic-gate 
1821*0Sstevel@tonic-gate 	case F_SOFTUNLOCK:
1822*0Sstevel@tonic-gate 
1823*0Sstevel@tonic-gate 		mutex_enter(&freemem_lock);
1824*0Sstevel@tonic-gate 		availrmem += npages;
1825*0Sstevel@tonic-gate 		mutex_exit(&freemem_lock);
1826*0Sstevel@tonic-gate 
1827*0Sstevel@tonic-gate 		/*
1828*0Sstevel@tonic-gate 		 * This is a bit ugly: we pass in the real seg pointer,
1829*0Sstevel@tonic-gate 		 * but the segspt_addr is the virtual address within the
1830*0Sstevel@tonic-gate 		 * dummy seg.
1831*0Sstevel@tonic-gate 		 */
1832*0Sstevel@tonic-gate 		segspt_softunlock(seg, segspt_addr, size, rw);
1833*0Sstevel@tonic-gate 		return (0);
1834*0Sstevel@tonic-gate 
1835*0Sstevel@tonic-gate 	case F_PROT:
1836*0Sstevel@tonic-gate 
1837*0Sstevel@tonic-gate 		/*
1838*0Sstevel@tonic-gate 		 * This takes care of the unusual case where a user
1839*0Sstevel@tonic-gate 		 * allocates a stack in shared memory and a register
1840*0Sstevel@tonic-gate 		 * window overflow is written to that stack page before
1841*0Sstevel@tonic-gate 		 * it is otherwise modified.
1842*0Sstevel@tonic-gate 		 *
1843*0Sstevel@tonic-gate 		 * We can get away with this because ISM segments are
1844*0Sstevel@tonic-gate 		 * always rw. Other than this unusual case, there
1845*0Sstevel@tonic-gate 		 * should be no instances of protection violations.
1846*0Sstevel@tonic-gate 		 */
1847*0Sstevel@tonic-gate 		return (0);
1848*0Sstevel@tonic-gate 
1849*0Sstevel@tonic-gate 	default:
1850*0Sstevel@tonic-gate #ifdef DEBUG
1851*0Sstevel@tonic-gate 		panic("segspt_dismfault default type?");
1852*0Sstevel@tonic-gate #else
1853*0Sstevel@tonic-gate 		return (FC_NOMAP);
1854*0Sstevel@tonic-gate #endif
1855*0Sstevel@tonic-gate 	}
1856*0Sstevel@tonic-gate }
1857*0Sstevel@tonic-gate 
1858*0Sstevel@tonic-gate 
1859*0Sstevel@tonic-gate faultcode_t
1860*0Sstevel@tonic-gate segspt_shmfault(struct hat *hat, struct seg *seg, caddr_t addr,
1861*0Sstevel@tonic-gate     size_t len, enum fault_type type, enum seg_rw rw)
1862*0Sstevel@tonic-gate {
1863*0Sstevel@tonic-gate 	struct shm_data 	*shmd = (struct shm_data *)seg->s_data;
1864*0Sstevel@tonic-gate 	struct seg		*sptseg = shmd->shm_sptseg;
1865*0Sstevel@tonic-gate 	struct as		*curspt = shmd->shm_sptas;
1866*0Sstevel@tonic-gate 	struct spt_data 	*sptd   = sptseg->s_data;
1867*0Sstevel@tonic-gate 	pgcnt_t npages;
1868*0Sstevel@tonic-gate 	size_t share_size, size;
1869*0Sstevel@tonic-gate 	caddr_t sptseg_addr, shm_addr;
1870*0Sstevel@tonic-gate 	page_t *pp, **ppa;
1871*0Sstevel@tonic-gate 	int	i;
1872*0Sstevel@tonic-gate 	u_offset_t offset;
1873*0Sstevel@tonic-gate 	ulong_t anon_index = 0;
1874*0Sstevel@tonic-gate 	struct vnode *vp;
1875*0Sstevel@tonic-gate 	struct anon_map *amp;		/* XXX - for locknest */
1876*0Sstevel@tonic-gate 	struct anon *ap = NULL;
1877*0Sstevel@tonic-gate 	anon_sync_obj_t cookie;
1878*0Sstevel@tonic-gate 
1879*0Sstevel@tonic-gate #ifdef lint
1880*0Sstevel@tonic-gate 	hat = hat;
1881*0Sstevel@tonic-gate #endif
1882*0Sstevel@tonic-gate 
1883*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1884*0Sstevel@tonic-gate 
1885*0Sstevel@tonic-gate 	if (sptd->spt_flags & SHM_PAGEABLE) {
1886*0Sstevel@tonic-gate 		return (segspt_dismfault(hat, seg, addr, len, type, rw));
1887*0Sstevel@tonic-gate 	}
1888*0Sstevel@tonic-gate 
1889*0Sstevel@tonic-gate 	/*
1890*0Sstevel@tonic-gate 	 * Because of the way spt is implemented,
1891*0Sstevel@tonic-gate 	 * the realsize of the segment does not have to be
1892*0Sstevel@tonic-gate 	 * equal to the segment size itself. The segment size is
1893*0Sstevel@tonic-gate 	 * often a multiple of a page size larger than PAGESIZE.
1894*0Sstevel@tonic-gate 	 * The realsize is rounded up to the nearest PAGESIZE
1895*0Sstevel@tonic-gate 	 * based on what the user requested. This is a bit of
1896*0Sstevel@tonic-gate 	 * ugliness that is historical but not easily fixed
1897*0Sstevel@tonic-gate 	 * without re-designing the higher levels of ISM.
1898*0Sstevel@tonic-gate 	 */
1899*0Sstevel@tonic-gate 	ASSERT(addr >= seg->s_base);
1900*0Sstevel@tonic-gate 	if (((addr + len) - seg->s_base) > sptd->spt_realsize)
1901*0Sstevel@tonic-gate 		return (FC_NOMAP);
1902*0Sstevel@tonic-gate 	/*
1903*0Sstevel@tonic-gate 	 * For all of the following cases except F_PROT, we need to
1904*0Sstevel@tonic-gate 	 * make any necessary adjustments to addr and len
1905*0Sstevel@tonic-gate 	 * and get all of the necessary page_t's into an array called ppa[].
1906*0Sstevel@tonic-gate 	 *
1907*0Sstevel@tonic-gate 	 * The code in shmat() forces base addr and len of ISM segment
1908*0Sstevel@tonic-gate 	 * to be aligned to largest page size supported. Therefore,
1909*0Sstevel@tonic-gate 	 * we are able to handle F_SOFTLOCK and F_INVAL calls in "large
1910*0Sstevel@tonic-gate 	 * pagesize" chunks. We want to make sure that we HAT_LOAD_LOCK
1911*0Sstevel@tonic-gate 	 * in large pagesize chunks, or else we will screw up the HAT
1912*0Sstevel@tonic-gate 	 * layer by calling hat_memload_array() with differing page sizes
1913*0Sstevel@tonic-gate 	 * over a given virtual range.
1914*0Sstevel@tonic-gate 	 */
1915*0Sstevel@tonic-gate 	share_size = page_get_pagesize(sptseg->s_szc);
1916*0Sstevel@tonic-gate 	shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
1917*0Sstevel@tonic-gate 	size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)), share_size);
1918*0Sstevel@tonic-gate 	npages = btopr(size);
1919*0Sstevel@tonic-gate 
1920*0Sstevel@tonic-gate 	/*
1921*0Sstevel@tonic-gate 	 * Now we need to convert from addr in segshm to addr in segspt.
1922*0Sstevel@tonic-gate 	 */
1923*0Sstevel@tonic-gate 	anon_index = seg_page(seg, shm_addr);
1924*0Sstevel@tonic-gate 	sptseg_addr = sptseg->s_base + ptob(anon_index);
1925*0Sstevel@tonic-gate 
1926*0Sstevel@tonic-gate 	/*
1927*0Sstevel@tonic-gate 	 * And now we may have to adjust npages downward if we have
1928*0Sstevel@tonic-gate 	 * exceeded the realsize of the segment or initial anon
1929*0Sstevel@tonic-gate 	 * allocations.
1930*0Sstevel@tonic-gate 	 */
1931*0Sstevel@tonic-gate 	if ((sptseg_addr + ptob(npages)) >
1932*0Sstevel@tonic-gate 	    (sptseg->s_base + sptd->spt_realsize))
1933*0Sstevel@tonic-gate 		size = (sptseg->s_base + sptd->spt_realsize) - sptseg_addr;
1934*0Sstevel@tonic-gate 
1935*0Sstevel@tonic-gate 	npages = btopr(size);
1936*0Sstevel@tonic-gate 
1937*0Sstevel@tonic-gate 	ASSERT(sptseg_addr < (sptseg->s_base + sptseg->s_size));
1938*0Sstevel@tonic-gate 	ASSERT((sptd->spt_flags & SHM_PAGEABLE) == 0);
1939*0Sstevel@tonic-gate 
1940*0Sstevel@tonic-gate 	switch (type) {
1941*0Sstevel@tonic-gate 
1942*0Sstevel@tonic-gate 	case F_SOFTLOCK:
1943*0Sstevel@tonic-gate 
1944*0Sstevel@tonic-gate 		/*
1945*0Sstevel@tonic-gate 		 * availrmem is decremented once during anon_swap_adjust()
1946*0Sstevel@tonic-gate 		 * and is incremented during the anon_unresv(), which is
1947*0Sstevel@tonic-gate 		 * called from shm_rm_amp() when the segment is destroyed.
1948*0Sstevel@tonic-gate 		 */
1949*0Sstevel@tonic-gate 		atomic_add_long((ulong_t *)(&(shmd->shm_softlockcnt)), npages);
1950*0Sstevel@tonic-gate 		/*
1951*0Sstevel@tonic-gate 		 * Some platforms assume that ISM pages are SE_SHARED
1952*0Sstevel@tonic-gate 		 * locked for the entire life of the segment.
1953*0Sstevel@tonic-gate 		 */
1954*0Sstevel@tonic-gate 		if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0))
1955*0Sstevel@tonic-gate 			return (0);
1956*0Sstevel@tonic-gate 		/*
1957*0Sstevel@tonic-gate 		 * Fall through to the F_INVAL case to load up the hat layer
1958*0Sstevel@tonic-gate 		 * entries with the HAT_LOAD_LOCK flag.
1959*0Sstevel@tonic-gate 		 */
1960*0Sstevel@tonic-gate 
1961*0Sstevel@tonic-gate 		/* FALLTHRU */
1962*0Sstevel@tonic-gate 	case F_INVAL:
1963*0Sstevel@tonic-gate 
1964*0Sstevel@tonic-gate 		if ((rw == S_EXEC) && !(sptd->spt_prot & PROT_EXEC))
1965*0Sstevel@tonic-gate 			return (FC_NOMAP);
1966*0Sstevel@tonic-gate 
1967*0Sstevel@tonic-gate 		/*
1968*0Sstevel@tonic-gate 		 * Some platforms that do NOT support DYNAMIC_ISM_UNMAP
1969*0Sstevel@tonic-gate 		 * may still rely on this call to hat_share(). That
1970*0Sstevel@tonic-gate 		 * would imply that those HATs can fault on a
1971*0Sstevel@tonic-gate 		 * HAT_LOAD_LOCK translation, which would seem
1972*0Sstevel@tonic-gate 		 * contradictory.
1973*0Sstevel@tonic-gate 		 */
1974*0Sstevel@tonic-gate 		if (!hat_supported(HAT_DYNAMIC_ISM_UNMAP, (void *)0)) {
1975*0Sstevel@tonic-gate 			if (hat_share(seg->s_as->a_hat, seg->s_base,
1976*0Sstevel@tonic-gate 			    curspt->a_hat, sptseg->s_base,
1977*0Sstevel@tonic-gate 			    sptseg->s_size, sptseg->s_szc) != 0) {
1978*0Sstevel@tonic-gate 				panic("hat_share error in ISM fault");
1979*0Sstevel@tonic-gate 				/*NOTREACHED*/
1980*0Sstevel@tonic-gate 			}
1981*0Sstevel@tonic-gate 			return (0);
1982*0Sstevel@tonic-gate 		}
1983*0Sstevel@tonic-gate 		ppa = kmem_zalloc(sizeof (page_t *) * npages, KM_SLEEP);
1984*0Sstevel@tonic-gate 
1985*0Sstevel@tonic-gate 		/*
1986*0Sstevel@tonic-gate 		 * I see no need to lock the real seg
1987*0Sstevel@tonic-gate 		 * here, because all of our work will be on the underlying
1988*0Sstevel@tonic-gate 		 * dummy seg.
1989*0Sstevel@tonic-gate 		 *
1990*0Sstevel@tonic-gate 		 * sptseg_addr and npages now account for large pages.
1991*0Sstevel@tonic-gate 		 */
1992*0Sstevel@tonic-gate 		amp = sptd->spt_amp;
1993*0Sstevel@tonic-gate 		ASSERT(amp != NULL);
1994*0Sstevel@tonic-gate 		anon_index = seg_page(sptseg, sptseg_addr);
1995*0Sstevel@tonic-gate 
1996*0Sstevel@tonic-gate 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
1997*0Sstevel@tonic-gate 		for (i = 0; i < npages; i++) {
1998*0Sstevel@tonic-gate 			anon_array_enter(amp, anon_index, &cookie);
1999*0Sstevel@tonic-gate 			ap = anon_get_ptr(amp->ahp, anon_index++);
2000*0Sstevel@tonic-gate 			ASSERT(ap != NULL);
2001*0Sstevel@tonic-gate 			swap_xlate(ap, &vp, &offset);
2002*0Sstevel@tonic-gate 			anon_array_exit(&cookie);
2003*0Sstevel@tonic-gate 			pp = page_lookup(vp, offset, SE_SHARED);
2004*0Sstevel@tonic-gate 			ASSERT(pp != NULL);
2005*0Sstevel@tonic-gate 			ppa[i] = pp;
2006*0Sstevel@tonic-gate 		}
2007*0Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
2008*0Sstevel@tonic-gate 		ASSERT(i == npages);
2009*0Sstevel@tonic-gate 
2010*0Sstevel@tonic-gate 		/*
2011*0Sstevel@tonic-gate 		 * We are already holding the as->a_lock on the user's
2012*0Sstevel@tonic-gate 		 * real segment, but we need to hold the a_lock on the
2013*0Sstevel@tonic-gate 		 * underlying dummy as. This is mostly to satisfy the
2014*0Sstevel@tonic-gate 		 * underlying HAT layer.
2015*0Sstevel@tonic-gate 		 */
2016*0Sstevel@tonic-gate 		AS_LOCK_ENTER(sptseg->s_as, &sptseg->s_as->a_lock, RW_READER);
2017*0Sstevel@tonic-gate 		if (type == F_SOFTLOCK) {
2018*0Sstevel@tonic-gate 			/*
2019*0Sstevel@tonic-gate 			 * Load up the translation keeping it
2020*0Sstevel@tonic-gate 			 * locked and don't unlock the page.
2021*0Sstevel@tonic-gate 			 */
2022*0Sstevel@tonic-gate 			hat_memload_array(sptseg->s_as->a_hat, sptseg_addr,
2023*0Sstevel@tonic-gate 			    ptob(npages), ppa, sptd->spt_prot,
2024*0Sstevel@tonic-gate 			    HAT_LOAD_LOCK | HAT_LOAD_SHARE);
2025*0Sstevel@tonic-gate 		} else {
2026*0Sstevel@tonic-gate 			if (hat == seg->s_as->a_hat) {
2027*0Sstevel@tonic-gate 
2028*0Sstevel@tonic-gate 				/*
2029*0Sstevel@tonic-gate 				 * Migrate pages marked for migration.
2030*0Sstevel@tonic-gate 				 */
2031*0Sstevel@tonic-gate 				if (lgrp_optimizations())
2032*0Sstevel@tonic-gate 					page_migrate(seg, shm_addr, ppa,
2033*0Sstevel@tonic-gate 					    npages);
2034*0Sstevel@tonic-gate 
2035*0Sstevel@tonic-gate 				/* CPU HAT */
2036*0Sstevel@tonic-gate 				hat_memload_array(sptseg->s_as->a_hat,
2037*0Sstevel@tonic-gate 				    sptseg_addr, ptob(npages), ppa,
2038*0Sstevel@tonic-gate 				    sptd->spt_prot, HAT_LOAD_SHARE);
2039*0Sstevel@tonic-gate 			} else {
2040*0Sstevel@tonic-gate 				/* XHAT. Pass real address */
2041*0Sstevel@tonic-gate 				hat_memload_array(hat, shm_addr,
2042*0Sstevel@tonic-gate 				    ptob(npages), ppa, sptd->spt_prot,
2043*0Sstevel@tonic-gate 				    HAT_LOAD_SHARE);
2044*0Sstevel@tonic-gate 			}
2045*0Sstevel@tonic-gate 
2046*0Sstevel@tonic-gate 			/*
2047*0Sstevel@tonic-gate 			 * And now drop the SE_SHARED lock(s).
2048*0Sstevel@tonic-gate 			 */
2049*0Sstevel@tonic-gate 			for (i = 0; i < npages; i++)
2050*0Sstevel@tonic-gate 				page_unlock(ppa[i]);
2051*0Sstevel@tonic-gate 		}
2052*0Sstevel@tonic-gate 		AS_LOCK_EXIT(sptseg->s_as, &sptseg->s_as->a_lock);
2053*0Sstevel@tonic-gate 
2054*0Sstevel@tonic-gate 		kmem_free(ppa, sizeof (page_t *) * npages);
2055*0Sstevel@tonic-gate 		return (0);
2056*0Sstevel@tonic-gate 	case F_SOFTUNLOCK:
2057*0Sstevel@tonic-gate 
2058*0Sstevel@tonic-gate 		/*
2059*0Sstevel@tonic-gate 		 * This is a bit ugly: we pass in the real seg pointer,
2060*0Sstevel@tonic-gate 		 * but the sptseg_addr is the virtual address within the
2061*0Sstevel@tonic-gate 		 * dummy seg.
2062*0Sstevel@tonic-gate 		 */
2063*0Sstevel@tonic-gate 		segspt_softunlock(seg, sptseg_addr, ptob(npages), rw);
2064*0Sstevel@tonic-gate 		return (0);
2065*0Sstevel@tonic-gate 
2066*0Sstevel@tonic-gate 	case F_PROT:
2067*0Sstevel@tonic-gate 
2068*0Sstevel@tonic-gate 		/*
2069*0Sstevel@tonic-gate 		 * This takes care of the unusual case where a user
2070*0Sstevel@tonic-gate 		 * allocates a stack in shared memory and a register
2071*0Sstevel@tonic-gate 		 * window overflow is written to that stack page before
2072*0Sstevel@tonic-gate 		 * it is otherwise modified.
2073*0Sstevel@tonic-gate 		 *
2074*0Sstevel@tonic-gate 		 * We can get away with this because ISM segments are
2075*0Sstevel@tonic-gate 		 * always rw. Other than this unusual case, there
2076*0Sstevel@tonic-gate 		 * should be no instances of protection violations.
2077*0Sstevel@tonic-gate 		 */
2078*0Sstevel@tonic-gate 		return (0);
2079*0Sstevel@tonic-gate 
2080*0Sstevel@tonic-gate 	default:
2081*0Sstevel@tonic-gate #ifdef DEBUG
2082*0Sstevel@tonic-gate 		cmn_err(CE_WARN, "segspt_shmfault default type?");
2083*0Sstevel@tonic-gate #endif
2084*0Sstevel@tonic-gate 		return (FC_NOMAP);
2085*0Sstevel@tonic-gate 	}
2086*0Sstevel@tonic-gate }
2087*0Sstevel@tonic-gate 
2088*0Sstevel@tonic-gate /*ARGSUSED*/
2089*0Sstevel@tonic-gate static faultcode_t
2090*0Sstevel@tonic-gate segspt_shmfaulta(struct seg *seg, caddr_t addr)
2091*0Sstevel@tonic-gate {
2092*0Sstevel@tonic-gate 	return (0);
2093*0Sstevel@tonic-gate }
2094*0Sstevel@tonic-gate 
2095*0Sstevel@tonic-gate /*ARGSUSED*/
2096*0Sstevel@tonic-gate static int
2097*0Sstevel@tonic-gate segspt_shmkluster(struct seg *seg, caddr_t addr, ssize_t delta)
2098*0Sstevel@tonic-gate {
2099*0Sstevel@tonic-gate 	return (0);
2100*0Sstevel@tonic-gate }
2101*0Sstevel@tonic-gate 
2102*0Sstevel@tonic-gate /*ARGSUSED*/
2103*0Sstevel@tonic-gate static size_t
2104*0Sstevel@tonic-gate segspt_shmswapout(struct seg *seg)
2105*0Sstevel@tonic-gate {
2106*0Sstevel@tonic-gate 	return (0);
2107*0Sstevel@tonic-gate }
2108*0Sstevel@tonic-gate 
2109*0Sstevel@tonic-gate /*
2110*0Sstevel@tonic-gate  * duplicate the shared page tables
2111*0Sstevel@tonic-gate  */
2112*0Sstevel@tonic-gate int
2113*0Sstevel@tonic-gate segspt_shmdup(struct seg *seg, struct seg *newseg)
2114*0Sstevel@tonic-gate {
2115*0Sstevel@tonic-gate 	struct shm_data		*shmd = (struct shm_data *)seg->s_data;
2116*0Sstevel@tonic-gate 	struct anon_map 	*amp = shmd->shm_amp;
2117*0Sstevel@tonic-gate 	struct shm_data 	*shmd_new;
2118*0Sstevel@tonic-gate 	struct seg		*spt_seg = shmd->shm_sptseg;
2119*0Sstevel@tonic-gate 	struct spt_data		*sptd = spt_seg->s_data;
2120*0Sstevel@tonic-gate 
2121*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
2122*0Sstevel@tonic-gate 
2123*0Sstevel@tonic-gate 	shmd_new = kmem_zalloc((sizeof (*shmd_new)), KM_SLEEP);
2124*0Sstevel@tonic-gate 	newseg->s_data = (void *)shmd_new;
2125*0Sstevel@tonic-gate 	shmd_new->shm_sptas = shmd->shm_sptas;
2126*0Sstevel@tonic-gate 	shmd_new->shm_amp = amp;
2127*0Sstevel@tonic-gate 	shmd_new->shm_sptseg = shmd->shm_sptseg;
2128*0Sstevel@tonic-gate 	newseg->s_ops = &segspt_shmops;
2129*0Sstevel@tonic-gate 	newseg->s_szc = seg->s_szc;
2130*0Sstevel@tonic-gate 	ASSERT(seg->s_szc == shmd->shm_sptseg->s_szc);
2131*0Sstevel@tonic-gate 
2132*0Sstevel@tonic-gate 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
2133*0Sstevel@tonic-gate 	amp->refcnt++;
2134*0Sstevel@tonic-gate 	ANON_LOCK_EXIT(&amp->a_rwlock);
2135*0Sstevel@tonic-gate 
2136*0Sstevel@tonic-gate 	if (sptd->spt_flags & SHM_PAGEABLE) {
2137*0Sstevel@tonic-gate 		shmd_new->shm_vpage = kmem_zalloc(btopr(amp->size), KM_SLEEP);
2138*0Sstevel@tonic-gate 		shmd_new->shm_lckpgs = 0;
2139*0Sstevel@tonic-gate 	}
2140*0Sstevel@tonic-gate 	return (hat_share(newseg->s_as->a_hat, newseg->s_base,
2141*0Sstevel@tonic-gate 	    shmd->shm_sptas->a_hat, SEGSPTADDR, seg->s_size, seg->s_szc));
2142*0Sstevel@tonic-gate }
2143*0Sstevel@tonic-gate 
2144*0Sstevel@tonic-gate /*ARGSUSED*/
2145*0Sstevel@tonic-gate int
2146*0Sstevel@tonic-gate segspt_shmcheckprot(struct seg *seg, caddr_t addr, size_t size, uint_t prot)
2147*0Sstevel@tonic-gate {
2148*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
2149*0Sstevel@tonic-gate 	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2150*0Sstevel@tonic-gate 
2151*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2152*0Sstevel@tonic-gate 
2153*0Sstevel@tonic-gate 	/*
2154*0Sstevel@tonic-gate 	 * ISM segment is always rw.
2155*0Sstevel@tonic-gate 	 */
2156*0Sstevel@tonic-gate 	return (((sptd->spt_prot & prot) != prot) ? EACCES : 0);
2157*0Sstevel@tonic-gate }
2158*0Sstevel@tonic-gate 
2159*0Sstevel@tonic-gate /*
2160*0Sstevel@tonic-gate  * Return an array of locked large pages; for empty slots, allocate
2161*0Sstevel@tonic-gate  * private zero-filled anon pages.
2162*0Sstevel@tonic-gate  */
2163*0Sstevel@tonic-gate static int
2164*0Sstevel@tonic-gate spt_anon_getpages(
2165*0Sstevel@tonic-gate 	struct seg *sptseg,
2166*0Sstevel@tonic-gate 	caddr_t sptaddr,
2167*0Sstevel@tonic-gate 	size_t len,
2168*0Sstevel@tonic-gate 	page_t *ppa[])
2169*0Sstevel@tonic-gate {
2170*0Sstevel@tonic-gate 	struct  spt_data *sptd = sptseg->s_data;
2171*0Sstevel@tonic-gate 	struct  anon_map *amp = sptd->spt_amp;
2172*0Sstevel@tonic-gate 	enum 	seg_rw rw = sptd->spt_prot;
2173*0Sstevel@tonic-gate 	uint_t	szc = sptseg->s_szc;
2174*0Sstevel@tonic-gate 	size_t	pg_sz, share_sz = page_get_pagesize(szc);
2175*0Sstevel@tonic-gate 	pgcnt_t	lp_npgs;
2176*0Sstevel@tonic-gate 	caddr_t	lp_addr, e_sptaddr;
2177*0Sstevel@tonic-gate 	uint_t	vpprot, ppa_szc = 0;
2178*0Sstevel@tonic-gate 	struct  vpage *vpage = NULL;
2179*0Sstevel@tonic-gate 	ulong_t	j, ppa_idx;
2180*0Sstevel@tonic-gate 	int	err, ierr = 0;
2181*0Sstevel@tonic-gate 	pgcnt_t	an_idx;
2182*0Sstevel@tonic-gate 	anon_sync_obj_t cookie;
2183*0Sstevel@tonic-gate 
2184*0Sstevel@tonic-gate 	ASSERT(IS_P2ALIGNED(sptaddr, share_sz) && IS_P2ALIGNED(len, share_sz));
2185*0Sstevel@tonic-gate 	ASSERT(len != 0);
2186*0Sstevel@tonic-gate 
2187*0Sstevel@tonic-gate 	pg_sz = share_sz;
2188*0Sstevel@tonic-gate 	lp_npgs = btop(pg_sz);
2189*0Sstevel@tonic-gate 	lp_addr = sptaddr;
2190*0Sstevel@tonic-gate 	e_sptaddr = sptaddr + len;
2191*0Sstevel@tonic-gate 	an_idx = seg_page(sptseg, sptaddr);
2192*0Sstevel@tonic-gate 	ppa_idx = 0;
2193*0Sstevel@tonic-gate 
2194*0Sstevel@tonic-gate 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2195*0Sstevel@tonic-gate 	/*CONSTCOND*/
2196*0Sstevel@tonic-gate 	while (1) {
2197*0Sstevel@tonic-gate 		for (; lp_addr < e_sptaddr;
2198*0Sstevel@tonic-gate 			an_idx += lp_npgs, lp_addr += pg_sz,
2199*0Sstevel@tonic-gate 			ppa_idx += lp_npgs) {
2200*0Sstevel@tonic-gate 
2201*0Sstevel@tonic-gate 			anon_array_enter(amp, an_idx, &cookie);
2202*0Sstevel@tonic-gate 			ppa_szc = (uint_t)-1;
2203*0Sstevel@tonic-gate 			ierr = anon_map_getpages(amp, an_idx, szc, sptseg,
2204*0Sstevel@tonic-gate 			    lp_addr, sptd->spt_prot, &vpprot, &ppa[ppa_idx],
2205*0Sstevel@tonic-gate 			    &ppa_szc, vpage, rw, 0, segvn_anypgsz, kcred);
2206*0Sstevel@tonic-gate 			anon_array_exit(&cookie);
2207*0Sstevel@tonic-gate 
2208*0Sstevel@tonic-gate 			if (ierr != 0) {
2209*0Sstevel@tonic-gate 				if (ierr > 0) {
2210*0Sstevel@tonic-gate 					err = FC_MAKE_ERR(ierr);
2211*0Sstevel@tonic-gate 					goto lpgs_err;
2212*0Sstevel@tonic-gate 				}
2213*0Sstevel@tonic-gate 				break;
2214*0Sstevel@tonic-gate 			}
2215*0Sstevel@tonic-gate 		}
2216*0Sstevel@tonic-gate 		if (lp_addr == e_sptaddr) {
2217*0Sstevel@tonic-gate 			break;
2218*0Sstevel@tonic-gate 		}
2219*0Sstevel@tonic-gate 		ASSERT(lp_addr < e_sptaddr);
2220*0Sstevel@tonic-gate 
2221*0Sstevel@tonic-gate 		/*
2222*0Sstevel@tonic-gate 		 * ierr == -1 means we failed to allocate a large page,
2223*0Sstevel@tonic-gate 		 * so do a size-down operation.
2224*0Sstevel@tonic-gate 		 *
2225*0Sstevel@tonic-gate 		 * ierr == -2 means some other process that privately shares
2226*0Sstevel@tonic-gate 		 * pages with this process has allocated a larger page and we
2227*0Sstevel@tonic-gate 		 * need to retry with larger pages, so do a size-up
2228*0Sstevel@tonic-gate 		 * operation.  This relies on the fact that large pages are
2229*0Sstevel@tonic-gate 		 * never partially shared, i.e. if we share any constituent
2230*0Sstevel@tonic-gate 		 * page of a large page with another process we must share
2231*0Sstevel@tonic-gate 		 * the entire large page.  Note this cannot happen for the
2232*0Sstevel@tonic-gate 		 * SOFTLOCK case, unless the current address (lp_addr) is at
2233*0Sstevel@tonic-gate 		 * the beginning of the next page size boundary, because the
2234*0Sstevel@tonic-gate 		 * other process couldn't have relocated locked pages.
2235*0Sstevel@tonic-gate 		 */
2236*0Sstevel@tonic-gate 		ASSERT(ierr == -1 || ierr == -2);
2237*0Sstevel@tonic-gate 		if (segvn_anypgsz) {
2238*0Sstevel@tonic-gate 			ASSERT(ierr == -2 || szc != 0);
2239*0Sstevel@tonic-gate 			ASSERT(ierr == -1 || szc < sptseg->s_szc);
2240*0Sstevel@tonic-gate 			szc = (ierr == -1) ? szc - 1 : szc + 1;
2241*0Sstevel@tonic-gate 		} else {
2242*0Sstevel@tonic-gate 			/*
2243*0Sstevel@tonic-gate 			 * For faults and segvn_anypgsz == 0
2244*0Sstevel@tonic-gate 			 * we need to be careful not to loop forever
2245*0Sstevel@tonic-gate 			 * if an existing page is found with a szc other
2246*0Sstevel@tonic-gate 			 * than 0 or sptseg->s_szc.  This could be due
2247*0Sstevel@tonic-gate 			 * to page relocations on behalf of DR or,
2248*0Sstevel@tonic-gate 			 * more likely, large page creation.  In this
2249*0Sstevel@tonic-gate 			 * case simply re-size to the existing page's szc
2250*0Sstevel@tonic-gate 			 * as returned by anon_map_getpages().
2251*0Sstevel@tonic-gate 			 */
2252*0Sstevel@tonic-gate 			if (ppa_szc == (uint_t)-1) {
2253*0Sstevel@tonic-gate 				szc = (ierr == -1) ? 0 : sptseg->s_szc;
2254*0Sstevel@tonic-gate 			} else {
2255*0Sstevel@tonic-gate 				ASSERT(ppa_szc <= sptseg->s_szc);
2256*0Sstevel@tonic-gate 				ASSERT(ierr == -2 || ppa_szc < szc);
2257*0Sstevel@tonic-gate 				ASSERT(ierr == -1 || ppa_szc > szc);
2258*0Sstevel@tonic-gate 				szc = ppa_szc;
2259*0Sstevel@tonic-gate 			}
2260*0Sstevel@tonic-gate 		}
2261*0Sstevel@tonic-gate 		pg_sz = page_get_pagesize(szc);
2262*0Sstevel@tonic-gate 		lp_npgs = btop(pg_sz);
2263*0Sstevel@tonic-gate 		ASSERT(IS_P2ALIGNED(lp_addr, pg_sz));
2264*0Sstevel@tonic-gate 	}
2265*0Sstevel@tonic-gate 	ANON_LOCK_EXIT(&amp->a_rwlock);
2266*0Sstevel@tonic-gate 	return (0);
2267*0Sstevel@tonic-gate 
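	/*
	 * Error exit: release the pages gathered before the failure and
	 * return the fault code to the caller.
	 */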
2268*0Sstevel@tonic-gate lpgs_err:
2269*0Sstevel@tonic-gate 	ANON_LOCK_EXIT(&amp->a_rwlock);
2270*0Sstevel@tonic-gate 	for (j = 0; j < ppa_idx; j++)
2271*0Sstevel@tonic-gate 		page_unlock(ppa[j]);
2272*0Sstevel@tonic-gate 	return (err);
2273*0Sstevel@tonic-gate }
2274*0Sstevel@tonic-gate 
2275*0Sstevel@tonic-gate int
2276*0Sstevel@tonic-gate spt_lockpages(struct seg *seg, pgcnt_t anon_index, pgcnt_t npages,
2277*0Sstevel@tonic-gate     page_t **ppa, ulong_t *lockmap, size_t pos)
2278*0Sstevel@tonic-gate {
2279*0Sstevel@tonic-gate 	struct shm_data *shmd = seg->s_data;
2280*0Sstevel@tonic-gate 	struct spt_data *sptd = shmd->shm_sptseg->s_data;
2281*0Sstevel@tonic-gate 	ulong_t	i;
2282*0Sstevel@tonic-gate 	int	kernel;
2283*0Sstevel@tonic-gate 
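	/*
	 * For each page not already locked for this segment: bump the
	 * per-page DISM lock count (warning once DISM_LOCK_MAX is hit),
	 * pp-lock the page so it cannot be paged out, record it in
	 * shm_vpage[] and in the caller's lockmap, and finally drop the
	 * shared page lock taken by spt_anon_getpages().  The "kernel"
	 * flag tells page_pp_lock() whether availrmem has already been
	 * charged for a page that is held in the seg pcache (spt_ppa).
	 */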
2284*0Sstevel@tonic-gate 	for (i = 0; i < npages; anon_index++, pos++, i++) {
2285*0Sstevel@tonic-gate 		if (!(shmd->shm_vpage[anon_index] & DISM_PG_LOCKED)) {
2286*0Sstevel@tonic-gate 			if (sptd->spt_ppa_lckcnt[anon_index] <
2287*0Sstevel@tonic-gate 			    (ushort_t)DISM_LOCK_MAX) {
2288*0Sstevel@tonic-gate 				if (++sptd->spt_ppa_lckcnt[anon_index] ==
2289*0Sstevel@tonic-gate 				    (ushort_t)DISM_LOCK_MAX) {
2290*0Sstevel@tonic-gate 					cmn_err(CE_WARN,
2291*0Sstevel@tonic-gate 					    "DISM page lock limit "
2292*0Sstevel@tonic-gate 					    "reached on DISM offset 0x%lx\n",
2293*0Sstevel@tonic-gate 					    anon_index << PAGESHIFT);
2294*0Sstevel@tonic-gate 				}
2295*0Sstevel@tonic-gate 				kernel = (sptd->spt_ppa &&
2296*0Sstevel@tonic-gate 				    sptd->spt_ppa[anon_index]) ? 1 : 0;
2297*0Sstevel@tonic-gate 				if (!page_pp_lock(ppa[i], 0, kernel)) {
2298*0Sstevel@tonic-gate 					/* unlock rest of the pages */
2299*0Sstevel@tonic-gate 					for (; i < npages; i++)
2300*0Sstevel@tonic-gate 						page_unlock(ppa[i]);
2301*0Sstevel@tonic-gate 					sptd->spt_ppa_lckcnt[anon_index]--;
2302*0Sstevel@tonic-gate 					return (EAGAIN);
2303*0Sstevel@tonic-gate 				}
2304*0Sstevel@tonic-gate 				shmd->shm_lckpgs++;
2305*0Sstevel@tonic-gate 				shmd->shm_vpage[anon_index] |= DISM_PG_LOCKED;
2306*0Sstevel@tonic-gate 				if (lockmap != NULL)
2307*0Sstevel@tonic-gate 					BT_SET(lockmap, pos);
2308*0Sstevel@tonic-gate 			}
2309*0Sstevel@tonic-gate 		}
2310*0Sstevel@tonic-gate 		page_unlock(ppa[i]);
2311*0Sstevel@tonic-gate 	}
2312*0Sstevel@tonic-gate 	return (0);
2313*0Sstevel@tonic-gate }
2314*0Sstevel@tonic-gate 
2315*0Sstevel@tonic-gate /*ARGSUSED*/
2316*0Sstevel@tonic-gate static int
2317*0Sstevel@tonic-gate segspt_shmlockop(struct seg *seg, caddr_t addr, size_t len,
2318*0Sstevel@tonic-gate     int attr, int op, ulong_t *lockmap, size_t pos)
2319*0Sstevel@tonic-gate {
2320*0Sstevel@tonic-gate 	struct shm_data *shmd = seg->s_data;
2321*0Sstevel@tonic-gate 	struct seg	*sptseg = shmd->shm_sptseg;
2322*0Sstevel@tonic-gate 	struct spt_data *sptd = sptseg->s_data;
2323*0Sstevel@tonic-gate 	pgcnt_t		npages, a_npages;
2324*0Sstevel@tonic-gate 	page_t		**ppa;
2325*0Sstevel@tonic-gate 	pgcnt_t 	an_idx, a_an_idx, ppa_idx;
2326*0Sstevel@tonic-gate 	caddr_t		spt_addr, a_addr;	/* spt and aligned address */
2327*0Sstevel@tonic-gate 	size_t		a_len;			/* aligned len */
2328*0Sstevel@tonic-gate 	size_t		share_sz;
2329*0Sstevel@tonic-gate 	ulong_t		i;
2330*0Sstevel@tonic-gate 	int		sts = 0;
2331*0Sstevel@tonic-gate 
2332*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2333*0Sstevel@tonic-gate 
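	/*
	 * ISM (non-pageable) segments have all of their pages locked down
	 * at creation time, so MC_LOCK/MC_UNLOCK requests only have work
	 * to do for DISM (SHM_PAGEABLE) segments.
	 */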
2334*0Sstevel@tonic-gate 	if ((sptd->spt_flags & SHM_PAGEABLE) == 0) {
2335*0Sstevel@tonic-gate 		return (0);
2336*0Sstevel@tonic-gate 	}
2337*0Sstevel@tonic-gate 
2338*0Sstevel@tonic-gate 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
2339*0Sstevel@tonic-gate 	an_idx = seg_page(seg, addr);
2340*0Sstevel@tonic-gate 	npages = btopr(len);
2341*0Sstevel@tonic-gate 
2342*0Sstevel@tonic-gate 	if (an_idx + npages > btopr(shmd->shm_amp->size)) {
2343*0Sstevel@tonic-gate 		return (ENOMEM);
2344*0Sstevel@tonic-gate 	}
2345*0Sstevel@tonic-gate 
2346*0Sstevel@tonic-gate 	if (op == MC_LOCK) {
2347*0Sstevel@tonic-gate 		/*
2348*0Sstevel@tonic-gate 		 * Align the address and size of the request if they are not
2349*0Sstevel@tonic-gate 		 * already aligned, so we can always allocate large page(s);
2350*0Sstevel@tonic-gate 		 * however, we only lock what the initial request asked for.
2351*0Sstevel@tonic-gate 		 */
2352*0Sstevel@tonic-gate 		share_sz = page_get_pagesize(sptseg->s_szc);
2353*0Sstevel@tonic-gate 		a_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_sz);
2354*0Sstevel@tonic-gate 		a_len = P2ROUNDUP((uintptr_t)(((addr + len) - a_addr)),
2355*0Sstevel@tonic-gate 				share_sz);
2356*0Sstevel@tonic-gate 		a_npages = btop(a_len);
2357*0Sstevel@tonic-gate 		a_an_idx = seg_page(seg, a_addr);
2358*0Sstevel@tonic-gate 		spt_addr = sptseg->s_base + ptob(a_an_idx);
2359*0Sstevel@tonic-gate 		ppa_idx = an_idx - a_an_idx;
2360*0Sstevel@tonic-gate 
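		/*
		 * Illustrative example (the actual large page size depends
		 * on sptseg->s_szc): with 4M large pages, a request for two
		 * base pages starting one base page into a large page is
		 * expanded to a_addr at the 4M boundary and a_len of 4M.
		 * ppa_idx == 1 then locates the first requested page within
		 * ppa[]; only the requested pages are locked below and the
		 * extra aligned pages are unlocked again afterwards.
		 */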
2361*0Sstevel@tonic-gate 		if ((ppa = kmem_zalloc(((sizeof (page_t *)) * a_npages),
2362*0Sstevel@tonic-gate 			KM_NOSLEEP)) == NULL) {
2363*0Sstevel@tonic-gate 			return (ENOMEM);
2364*0Sstevel@tonic-gate 		}
2365*0Sstevel@tonic-gate 
2366*0Sstevel@tonic-gate 		/*
2367*0Sstevel@tonic-gate 		 * Don't cache any new pages for IO and
2368*0Sstevel@tonic-gate 		 * flush any cached pages.
2369*0Sstevel@tonic-gate 		 */
2370*0Sstevel@tonic-gate 		mutex_enter(&sptd->spt_lock);
2371*0Sstevel@tonic-gate 		if (sptd->spt_ppa != NULL)
2372*0Sstevel@tonic-gate 			sptd->spt_flags |= DISM_PPA_CHANGED;
2373*0Sstevel@tonic-gate 
2374*0Sstevel@tonic-gate 		sts = spt_anon_getpages(sptseg, spt_addr, a_len, ppa);
2375*0Sstevel@tonic-gate 		if (sts != 0) {
2376*0Sstevel@tonic-gate 			mutex_exit(&sptd->spt_lock);
2377*0Sstevel@tonic-gate 			kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2378*0Sstevel@tonic-gate 			return (sts);
2379*0Sstevel@tonic-gate 		}
2380*0Sstevel@tonic-gate 
2381*0Sstevel@tonic-gate 		sts = spt_lockpages(seg, an_idx, npages,
2382*0Sstevel@tonic-gate 		    &ppa[ppa_idx], lockmap, pos);
2383*0Sstevel@tonic-gate 		/*
2384*0Sstevel@tonic-gate 		 * Unlock the extra pages that were only brought in to
2385*0Sstevel@tonic-gate 		 * satisfy large page alignment of the request.
2386*0Sstevel@tonic-gate 		 */
2387*0Sstevel@tonic-gate 		for (i = 0; i < ppa_idx; i++)
2388*0Sstevel@tonic-gate 			page_unlock(ppa[i]);
2389*0Sstevel@tonic-gate 		for (i = ppa_idx + npages; i < a_npages; i++)
2390*0Sstevel@tonic-gate 			page_unlock(ppa[i]);
2391*0Sstevel@tonic-gate 		if (sptd->spt_ppa != NULL)
2392*0Sstevel@tonic-gate 			sptd->spt_flags |= DISM_PPA_CHANGED;
2393*0Sstevel@tonic-gate 		mutex_exit(&sptd->spt_lock);
2394*0Sstevel@tonic-gate 
2395*0Sstevel@tonic-gate 		kmem_free(ppa, ((sizeof (page_t *)) * a_npages));
2396*0Sstevel@tonic-gate 
2397*0Sstevel@tonic-gate 	} else if (op == MC_UNLOCK) { /* unlock */
2398*0Sstevel@tonic-gate 		struct anon_map *amp;
2399*0Sstevel@tonic-gate 		struct anon 	*ap;
2400*0Sstevel@tonic-gate 		struct vnode 	*vp;
2401*0Sstevel@tonic-gate 		u_offset_t 	off;
2402*0Sstevel@tonic-gate 		struct page	*pp;
2403*0Sstevel@tonic-gate 		int		kernel;
2404*0Sstevel@tonic-gate 		anon_sync_obj_t cookie;
2405*0Sstevel@tonic-gate 
2406*0Sstevel@tonic-gate 		amp = sptd->spt_amp;
2407*0Sstevel@tonic-gate 		mutex_enter(&sptd->spt_lock);
2408*0Sstevel@tonic-gate 		if (shmd->shm_lckpgs == 0) {
2409*0Sstevel@tonic-gate 			mutex_exit(&sptd->spt_lock);
2410*0Sstevel@tonic-gate 			return (0);
2411*0Sstevel@tonic-gate 		}
2412*0Sstevel@tonic-gate 		/*
2413*0Sstevel@tonic-gate 		 * Don't cache new IO pages.
2414*0Sstevel@tonic-gate 		 */
2415*0Sstevel@tonic-gate 		if (sptd->spt_ppa != NULL)
2416*0Sstevel@tonic-gate 			sptd->spt_flags |= DISM_PPA_CHANGED;
2417*0Sstevel@tonic-gate 
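		/*
		 * Walk the requested range; for every page still marked
		 * DISM_PG_LOCKED, translate its anon slot to a vnode and
		 * offset, look the page up SE_SHARED, drop the pp lock
		 * (giving availrmem back unless it was accounted for via
		 * the pcache), and clear the per-page lock state.
		 */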
2418*0Sstevel@tonic-gate 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2419*0Sstevel@tonic-gate 		for (i = 0; i < npages; i++, an_idx++) {
2420*0Sstevel@tonic-gate 			if (shmd->shm_vpage[an_idx] & DISM_PG_LOCKED) {
2421*0Sstevel@tonic-gate 				anon_array_enter(amp, an_idx, &cookie);
2422*0Sstevel@tonic-gate 				ap = anon_get_ptr(amp->ahp, an_idx);
2423*0Sstevel@tonic-gate 				ASSERT(ap);
2424*0Sstevel@tonic-gate 				ASSERT(sptd->spt_ppa_lckcnt[an_idx] > 0);
2425*0Sstevel@tonic-gate 
2426*0Sstevel@tonic-gate 				swap_xlate(ap, &vp, &off);
2427*0Sstevel@tonic-gate 				anon_array_exit(&cookie);
2428*0Sstevel@tonic-gate 				pp = page_lookup(vp, off, SE_SHARED);
2429*0Sstevel@tonic-gate 				ASSERT(pp);
2430*0Sstevel@tonic-gate 				/*
2431*0Sstevel@tonic-gate 				 * availrmem is decremented only for
2432*0Sstevel@tonic-gate 				 * pages which are not in the seg pcache;
2433*0Sstevel@tonic-gate 				 * for pages in the seg pcache, availrmem
2434*0Sstevel@tonic-gate 				 * was decremented in _dismpagelock()
2435*0Sstevel@tonic-gate 				 * (if they were not locked here).
2436*0Sstevel@tonic-gate 				 */
2437*0Sstevel@tonic-gate 				kernel = (sptd->spt_ppa &&
2438*0Sstevel@tonic-gate 				    sptd->spt_ppa[an_idx]) ? 1 : 0;
2439*0Sstevel@tonic-gate 				page_pp_unlock(pp, 0, kernel);
2440*0Sstevel@tonic-gate 				page_unlock(pp);
2441*0Sstevel@tonic-gate 				shmd->shm_vpage[an_idx] &= ~DISM_PG_LOCKED;
2442*0Sstevel@tonic-gate 				sptd->spt_ppa_lckcnt[an_idx]--;
2443*0Sstevel@tonic-gate 				shmd->shm_lckpgs--;
2444*0Sstevel@tonic-gate 			}
2445*0Sstevel@tonic-gate 		}
2446*0Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
2447*0Sstevel@tonic-gate 		if (sptd->spt_ppa != NULL)
2448*0Sstevel@tonic-gate 			sptd->spt_flags |= DISM_PPA_CHANGED;
2449*0Sstevel@tonic-gate 		mutex_exit(&sptd->spt_lock);
2450*0Sstevel@tonic-gate 	}
2451*0Sstevel@tonic-gate 	return (sts);
2452*0Sstevel@tonic-gate }
2453*0Sstevel@tonic-gate 
2454*0Sstevel@tonic-gate /*ARGSUSED*/
2455*0Sstevel@tonic-gate int
2456*0Sstevel@tonic-gate segspt_shmgetprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2457*0Sstevel@tonic-gate {
2458*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
2459*0Sstevel@tonic-gate 	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2460*0Sstevel@tonic-gate 	spgcnt_t pgno = seg_page(seg, addr+len) - seg_page(seg, addr) + 1;
2461*0Sstevel@tonic-gate 
2462*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2463*0Sstevel@tonic-gate 
2464*0Sstevel@tonic-gate 	/*
2465*0Sstevel@tonic-gate 	 * ISM segment is always rw.
2466*0Sstevel@tonic-gate 	 */
2467*0Sstevel@tonic-gate 	while (--pgno >= 0)
2468*0Sstevel@tonic-gate 		*protv++ = sptd->spt_prot;
2469*0Sstevel@tonic-gate 	return (0);
2470*0Sstevel@tonic-gate }
2471*0Sstevel@tonic-gate 
2472*0Sstevel@tonic-gate /*ARGSUSED*/
2473*0Sstevel@tonic-gate u_offset_t
2474*0Sstevel@tonic-gate segspt_shmgetoffset(struct seg *seg, caddr_t addr)
2475*0Sstevel@tonic-gate {
2476*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2477*0Sstevel@tonic-gate 
2478*0Sstevel@tonic-gate 	/* Offset does not matter in ISM memory */
2479*0Sstevel@tonic-gate 
2480*0Sstevel@tonic-gate 	return ((u_offset_t)0);
2481*0Sstevel@tonic-gate }
2482*0Sstevel@tonic-gate 
2483*0Sstevel@tonic-gate /* ARGSUSED */
2484*0Sstevel@tonic-gate int
2485*0Sstevel@tonic-gate segspt_shmgettype(struct seg *seg, caddr_t addr)
2486*0Sstevel@tonic-gate {
2487*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
2488*0Sstevel@tonic-gate 	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2489*0Sstevel@tonic-gate 
2490*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2491*0Sstevel@tonic-gate 
2492*0Sstevel@tonic-gate 	/*
2493*0Sstevel@tonic-gate 	 * The shared memory mapping is always MAP_SHARED; swap is
2494*0Sstevel@tonic-gate 	 * reserved only for DISM.
2495*0Sstevel@tonic-gate 	 */
2496*0Sstevel@tonic-gate 	return (MAP_SHARED |
2497*0Sstevel@tonic-gate 		((sptd->spt_flags & SHM_PAGEABLE) ? 0 : MAP_NORESERVE));
2498*0Sstevel@tonic-gate }
2499*0Sstevel@tonic-gate 
2500*0Sstevel@tonic-gate /*ARGSUSED*/
2501*0Sstevel@tonic-gate int
2502*0Sstevel@tonic-gate segspt_shmgetvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
2503*0Sstevel@tonic-gate {
2504*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
2505*0Sstevel@tonic-gate 	struct spt_data *sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2506*0Sstevel@tonic-gate 
2507*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2508*0Sstevel@tonic-gate 
2509*0Sstevel@tonic-gate 	*vpp = sptd->spt_vp;
2510*0Sstevel@tonic-gate 	return (0);
2511*0Sstevel@tonic-gate }
2512*0Sstevel@tonic-gate 
2513*0Sstevel@tonic-gate /*ARGSUSED*/
2514*0Sstevel@tonic-gate static int
2515*0Sstevel@tonic-gate segspt_shmadvise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2516*0Sstevel@tonic-gate {
2517*0Sstevel@tonic-gate 	struct shm_data 	*shmd = (struct shm_data *)seg->s_data;
2518*0Sstevel@tonic-gate 	struct spt_data	*sptd = (struct spt_data *)shmd->shm_sptseg->s_data;
2519*0Sstevel@tonic-gate 	struct anon_map	*amp;
2520*0Sstevel@tonic-gate 	pgcnt_t		pg_idx;
2521*0Sstevel@tonic-gate 
2522*0Sstevel@tonic-gate 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2523*0Sstevel@tonic-gate 
2524*0Sstevel@tonic-gate 	if (behav == MADV_FREE) {
2525*0Sstevel@tonic-gate 		if ((sptd->spt_flags & SHM_PAGEABLE) == 0)
2526*0Sstevel@tonic-gate 			return (0);
2527*0Sstevel@tonic-gate 
2528*0Sstevel@tonic-gate 		amp = sptd->spt_amp;
2529*0Sstevel@tonic-gate 		pg_idx = seg_page(seg, addr);
2530*0Sstevel@tonic-gate 
2531*0Sstevel@tonic-gate 		mutex_enter(&sptd->spt_lock);
2532*0Sstevel@tonic-gate 		if (sptd->spt_ppa != NULL)
2533*0Sstevel@tonic-gate 			sptd->spt_flags |= DISM_PPA_CHANGED;
2534*0Sstevel@tonic-gate 		mutex_exit(&sptd->spt_lock);
2535*0Sstevel@tonic-gate 
2536*0Sstevel@tonic-gate 		/*
2537*0Sstevel@tonic-gate 		 * Purge all DISM cached pages
2538*0Sstevel@tonic-gate 		 */
2539*0Sstevel@tonic-gate 		seg_ppurge_seg(segspt_reclaim);
2540*0Sstevel@tonic-gate 
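		/*
		 * With the pcache entries invalidated above, anon_disclaim()
		 * can release the backing anon pages for the advised range.
		 */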
2541*0Sstevel@tonic-gate 		mutex_enter(&sptd->spt_lock);
2542*0Sstevel@tonic-gate 		ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2543*0Sstevel@tonic-gate 		anon_disclaim(amp, pg_idx, len, ANON_PGLOOKUP_BLK);
2544*0Sstevel@tonic-gate 		ANON_LOCK_EXIT(&amp->a_rwlock);
2545*0Sstevel@tonic-gate 		mutex_exit(&sptd->spt_lock);
2546*0Sstevel@tonic-gate 	} else if (lgrp_optimizations() && (behav == MADV_ACCESS_LWP ||
2547*0Sstevel@tonic-gate 	    behav == MADV_ACCESS_MANY || behav == MADV_ACCESS_DEFAULT)) {
2548*0Sstevel@tonic-gate 		int			already_set;
2549*0Sstevel@tonic-gate 		ulong_t			anon_index;
2550*0Sstevel@tonic-gate 		lgrp_mem_policy_t	policy;
2551*0Sstevel@tonic-gate 		caddr_t			shm_addr;
2552*0Sstevel@tonic-gate 		size_t			share_size;
2553*0Sstevel@tonic-gate 		size_t			size;
2554*0Sstevel@tonic-gate 		struct seg		*sptseg = shmd->shm_sptseg;
2555*0Sstevel@tonic-gate 		caddr_t			sptseg_addr;
2556*0Sstevel@tonic-gate 
2557*0Sstevel@tonic-gate 		/*
2558*0Sstevel@tonic-gate 		 * Align address and length to page size of underlying segment
2559*0Sstevel@tonic-gate 		 */
2560*0Sstevel@tonic-gate 		share_size = page_get_pagesize(shmd->shm_sptseg->s_szc);
2561*0Sstevel@tonic-gate 		shm_addr = (caddr_t)P2ALIGN((uintptr_t)(addr), share_size);
2562*0Sstevel@tonic-gate 		size = P2ROUNDUP((uintptr_t)(((addr + len) - shm_addr)),
2563*0Sstevel@tonic-gate 		    share_size);
2564*0Sstevel@tonic-gate 
2565*0Sstevel@tonic-gate 		amp = shmd->shm_amp;
2566*0Sstevel@tonic-gate 		anon_index = seg_page(seg, shm_addr);
2567*0Sstevel@tonic-gate 
2568*0Sstevel@tonic-gate 		/*
2569*0Sstevel@tonic-gate 		 * And now we may have to adjust size downward if we have
2570*0Sstevel@tonic-gate 		 * exceeded the realsize of the segment or initial anon
2571*0Sstevel@tonic-gate 		 * allocations.
2572*0Sstevel@tonic-gate 		 */
2573*0Sstevel@tonic-gate 		sptseg_addr = sptseg->s_base + ptob(anon_index);
2574*0Sstevel@tonic-gate 		if ((sptseg_addr + size) >
2575*0Sstevel@tonic-gate 		    (sptseg->s_base + sptd->spt_realsize))
2576*0Sstevel@tonic-gate 			size = (sptseg->s_base + sptd->spt_realsize) -
2577*0Sstevel@tonic-gate 			    sptseg_addr;
2578*0Sstevel@tonic-gate 
2579*0Sstevel@tonic-gate 		/*
2580*0Sstevel@tonic-gate 		 * Set memory allocation policy for this segment
2581*0Sstevel@tonic-gate 		 */
2582*0Sstevel@tonic-gate 		policy = lgrp_madv_to_policy(behav, len, MAP_SHARED);
2583*0Sstevel@tonic-gate 		already_set = lgrp_shm_policy_set(policy, amp, anon_index,
2584*0Sstevel@tonic-gate 		    NULL, 0, len);
2585*0Sstevel@tonic-gate 
2586*0Sstevel@tonic-gate 		/*
2587*0Sstevel@tonic-gate 		 * If the memory allocation policy is already set and is not
2588*0Sstevel@tonic-gate 		 * one that needs to be reapplied, don't bother reapplying it.
2589*0Sstevel@tonic-gate 		 */
2590*0Sstevel@tonic-gate 		if (already_set && !LGRP_MEM_POLICY_REAPPLICABLE(policy))
2591*0Sstevel@tonic-gate 			return (0);
2592*0Sstevel@tonic-gate 
2593*0Sstevel@tonic-gate 		/*
2594*0Sstevel@tonic-gate 		 * Mark any existing pages in the given range for
2595*0Sstevel@tonic-gate 		 * migration, flushing the I/O page cache and using the
2596*0Sstevel@tonic-gate 		 * underlying segment to calculate the anon index and to
2597*0Sstevel@tonic-gate 		 * obtain the anon map and vnode pointers.
2598*0Sstevel@tonic-gate 		 */
2599*0Sstevel@tonic-gate 		if (shmd->shm_softlockcnt > 0)
2600*0Sstevel@tonic-gate 			segspt_purge(seg);
2601*0Sstevel@tonic-gate 
2602*0Sstevel@tonic-gate 		page_mark_migrate(seg, shm_addr, size, amp, 0, NULL, 0, 0);
2603*0Sstevel@tonic-gate 	}
2604*0Sstevel@tonic-gate 
2605*0Sstevel@tonic-gate 	return (0);
2606*0Sstevel@tonic-gate }
2607*0Sstevel@tonic-gate 
2608*0Sstevel@tonic-gate /*ARGSUSED*/
2609*0Sstevel@tonic-gate void
2610*0Sstevel@tonic-gate segspt_shmdump(struct seg *seg)
2611*0Sstevel@tonic-gate {
2612*0Sstevel@tonic-gate 	/* no-op for ISM segment */
2613*0Sstevel@tonic-gate }
2614*0Sstevel@tonic-gate 
2615*0Sstevel@tonic-gate /*ARGSUSED*/
2616*0Sstevel@tonic-gate static faultcode_t
2617*0Sstevel@tonic-gate segspt_shmsetpgsz(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
2618*0Sstevel@tonic-gate {
2619*0Sstevel@tonic-gate 	return (ENOTSUP);
2620*0Sstevel@tonic-gate }
2621*0Sstevel@tonic-gate 
2622*0Sstevel@tonic-gate /*
2623*0Sstevel@tonic-gate  * Get a memory ID for an addr in a given segment.
2624*0Sstevel@tonic-gate  */
2625*0Sstevel@tonic-gate static int
2626*0Sstevel@tonic-gate segspt_shmgetmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2627*0Sstevel@tonic-gate {
2628*0Sstevel@tonic-gate 	struct shm_data *shmd = (struct shm_data *)seg->s_data;
2629*0Sstevel@tonic-gate 	struct anon 	*ap;
2630*0Sstevel@tonic-gate 	size_t		anon_index;
2631*0Sstevel@tonic-gate 	struct anon_map	*amp = shmd->shm_amp;
2632*0Sstevel@tonic-gate 	struct spt_data	*sptd = shmd->shm_sptseg->s_data;
2633*0Sstevel@tonic-gate 	struct seg	*sptseg = shmd->shm_sptseg;
2634*0Sstevel@tonic-gate 	anon_sync_obj_t	cookie;
2635*0Sstevel@tonic-gate 
2636*0Sstevel@tonic-gate 	anon_index = seg_page(seg, addr);
2637*0Sstevel@tonic-gate 
2638*0Sstevel@tonic-gate 	if (addr > (seg->s_base + sptd->spt_realsize)) {
2639*0Sstevel@tonic-gate 		return (EFAULT);
2640*0Sstevel@tonic-gate 	}
2641*0Sstevel@tonic-gate 
2642*0Sstevel@tonic-gate 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
2643*0Sstevel@tonic-gate 	anon_array_enter(amp, anon_index, &cookie);
2644*0Sstevel@tonic-gate 	ap = anon_get_ptr(amp->ahp, anon_index);
2645*0Sstevel@tonic-gate 	if (ap == NULL) {
2646*0Sstevel@tonic-gate 		struct page *pp;
2647*0Sstevel@tonic-gate 		caddr_t spt_addr = sptseg->s_base + ptob(anon_index);
2648*0Sstevel@tonic-gate 
2649*0Sstevel@tonic-gate 		pp = anon_zero(sptseg, spt_addr, &ap, kcred);
2650*0Sstevel@tonic-gate 		if (pp == NULL) {
2651*0Sstevel@tonic-gate 			anon_array_exit(&cookie);
2652*0Sstevel@tonic-gate 			ANON_LOCK_EXIT(&amp->a_rwlock);
2653*0Sstevel@tonic-gate 			return (ENOMEM);
2654*0Sstevel@tonic-gate 		}
2655*0Sstevel@tonic-gate 		(void) anon_set_ptr(amp->ahp, anon_index, ap, ANON_SLEEP);
2656*0Sstevel@tonic-gate 		page_unlock(pp);
2657*0Sstevel@tonic-gate 	}
2658*0Sstevel@tonic-gate 	anon_array_exit(&cookie);
2659*0Sstevel@tonic-gate 	ANON_LOCK_EXIT(&amp->a_rwlock);
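	/*
	 * The memory ID is the anon slot backing this address plus the
	 * byte offset within the page; the slot is created (zero-filled)
	 * above if it did not exist yet, so a stable ID can be returned.
	 */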
2660*0Sstevel@tonic-gate 	memidp->val[0] = (uintptr_t)ap;
2661*0Sstevel@tonic-gate 	memidp->val[1] = (uintptr_t)addr & PAGEOFFSET;
2662*0Sstevel@tonic-gate 	return (0);
2663*0Sstevel@tonic-gate }
2664*0Sstevel@tonic-gate 
2665*0Sstevel@tonic-gate /*
2666*0Sstevel@tonic-gate  * Get memory allocation policy info for specified address in given segment
2667*0Sstevel@tonic-gate  */
2668*0Sstevel@tonic-gate static lgrp_mem_policy_info_t *
2669*0Sstevel@tonic-gate segspt_shmgetpolicy(struct seg *seg, caddr_t addr)
2670*0Sstevel@tonic-gate {
2671*0Sstevel@tonic-gate 	struct anon_map		*amp;
2672*0Sstevel@tonic-gate 	ulong_t			anon_index;
2673*0Sstevel@tonic-gate 	lgrp_mem_policy_info_t	*policy_info;
2674*0Sstevel@tonic-gate 	struct shm_data		*shm_data;
2675*0Sstevel@tonic-gate 
2676*0Sstevel@tonic-gate 	ASSERT(seg != NULL);
2677*0Sstevel@tonic-gate 
2678*0Sstevel@tonic-gate 	/*
2679*0Sstevel@tonic-gate 	 * Get anon_map from segshm.
2680*0Sstevel@tonic-gate 	 *
2681*0Sstevel@tonic-gate 	 * Assume that no lock needs to be held on the anon_map, since
2682*0Sstevel@tonic-gate 	 * it should be protected by its reference count, which must be
2683*0Sstevel@tonic-gate 	 * nonzero for an existing segment.  We do, however, need to
2684*0Sstevel@tonic-gate 	 * grab the readers lock on the policy tree.
2685*0Sstevel@tonic-gate 	 */
2686*0Sstevel@tonic-gate 	shm_data = (struct shm_data *)seg->s_data;
2687*0Sstevel@tonic-gate 	if (shm_data == NULL)
2688*0Sstevel@tonic-gate 		return (NULL);
2689*0Sstevel@tonic-gate 	amp = shm_data->shm_amp;
2690*0Sstevel@tonic-gate 	ASSERT(amp->refcnt != 0);
2691*0Sstevel@tonic-gate 
2692*0Sstevel@tonic-gate 	/*
2693*0Sstevel@tonic-gate 	 * Get policy info
2694*0Sstevel@tonic-gate 	 *
2695*0Sstevel@tonic-gate 	 * Assume starting anon index of 0
2696*0Sstevel@tonic-gate 	 */
2697*0Sstevel@tonic-gate 	anon_index = seg_page(seg, addr);
2698*0Sstevel@tonic-gate 	policy_info = lgrp_shm_policy_get(amp, anon_index, NULL, 0);
2699*0Sstevel@tonic-gate 
2700*0Sstevel@tonic-gate 	return (policy_info);
2701*0Sstevel@tonic-gate }
2702