/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */
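
/*
 * Illustrative usage sketch (a comment only, not part of this file's
 * logic): a typical kernel consumer, such as a file system read path,
 * maps a chunk of a vnode, copies data out, and releases the slot.  The
 * uio setup and error handling below are simplified assumptions; see
 * segmap_getmapflt() and segmap_release() for the real interface.
 *
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);
 *	error = uiomove(base, n, UIO_READ, uio);
 *	(void) segmap_release(segkmap, base, error ? SM_INVAL : 0);
 */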

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
    caddr_t addr);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
	SEGMAP_BADOP(int),	/* dup */
	SEGMAP_BADOP(int),	/* unmap */
	segmap_free,
	segmap_fault,
	segmap_faulta,
	SEGMAP_BADOP(int),	/* setprot */
	segmap_checkprot,
	segmap_kluster,
	SEGMAP_BADOP(size_t),	/* swapout */
	SEGMAP_BADOP(int),	/* sync */
	SEGMAP_BADOP(size_t),	/* incore */
	SEGMAP_BADOP(int),	/* lockop */
	segmap_getprot,
	segmap_getoffset,
	segmap_gettype,
	segmap_getvp,
	SEGMAP_BADOP(int),	/* advise */
	segmap_dump,
	segmap_pagelock,	/* pagelock */
	SEGMAP_BADOP(int),	/* setpgsz */
	segmap_getmemid,	/* getmemid */
	segmap_getpolicy,	/* getpolicy */
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
			u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))
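
/*
 * Worked example (illustration only, assuming the common 8K MAXBSIZE
 * and 4K pages): each smap slot then covers two pages, so the page at
 * offset 0 within the chunk maps to bit 0 and the page at offset 4K to
 * bit 1.  The "& 0xf" simply keeps the shift within the 16-bit
 * sm_bitmap for larger MAXBSIZE/PAGESIZE ratios.
 */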

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for hash in/out of the (vp, off) list) or the
 * freelist lock to put the slot back on the free list.
 *
 * The hash search is done holding only the hashchain lock; when a wanted
 * slot is found, we drop the hashchain lock and then lock the slot, so there
 * is no overlapping of hashchain and smap locks. After the slot is
 * locked, we verify again that the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock,
 * then locking the smap slot at the head of the freelist. This is
 * in reversed lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for the hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp) (&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
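
/*
 * Sketch of the lookup ordering described above (hypothetical fragment,
 * for illustration only; the real lookup lives in the getmap path).
 * The hashchain lock is never held across the smap lock, so the slot
 * must be revalidated after it is locked:
 *
 *	SMAP_HASHFUNC(vp, off, hashid);
 *	mutex_enter(SHASHMTX(hashid));
 *	for (smp = smd_hash[hashid].sh_hash_list; smp != NULL;
 *	    smp = smp->sm_hash)
 *		if (smp->sm_vp == vp && smp->sm_off == off)
 *			break;
 *	mutex_exit(SHASHMTX(hashid));
 *	if (smp != NULL) {
 *		mutex_enter(SMAPMTX(smp));
 *		if (smp->sm_vp != vp || smp->sm_off != off)
 *			... slot was reclaimed while unlocked; retry ...
 *	}
 */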

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can over-ride this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (nfreelist & (nfreelist - 1)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size rounding down to the next power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz)-1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array once. Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations even across the available
	 * smap freelists. Init the scpu_last_smap field to the first
	 * smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault and this was a
		 * NOP in case of a kpm based smap, so dangerous things
		 * must have happened in the meantime. Pages are prefaulted
		 * and locked in segmap_getmapflt and they will not be
		 * unlocked until segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has
		 * "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear bitmap, if the bit corresponding to "off" is set,
		 * since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: Following assertion is to verify
		 * the correctness of the cast to (int) above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */
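
/*
 * For example: with the common MAXBSIZE of 8192 and a minimum page size
 * of 4K, MAXPPB is 2, so the pl[] array in segmap_fault() below holds
 * at most two pages plus a NULL terminator.
 */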

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm. We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED());

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * that the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int	newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED());

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do
			protv[--pgno] = smd->smd_prot;
		while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

static void
segmap_badop()
{
	panic("segmap_badop");
	/*NOTREACHED*/
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes held to set sm_want;
		 * snapshot the value before dropping releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex
		 * then recheck after obtaining sm_freeq[0] mutex as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp,off) as its tag before us.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode * even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode * itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
1026*0Sstevel@tonic-gate 		 * structure is on the free list, there are no pages in memory
1027*0Sstevel@tonic-gate 		 * that can refer to the vnode.  Thus even if we reuse the same
1028*0Sstevel@tonic-gate 		 * vnode/smap structure for a vnode which has the same
1029*0Sstevel@tonic-gate 		 * address but represents a different object, we are ok.
1030*0Sstevel@tonic-gate 		 */
1031*0Sstevel@tonic-gate 		smp->sm_vp = vp;
1032*0Sstevel@tonic-gate 		smp->sm_off = off;
1033*0Sstevel@tonic-gate 
1034*0Sstevel@tonic-gate 		hpp = &smd_hash[hashid].sh_hash_list;
1035*0Sstevel@tonic-gate 		smp->sm_hash = *hpp;
1036*0Sstevel@tonic-gate 		*hpp = smp;
1037*0Sstevel@tonic-gate #ifdef SEGMAP_HASHSTATS
1038*0Sstevel@tonic-gate 		smd_hash_len[hashid]++;
1039*0Sstevel@tonic-gate #endif
1040*0Sstevel@tonic-gate 	}
1041*0Sstevel@tonic-gate 	mutex_exit(hmtx);
1042*0Sstevel@tonic-gate 
1043*0Sstevel@tonic-gate 	return (tmp);
1044*0Sstevel@tonic-gate }
1045*0Sstevel@tonic-gate 
1046*0Sstevel@tonic-gate static void
1047*0Sstevel@tonic-gate segmap_hashout(struct smap *smp)
1048*0Sstevel@tonic-gate {
1049*0Sstevel@tonic-gate 	struct smap **hpp, *hp;
1050*0Sstevel@tonic-gate 	struct vnode *vp;
1051*0Sstevel@tonic-gate 	kmutex_t *mtx;
1052*0Sstevel@tonic-gate 	int hashid;
1053*0Sstevel@tonic-gate 	u_offset_t off;
1054*0Sstevel@tonic-gate 
1055*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1056*0Sstevel@tonic-gate 
1057*0Sstevel@tonic-gate 	vp = smp->sm_vp;
1058*0Sstevel@tonic-gate 	off = smp->sm_off;
1059*0Sstevel@tonic-gate 
1060*0Sstevel@tonic-gate 	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
1061*0Sstevel@tonic-gate 	mtx = SHASHMTX(hashid);
1062*0Sstevel@tonic-gate 	mutex_enter(mtx);
1063*0Sstevel@tonic-gate 
1064*0Sstevel@tonic-gate 	hpp = &smd_hash[hashid].sh_hash_list;
1065*0Sstevel@tonic-gate 	for (;;) {
1066*0Sstevel@tonic-gate 		hp = *hpp;
1067*0Sstevel@tonic-gate 		if (hp == NULL) {
1068*0Sstevel@tonic-gate 			panic("segmap_hashout");
1069*0Sstevel@tonic-gate 			/*NOTREACHED*/
1070*0Sstevel@tonic-gate 		}
1071*0Sstevel@tonic-gate 		if (hp == smp)
1072*0Sstevel@tonic-gate 			break;
1073*0Sstevel@tonic-gate 		hpp = &hp->sm_hash;
1074*0Sstevel@tonic-gate 	}
1075*0Sstevel@tonic-gate 
1076*0Sstevel@tonic-gate 	*hpp = smp->sm_hash;
1077*0Sstevel@tonic-gate 	smp->sm_hash = NULL;
1078*0Sstevel@tonic-gate #ifdef SEGMAP_HASHSTATS
1079*0Sstevel@tonic-gate 	smd_hash_len[hashid]--;
1080*0Sstevel@tonic-gate #endif
1081*0Sstevel@tonic-gate 	mutex_exit(mtx);
1082*0Sstevel@tonic-gate 
1083*0Sstevel@tonic-gate 	smp->sm_vp = NULL;
1084*0Sstevel@tonic-gate 	smp->sm_off = (u_offset_t)0;
1085*0Sstevel@tonic-gate 
1086*0Sstevel@tonic-gate }
1087*0Sstevel@tonic-gate 
1088*0Sstevel@tonic-gate /*
1089*0Sstevel@tonic-gate  * Attempt to free unmodified, unmapped, and non locked segmap
1090*0Sstevel@tonic-gate  * pages.
1091*0Sstevel@tonic-gate  */
1092*0Sstevel@tonic-gate void
1093*0Sstevel@tonic-gate segmap_pagefree(struct vnode *vp, u_offset_t off)
1094*0Sstevel@tonic-gate {
1095*0Sstevel@tonic-gate 	u_offset_t pgoff;
1096*0Sstevel@tonic-gate 	page_t  *pp;
1097*0Sstevel@tonic-gate 
1098*0Sstevel@tonic-gate 	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
1099*0Sstevel@tonic-gate 
1100*0Sstevel@tonic-gate 		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
1101*0Sstevel@tonic-gate 			continue;
1102*0Sstevel@tonic-gate 
1103*0Sstevel@tonic-gate 		switch (page_release(pp, 1)) {
1104*0Sstevel@tonic-gate 		case PGREL_NOTREL:
1105*0Sstevel@tonic-gate 			segmapcnt.smp_free_notfree.value.ul++;
1106*0Sstevel@tonic-gate 			break;
1107*0Sstevel@tonic-gate 		case PGREL_MOD:
1108*0Sstevel@tonic-gate 			segmapcnt.smp_free_dirty.value.ul++;
1109*0Sstevel@tonic-gate 			break;
1110*0Sstevel@tonic-gate 		case PGREL_CLEAN:
1111*0Sstevel@tonic-gate 			segmapcnt.smp_free.value.ul++;
1112*0Sstevel@tonic-gate 			break;
1113*0Sstevel@tonic-gate 		}
1114*0Sstevel@tonic-gate 	}
1115*0Sstevel@tonic-gate }
1116*0Sstevel@tonic-gate 
1117*0Sstevel@tonic-gate /*
1118*0Sstevel@tonic-gate  * Locks held on entry: smap lock
1119*0Sstevel@tonic-gate  * Locks held on exit : smap lock.
1120*0Sstevel@tonic-gate  */
1121*0Sstevel@tonic-gate 
1122*0Sstevel@tonic-gate static void
1123*0Sstevel@tonic-gate grab_smp(struct smap *smp, page_t *pp)
1124*0Sstevel@tonic-gate {
1125*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1126*0Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt == 0);
1127*0Sstevel@tonic-gate 
1128*0Sstevel@tonic-gate 	if (smp->sm_vp != (struct vnode *)NULL) {
1129*0Sstevel@tonic-gate 		struct vnode	*vp = smp->sm_vp;
1130*0Sstevel@tonic-gate 		u_offset_t 	off = smp->sm_off;
1131*0Sstevel@tonic-gate 		/*
1132*0Sstevel@tonic-gate 		 * Destroy old vnode association and
1133*0Sstevel@tonic-gate 		 * unload any hardware translations to
1134*0Sstevel@tonic-gate 		 * the old object.
1135*0Sstevel@tonic-gate 		 */
1136*0Sstevel@tonic-gate 		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
1137*0Sstevel@tonic-gate 		segmap_hashout(smp);
1138*0Sstevel@tonic-gate 
1139*0Sstevel@tonic-gate 		/*
1140*0Sstevel@tonic-gate 		 * This node is off freelist and hashlist,
1141*0Sstevel@tonic-gate 		 * so there is no reason to drop/reacquire sm_mtx
1142*0Sstevel@tonic-gate 		 * across calls to hat_unload.
1143*0Sstevel@tonic-gate 		 */
1144*0Sstevel@tonic-gate 		if (segmap_kpm) {
1145*0Sstevel@tonic-gate 			caddr_t vaddr;
1146*0Sstevel@tonic-gate 			int hat_unload_needed = 0;
1147*0Sstevel@tonic-gate 
1148*0Sstevel@tonic-gate 			/*
1149*0Sstevel@tonic-gate 			 * unload kpm mapping
1150*0Sstevel@tonic-gate 			 */
1151*0Sstevel@tonic-gate 			if (pp != NULL) {
1152*0Sstevel@tonic-gate 				vaddr = hat_kpm_page2va(pp, 1);
1153*0Sstevel@tonic-gate 				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
1154*0Sstevel@tonic-gate 				page_unlock(pp);
1155*0Sstevel@tonic-gate 			}
1156*0Sstevel@tonic-gate 
1157*0Sstevel@tonic-gate 			/*
1158*0Sstevel@tonic-gate 			 * Check if we have (also) the rare case of a
1159*0Sstevel@tonic-gate 			 * non-kpm mapping.
1160*0Sstevel@tonic-gate 			 */
1161*0Sstevel@tonic-gate 			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
1162*0Sstevel@tonic-gate 				hat_unload_needed = 1;
1163*0Sstevel@tonic-gate 				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1164*0Sstevel@tonic-gate 			}
1165*0Sstevel@tonic-gate 
1166*0Sstevel@tonic-gate 			if (hat_unload_needed) {
1167*0Sstevel@tonic-gate 				hat_unload(kas.a_hat, segkmap->s_base +
1168*0Sstevel@tonic-gate 				    ((smp - smd_smap) * MAXBSIZE),
1169*0Sstevel@tonic-gate 				    MAXBSIZE, HAT_UNLOAD);
1170*0Sstevel@tonic-gate 			}
1171*0Sstevel@tonic-gate 
1172*0Sstevel@tonic-gate 		} else {
1173*0Sstevel@tonic-gate 			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
1174*0Sstevel@tonic-gate 			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1175*0Sstevel@tonic-gate 			hat_unload(kas.a_hat, segkmap->s_base +
1176*0Sstevel@tonic-gate 			    ((smp - smd_smap) * MAXBSIZE),
1177*0Sstevel@tonic-gate 			    MAXBSIZE, HAT_UNLOAD);
1178*0Sstevel@tonic-gate 		}
1179*0Sstevel@tonic-gate 		segmap_pagefree(vp, off);
1180*0Sstevel@tonic-gate 	}
1181*0Sstevel@tonic-gate }
1182*0Sstevel@tonic-gate 
1183*0Sstevel@tonic-gate static struct smap *
1184*0Sstevel@tonic-gate get_free_smp(int free_ndx)
1185*0Sstevel@tonic-gate {
1186*0Sstevel@tonic-gate 	struct smfree *sm;
1187*0Sstevel@tonic-gate 	kmutex_t *smtx;
1188*0Sstevel@tonic-gate 	struct smap *smp, *first;
1189*0Sstevel@tonic-gate 	struct sm_freeq *allocq, *releq;
1190*0Sstevel@tonic-gate 	struct kpme *kpme;
1191*0Sstevel@tonic-gate 	page_t *pp = NULL;
1192*0Sstevel@tonic-gate 	int end_ndx, page_locked = 0;
1193*0Sstevel@tonic-gate 
1194*0Sstevel@tonic-gate 	end_ndx = free_ndx;
1195*0Sstevel@tonic-gate 	sm = &smd_free[free_ndx];
1196*0Sstevel@tonic-gate 
1197*0Sstevel@tonic-gate retry_queue:
1198*0Sstevel@tonic-gate 	allocq = sm->sm_allocq;
1199*0Sstevel@tonic-gate 	mutex_enter(&allocq->smq_mtx);
1200*0Sstevel@tonic-gate 
1201*0Sstevel@tonic-gate 	if ((smp = allocq->smq_free) == NULL) {
1202*0Sstevel@tonic-gate 
1203*0Sstevel@tonic-gate skip_queue:
1204*0Sstevel@tonic-gate 		/*
1205*0Sstevel@tonic-gate 		 * The alloc list is empty or this queue is being skipped;
1206*0Sstevel@tonic-gate 		 * first see if the allocq toggled.
1207*0Sstevel@tonic-gate 		 */
1208*0Sstevel@tonic-gate 		if (sm->sm_allocq != allocq) {
1209*0Sstevel@tonic-gate 			/* queue changed */
1210*0Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
1211*0Sstevel@tonic-gate 			goto retry_queue;
1212*0Sstevel@tonic-gate 		}
1213*0Sstevel@tonic-gate 		releq = sm->sm_releq;
1214*0Sstevel@tonic-gate 		if (!mutex_tryenter(&releq->smq_mtx)) {
1215*0Sstevel@tonic-gate 			/* cannot get releq; a free smp may be there now */
1216*0Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
1217*0Sstevel@tonic-gate 
1218*0Sstevel@tonic-gate 			/*
1219*0Sstevel@tonic-gate 			 * This loop could spin forever if this thread has
1220*0Sstevel@tonic-gate 			 * higher priority than the thread that is holding
1221*0Sstevel@tonic-gate 			 * releq->smq_mtx. In order to force the other thread
1222*0Sstevel@tonic-gate 			 * to run, we'll lock/unlock the mutex which is safe
1223*0Sstevel@tonic-gate 			 * since we just unlocked the allocq mutex.
1224*0Sstevel@tonic-gate 			 */
1225*0Sstevel@tonic-gate 			mutex_enter(&releq->smq_mtx);
1226*0Sstevel@tonic-gate 			mutex_exit(&releq->smq_mtx);
1227*0Sstevel@tonic-gate 			goto retry_queue;
1228*0Sstevel@tonic-gate 		}
1229*0Sstevel@tonic-gate 		if (releq->smq_free == NULL) {
1230*0Sstevel@tonic-gate 			/*
1231*0Sstevel@tonic-gate 			 * This freelist is empty.
1232*0Sstevel@tonic-gate 			 * This should not happen unless clients
1233*0Sstevel@tonic-gate 			 * are failing to release the segmap
1234*0Sstevel@tonic-gate 			 * window after accessing the data.
1235*0Sstevel@tonic-gate 			 * Before resorting to sleeping, try
1236*0Sstevel@tonic-gate 			 * the next list of the same color.
1237*0Sstevel@tonic-gate 			 */
1238*0Sstevel@tonic-gate 			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
1239*0Sstevel@tonic-gate 			if (free_ndx != end_ndx) {
1240*0Sstevel@tonic-gate 				mutex_exit(&releq->smq_mtx);
1241*0Sstevel@tonic-gate 				mutex_exit(&allocq->smq_mtx);
1242*0Sstevel@tonic-gate 				sm = &smd_free[free_ndx];
1243*0Sstevel@tonic-gate 				goto retry_queue;
1244*0Sstevel@tonic-gate 			}
1245*0Sstevel@tonic-gate 			/*
1246*0Sstevel@tonic-gate 			 * Tried all freelists of the same color once,
1247*0Sstevel@tonic-gate 			 * wait on this list and hope something gets freed.
1248*0Sstevel@tonic-gate 			 */
1249*0Sstevel@tonic-gate 			segmapcnt.smp_get_nofree.value.ul++;
1250*0Sstevel@tonic-gate 			sm->sm_want++;
1251*0Sstevel@tonic-gate 			mutex_exit(&sm->sm_freeq[1].smq_mtx);
1252*0Sstevel@tonic-gate 			cv_wait(&sm->sm_free_cv,
1253*0Sstevel@tonic-gate 				&sm->sm_freeq[0].smq_mtx);
1254*0Sstevel@tonic-gate 			sm->sm_want--;
1255*0Sstevel@tonic-gate 			mutex_exit(&sm->sm_freeq[0].smq_mtx);
1256*0Sstevel@tonic-gate 			sm = &smd_free[free_ndx];
1257*0Sstevel@tonic-gate 			goto retry_queue;
1258*0Sstevel@tonic-gate 		} else {
1259*0Sstevel@tonic-gate 			/*
1260*0Sstevel@tonic-gate 			 * Something on the rele queue; flip the alloc
1261*0Sstevel@tonic-gate 			 * and rele queues and retry.
1262*0Sstevel@tonic-gate 			 */
1263*0Sstevel@tonic-gate 			sm->sm_allocq = releq;
1264*0Sstevel@tonic-gate 			sm->sm_releq = allocq;
1265*0Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
1266*0Sstevel@tonic-gate 			mutex_exit(&releq->smq_mtx);
1267*0Sstevel@tonic-gate 			if (page_locked) {
1268*0Sstevel@tonic-gate 				delay(hz >> 2);
1269*0Sstevel@tonic-gate 				page_locked = 0;
1270*0Sstevel@tonic-gate 			}
1271*0Sstevel@tonic-gate 			goto retry_queue;
1272*0Sstevel@tonic-gate 		}
1273*0Sstevel@tonic-gate 	} else {
1274*0Sstevel@tonic-gate 		/*
1275*0Sstevel@tonic-gate 		 * Fastpath the case where we get the smap mutex
1276*0Sstevel@tonic-gate 		 * on the first try.
1277*0Sstevel@tonic-gate 		 */
1278*0Sstevel@tonic-gate 		first = smp;
1279*0Sstevel@tonic-gate next_smap:
1280*0Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
1281*0Sstevel@tonic-gate 		if (!mutex_tryenter(smtx)) {
1282*0Sstevel@tonic-gate 			/*
1283*0Sstevel@tonic-gate 			 * Another thread is trying to reclaim this slot.
1284*0Sstevel@tonic-gate 			 * Skip to the next queue or smap.
1285*0Sstevel@tonic-gate 			 */
1286*0Sstevel@tonic-gate 			if ((smp = smp->sm_next) == first) {
1287*0Sstevel@tonic-gate 				goto skip_queue;
1288*0Sstevel@tonic-gate 			} else {
1289*0Sstevel@tonic-gate 				goto next_smap;
1290*0Sstevel@tonic-gate 			}
1291*0Sstevel@tonic-gate 		} else {
1292*0Sstevel@tonic-gate 			/*
1293*0Sstevel@tonic-gate 			 * if kpme exists, get shared lock on the page
1294*0Sstevel@tonic-gate 			 */
1295*0Sstevel@tonic-gate 			if (segmap_kpm && smp->sm_vp != NULL) {
1296*0Sstevel@tonic-gate 
1297*0Sstevel@tonic-gate 				kpme = GET_KPME(smp);
1298*0Sstevel@tonic-gate 				pp = kpme->kpe_page;
1299*0Sstevel@tonic-gate 
1300*0Sstevel@tonic-gate 				if (pp != NULL) {
1301*0Sstevel@tonic-gate 					if (!page_trylock(pp, SE_SHARED)) {
1302*0Sstevel@tonic-gate 						smp = smp->sm_next;
1303*0Sstevel@tonic-gate 						mutex_exit(smtx);
1304*0Sstevel@tonic-gate 						page_locked = 1;
1305*0Sstevel@tonic-gate 
1306*0Sstevel@tonic-gate 						pp = NULL;
1307*0Sstevel@tonic-gate 
1308*0Sstevel@tonic-gate 						if (smp == first) {
1309*0Sstevel@tonic-gate 							goto skip_queue;
1310*0Sstevel@tonic-gate 						} else {
1311*0Sstevel@tonic-gate 							goto next_smap;
1312*0Sstevel@tonic-gate 						}
1313*0Sstevel@tonic-gate 					} else {
1314*0Sstevel@tonic-gate 						if (kpme->kpe_page == NULL) {
1315*0Sstevel@tonic-gate 							page_unlock(pp);
1316*0Sstevel@tonic-gate 							pp = NULL;
1317*0Sstevel@tonic-gate 						}
1318*0Sstevel@tonic-gate 					}
1319*0Sstevel@tonic-gate 				}
1320*0Sstevel@tonic-gate 			}
1321*0Sstevel@tonic-gate 
1322*0Sstevel@tonic-gate 			/*
1323*0Sstevel@tonic-gate 			 * At this point, we've selected smp.  Remove smp
1324*0Sstevel@tonic-gate 			 * from its freelist.  If smp is the first one in
1325*0Sstevel@tonic-gate 			 * the freelist, update the head of the freelist.
1326*0Sstevel@tonic-gate 			 */
1327*0Sstevel@tonic-gate 			if (first == smp) {
1328*0Sstevel@tonic-gate 				ASSERT(first == allocq->smq_free);
1329*0Sstevel@tonic-gate 				allocq->smq_free = smp->sm_next;
1330*0Sstevel@tonic-gate 			}
1331*0Sstevel@tonic-gate 
1332*0Sstevel@tonic-gate 			/*
1333*0Sstevel@tonic-gate 			 * if the head of the freelist still points to smp,
1334*0Sstevel@tonic-gate 			 * then there are no more free smaps in that list.
1335*0Sstevel@tonic-gate 			 */
1336*0Sstevel@tonic-gate 			if (allocq->smq_free == smp)
1337*0Sstevel@tonic-gate 				/*
1338*0Sstevel@tonic-gate 				 * Took the last one
1339*0Sstevel@tonic-gate 				 */
1340*0Sstevel@tonic-gate 				allocq->smq_free = NULL;
1341*0Sstevel@tonic-gate 			else {
1342*0Sstevel@tonic-gate 				smp->sm_prev->sm_next = smp->sm_next;
1343*0Sstevel@tonic-gate 				smp->sm_next->sm_prev = smp->sm_prev;
1344*0Sstevel@tonic-gate 			}
1345*0Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
1346*0Sstevel@tonic-gate 			smp->sm_prev = smp->sm_next = NULL;
1347*0Sstevel@tonic-gate 
1348*0Sstevel@tonic-gate 			/*
1349*0Sstevel@tonic-gate 			 * if pp != NULL, pp must have been locked;
1350*0Sstevel@tonic-gate 			 * grab_smp() unlocks pp.
1351*0Sstevel@tonic-gate 			 */
1352*0Sstevel@tonic-gate 			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
1353*0Sstevel@tonic-gate 			grab_smp(smp, pp);
1354*0Sstevel@tonic-gate 			/* return smp locked. */
1355*0Sstevel@tonic-gate 			ASSERT(SMAPMTX(smp) == smtx);
1356*0Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(smtx));
1357*0Sstevel@tonic-gate 			return (smp);
1358*0Sstevel@tonic-gate 		}
1359*0Sstevel@tonic-gate 	}
1360*0Sstevel@tonic-gate }
1361*0Sstevel@tonic-gate 
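/*
 * get_free_smp() above depends on the split alloc/release freelist
 * design: slots are always released to sm_releq and allocated from
 * sm_allocq, so the two sides normally contend on different mutexes,
 * and the queues are swapped only when the alloc side runs dry.  A
 * simplified sketch of the refill step follows (illustrative only,
 * behind a guard macro that is never defined); the real code above
 * additionally uses mutex_tryenter() on the release queue to avoid
 * spinning against a preempted releasing thread, and rechecks whether
 * sm_allocq toggled while the lock was being taken.
 */
#ifdef SEGMAP_EXAMPLE_CODE
static int
example_freelist_refill(struct smfree *sm)
{
	struct sm_freeq *allocq = sm->sm_allocq;
	struct sm_freeq *releq;
	int refilled = 0;

	mutex_enter(&allocq->smq_mtx);
	if (allocq->smq_free == NULL) {
		/*
		 * Alloc side is dry; swap the queues so recently
		 * released slots become allocatable.
		 */
		releq = sm->sm_releq;
		mutex_enter(&releq->smq_mtx);
		sm->sm_allocq = releq;
		sm->sm_releq = allocq;
		refilled = (releq->smq_free != NULL);
		mutex_exit(&releq->smq_mtx);
	}
	mutex_exit(&allocq->smq_mtx);
	return (refilled);		/* caller retries on new allocq */
}
#endif	/* SEGMAP_EXAMPLE_CODE */
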
1362*0Sstevel@tonic-gate /*
1363*0Sstevel@tonic-gate  * Special public segmap operations
1364*0Sstevel@tonic-gate  */
1365*0Sstevel@tonic-gate 
1366*0Sstevel@tonic-gate /*
1367*0Sstevel@tonic-gate  * Create pages (without using VOP_GETPAGE) and load up translations to them.
1368*0Sstevel@tonic-gate  * If softlock is TRUE, then set things up so that it looks like a call
1369*0Sstevel@tonic-gate  * to segmap_fault with F_SOFTLOCK.
1370*0Sstevel@tonic-gate  *
1371*0Sstevel@tonic-gate  * Returns 1 if a page is created by calling page_create_va(), or 0 otherwise.
1372*0Sstevel@tonic-gate  *
1373*0Sstevel@tonic-gate  * All fields in the generic segment (struct seg) are considered to be
1374*0Sstevel@tonic-gate  * read-only for "segmap" even though the kernel address space (kas) may
1375*0Sstevel@tonic-gate  * not be locked, hence no lock is needed to access them.
1376*0Sstevel@tonic-gate  */
1377*0Sstevel@tonic-gate int
1378*0Sstevel@tonic-gate segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
1379*0Sstevel@tonic-gate {
1380*0Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
1381*0Sstevel@tonic-gate 	page_t *pp;
1382*0Sstevel@tonic-gate 	u_offset_t off;
1383*0Sstevel@tonic-gate 	struct smap *smp;
1384*0Sstevel@tonic-gate 	struct vnode *vp;
1385*0Sstevel@tonic-gate 	caddr_t eaddr;
1386*0Sstevel@tonic-gate 	int newpage = 0;
1387*0Sstevel@tonic-gate 	uint_t prot;
1388*0Sstevel@tonic-gate 	kmutex_t *smtx;
1389*0Sstevel@tonic-gate 	int hat_flag;
1390*0Sstevel@tonic-gate 
1391*0Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
1392*0Sstevel@tonic-gate 
1393*0Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
1394*0Sstevel@tonic-gate 		/*
1395*0Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
1396*0Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
1397*0Sstevel@tonic-gate 		 * segmap_release. The SM_KPM_NEWPAGE flag is set
1398*0Sstevel@tonic-gate 		 * in segmap_pagecreate_kpm when new pages are created,
1399*0Sstevel@tonic-gate 		 * and it is returned as the "newpage" indication here.
1400*0Sstevel@tonic-gate 		 */
1401*0Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1402*0Sstevel@tonic-gate 			panic("segmap_pagecreate: smap not found "
1403*0Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
1404*0Sstevel@tonic-gate 			/*NOTREACHED*/
1405*0Sstevel@tonic-gate 		}
1406*0Sstevel@tonic-gate 
1407*0Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
1408*0Sstevel@tonic-gate 		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
1409*0Sstevel@tonic-gate 		smp->sm_flags &= ~SM_KPM_NEWPAGE;
1410*0Sstevel@tonic-gate 		mutex_exit(smtx);
1411*0Sstevel@tonic-gate 
1412*0Sstevel@tonic-gate 		return (newpage);
1413*0Sstevel@tonic-gate 	}
1414*0Sstevel@tonic-gate 
1415*0Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
1416*0Sstevel@tonic-gate 
1417*0Sstevel@tonic-gate 	eaddr = addr + len;
1418*0Sstevel@tonic-gate 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1419*0Sstevel@tonic-gate 
1420*0Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
1421*0Sstevel@tonic-gate 
1422*0Sstevel@tonic-gate 	/*
1423*0Sstevel@tonic-gate 	 * We don't grab smp mutex here since we assume the smp
1424*0Sstevel@tonic-gate 	 * has a refcnt set already which prevents the slot from
1425*0Sstevel@tonic-gate 	 * changing its id.
1426*0Sstevel@tonic-gate 	 */
1427*0Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
1428*0Sstevel@tonic-gate 
1429*0Sstevel@tonic-gate 	vp = smp->sm_vp;
1430*0Sstevel@tonic-gate 	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1431*0Sstevel@tonic-gate 	prot = smd->smd_prot;
1432*0Sstevel@tonic-gate 
1433*0Sstevel@tonic-gate 	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1434*0Sstevel@tonic-gate 		hat_flag = HAT_LOAD;
1435*0Sstevel@tonic-gate 		pp = page_lookup(vp, off, SE_SHARED);
1436*0Sstevel@tonic-gate 		if (pp == NULL) {
1437*0Sstevel@tonic-gate 			ushort_t bitindex;
1438*0Sstevel@tonic-gate 
1439*0Sstevel@tonic-gate 			if ((pp = page_create_va(vp, off,
1440*0Sstevel@tonic-gate 			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1441*0Sstevel@tonic-gate 				panic("segmap_pagecreate: page_create failed");
1442*0Sstevel@tonic-gate 				/*NOTREACHED*/
1443*0Sstevel@tonic-gate 			}
1444*0Sstevel@tonic-gate 			newpage = 1;
1445*0Sstevel@tonic-gate 			page_io_unlock(pp);
1446*0Sstevel@tonic-gate 
1447*0Sstevel@tonic-gate 			/*
1448*0Sstevel@tonic-gate 			 * Since pages created here do not contain valid
1449*0Sstevel@tonic-gate 			 * data until the caller writes into them, the
1450*0Sstevel@tonic-gate 			 * "exclusive" lock is not dropped, preventing
1451*0Sstevel@tonic-gate 			 * other users from accessing the page.  We also
1452*0Sstevel@tonic-gate 			 * have to lock the translation to prevent a fault
1453*0Sstevel@tonic-gate 			 * from occurring when the virtual address mapped by
1454*0Sstevel@tonic-gate 			 * this page is written into.  This is necessary to
1455*0Sstevel@tonic-gate 			 * avoid a deadlock since we haven't dropped the
1456*0Sstevel@tonic-gate 			 * "exclusive" lock.
1457*0Sstevel@tonic-gate 			 */
1458*0Sstevel@tonic-gate 			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
1459*0Sstevel@tonic-gate 
1460*0Sstevel@tonic-gate 			/*
1461*0Sstevel@tonic-gate 			 * Large Files: The following assertion is to
1462*0Sstevel@tonic-gate 			 * verify the cast above.
1463*0Sstevel@tonic-gate 			 */
1464*0Sstevel@tonic-gate 			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1465*0Sstevel@tonic-gate 			smtx = SMAPMTX(smp);
1466*0Sstevel@tonic-gate 			mutex_enter(smtx);
1467*0Sstevel@tonic-gate 			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
1468*0Sstevel@tonic-gate 			mutex_exit(smtx);
1469*0Sstevel@tonic-gate 
1470*0Sstevel@tonic-gate 			hat_flag = HAT_LOAD_LOCK;
1471*0Sstevel@tonic-gate 		} else if (softlock) {
1472*0Sstevel@tonic-gate 			hat_flag = HAT_LOAD_LOCK;
1473*0Sstevel@tonic-gate 		}
1474*0Sstevel@tonic-gate 
1475*0Sstevel@tonic-gate 		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
1476*0Sstevel@tonic-gate 			hat_setmod(pp);
1477*0Sstevel@tonic-gate 
1478*0Sstevel@tonic-gate 		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
1479*0Sstevel@tonic-gate 
1480*0Sstevel@tonic-gate 		if (hat_flag != HAT_LOAD_LOCK)
1481*0Sstevel@tonic-gate 			page_unlock(pp);
1482*0Sstevel@tonic-gate 
1483*0Sstevel@tonic-gate 		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
1484*0Sstevel@tonic-gate 		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1485*0Sstevel@tonic-gate 		    seg, addr, pp, vp, off);
1486*0Sstevel@tonic-gate 	}
1487*0Sstevel@tonic-gate 
1488*0Sstevel@tonic-gate 	return (newpage);
1489*0Sstevel@tonic-gate }
1490*0Sstevel@tonic-gate 
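/*
 * Sketch of the conventional file system write path around
 * segmap_pagecreate(): when whole pages will be overwritten, the pages
 * are created without reading them in, the data is copied, and the
 * translations locked by segmap_pagecreate() are unlocked again before
 * the window is released.  This is illustrative only (the guard macro
 * is never defined); "wr_vp" and "wr_uio" are hypothetical arguments,
 * and the partial-page zeroing and EOF handling a real file system
 * performs are omitted.
 */
#ifdef SEGMAP_EXAMPLE_CODE
static int
example_segmap_write(struct vnode *wr_vp, struct uio *wr_uio)
{
	u_offset_t off = (u_offset_t)wr_uio->uio_loffset;
	size_t mapon = (size_t)(off & MAXBOFFSET);
	size_t n = MIN(MAXBSIZE - mapon, (size_t)wr_uio->uio_resid);
	int pagecreate = (mapon == 0 && n == MAXBSIZE);
	caddr_t base;
	int error;

	/* Map the window; skip prefaulting when creating the pages. */
	base = segmap_getmapflt(segkmap, wr_vp, off, n,
	    !pagecreate, S_WRITE);
	if (pagecreate)
		(void) segmap_pagecreate(segkmap, base + mapon, n, 0);

	error = uiomove(base + mapon, n, UIO_WRITE, wr_uio);

	if (pagecreate)
		segmap_pageunlock(segkmap, base + mapon, n, S_WRITE);
	if (error != 0)
		(void) segmap_release(segkmap, base, SM_INVAL);
	else
		error = segmap_release(segkmap, base, SM_WRITE);
	return (error);
}
#endif	/* SEGMAP_EXAMPLE_CODE */
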
1491*0Sstevel@tonic-gate void
1492*0Sstevel@tonic-gate segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
1493*0Sstevel@tonic-gate {
1494*0Sstevel@tonic-gate 	struct smap	*smp;
1495*0Sstevel@tonic-gate 	ushort_t	bitmask;
1496*0Sstevel@tonic-gate 	page_t		*pp;
1497*0Sstevel@tonic-gate 	struct	vnode	*vp;
1498*0Sstevel@tonic-gate 	u_offset_t	off;
1499*0Sstevel@tonic-gate 	caddr_t		eaddr;
1500*0Sstevel@tonic-gate 	kmutex_t	*smtx;
1501*0Sstevel@tonic-gate 
1502*0Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
1503*0Sstevel@tonic-gate 
1504*0Sstevel@tonic-gate 	eaddr = addr + len;
1505*0Sstevel@tonic-gate 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1506*0Sstevel@tonic-gate 
1507*0Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
1508*0Sstevel@tonic-gate 		/*
1509*0Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
1510*0Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
1511*0Sstevel@tonic-gate 		 * segmap_release, so no pages or hat mappings have
1512*0Sstevel@tonic-gate 		 * to be unlocked at this point.
1513*0Sstevel@tonic-gate 		 */
1514*0Sstevel@tonic-gate #ifdef DEBUG
1515*0Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1516*0Sstevel@tonic-gate 			panic("segmap_pageunlock: smap not found "
1517*0Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
1518*0Sstevel@tonic-gate 			/*NOTREACHED*/
1519*0Sstevel@tonic-gate 		}
1520*0Sstevel@tonic-gate 
1521*0Sstevel@tonic-gate 		ASSERT(smp->sm_refcnt > 0);
1522*0Sstevel@tonic-gate 		mutex_exit(SMAPMTX(smp));
1523*0Sstevel@tonic-gate #endif
1524*0Sstevel@tonic-gate 		return;
1525*0Sstevel@tonic-gate 	}
1526*0Sstevel@tonic-gate 
1527*0Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
1528*0Sstevel@tonic-gate 	smtx = SMAPMTX(smp);
1529*0Sstevel@tonic-gate 
1530*0Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
1531*0Sstevel@tonic-gate 
1532*0Sstevel@tonic-gate 	vp = smp->sm_vp;
1533*0Sstevel@tonic-gate 	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1534*0Sstevel@tonic-gate 
1535*0Sstevel@tonic-gate 	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1536*0Sstevel@tonic-gate 		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
1537*0Sstevel@tonic-gate 
1538*0Sstevel@tonic-gate 		/*
1539*0Sstevel@tonic-gate 		 * Large Files: The following assertion verifies
1540*0Sstevel@tonic-gate 		 * the correctness of the cast to (int) above.
1541*0Sstevel@tonic-gate 		 */
1542*0Sstevel@tonic-gate 		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1543*0Sstevel@tonic-gate 
1544*0Sstevel@tonic-gate 		/*
1545*0Sstevel@tonic-gate 		 * If the bit corresponding to "off" is set,
1546*0Sstevel@tonic-gate 		 * clear this bit in the bitmap, unlock translations,
1547*0Sstevel@tonic-gate 		 * and release the "exclusive" lock on the page.
1548*0Sstevel@tonic-gate 		 */
1549*0Sstevel@tonic-gate 		if (smp->sm_bitmap & bitmask) {
1550*0Sstevel@tonic-gate 			mutex_enter(smtx);
1551*0Sstevel@tonic-gate 			smp->sm_bitmap &= ~bitmask;
1552*0Sstevel@tonic-gate 			mutex_exit(smtx);
1553*0Sstevel@tonic-gate 
1554*0Sstevel@tonic-gate 			hat_unlock(kas.a_hat, addr, PAGESIZE);
1555*0Sstevel@tonic-gate 
1556*0Sstevel@tonic-gate 			/*
1557*0Sstevel@tonic-gate 			 * Use page_find() instead of page_lookup() to
1558*0Sstevel@tonic-gate 			 * find the page since we know that it has
1559*0Sstevel@tonic-gate 			 * "exclusive" lock.
1560*0Sstevel@tonic-gate 			 */
1561*0Sstevel@tonic-gate 			pp = page_find(vp, off);
1562*0Sstevel@tonic-gate 			if (pp == NULL) {
1563*0Sstevel@tonic-gate 				panic("segmap_pageunlock: page not found");
1564*0Sstevel@tonic-gate 				/*NOTREACHED*/
1565*0Sstevel@tonic-gate 			}
1566*0Sstevel@tonic-gate 			if (rw == S_WRITE) {
1567*0Sstevel@tonic-gate 				hat_setrefmod(pp);
1568*0Sstevel@tonic-gate 			} else if (rw != S_OTHER) {
1569*0Sstevel@tonic-gate 				hat_setref(pp);
1570*0Sstevel@tonic-gate 			}
1571*0Sstevel@tonic-gate 
1572*0Sstevel@tonic-gate 			page_unlock(pp);
1573*0Sstevel@tonic-gate 		}
1574*0Sstevel@tonic-gate 	}
1575*0Sstevel@tonic-gate }
1576*0Sstevel@tonic-gate 
1577*0Sstevel@tonic-gate caddr_t
1578*0Sstevel@tonic-gate segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
1579*0Sstevel@tonic-gate {
1580*0Sstevel@tonic-gate 	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
1581*0Sstevel@tonic-gate }
1582*0Sstevel@tonic-gate 
1583*0Sstevel@tonic-gate /*
1584*0Sstevel@tonic-gate  * This is the magic virtual address that offset 0 of an ELF
1585*0Sstevel@tonic-gate  * file gets mapped to in user space. This is used to pick
1586*0Sstevel@tonic-gate  * the vac color on the freelist.
1587*0Sstevel@tonic-gate  */
1588*0Sstevel@tonic-gate #define	ELF_OFFZERO_VA	(0x10000)
1589*0Sstevel@tonic-gate /*
1590*0Sstevel@tonic-gate  * segmap_getmap allocates a MAXBSIZE-sized slot to map the vnode vp
1591*0Sstevel@tonic-gate  * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
1592*0Sstevel@tonic-gate  * The return address is always MAXBSIZE aligned.
1593*0Sstevel@tonic-gate  *
1594*0Sstevel@tonic-gate  * If forcefault is nonzero and the MMU translations haven't yet been created,
1595*0Sstevel@tonic-gate  * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1596*0Sstevel@tonic-gate  */
1597*0Sstevel@tonic-gate caddr_t
1598*0Sstevel@tonic-gate segmap_getmapflt(
1599*0Sstevel@tonic-gate 	struct seg *seg,
1600*0Sstevel@tonic-gate 	struct vnode *vp,
1601*0Sstevel@tonic-gate 	u_offset_t off,
1602*0Sstevel@tonic-gate 	size_t len,
1603*0Sstevel@tonic-gate 	int forcefault,
1604*0Sstevel@tonic-gate 	enum seg_rw rw)
1605*0Sstevel@tonic-gate {
1606*0Sstevel@tonic-gate 	struct smap *smp, *nsmp;
1607*0Sstevel@tonic-gate 	extern struct vnode *common_specvp();
1608*0Sstevel@tonic-gate 	caddr_t baseaddr;			/* MAXBSIZE aligned */
1609*0Sstevel@tonic-gate 	u_offset_t baseoff;
1610*0Sstevel@tonic-gate 	int newslot;
1611*0Sstevel@tonic-gate 	caddr_t vaddr;
1612*0Sstevel@tonic-gate 	int color, hashid;
1613*0Sstevel@tonic-gate 	kmutex_t *hashmtx, *smapmtx;
1614*0Sstevel@tonic-gate 	struct smfree *sm;
1615*0Sstevel@tonic-gate 	page_t	*pp;
1616*0Sstevel@tonic-gate 	struct kpme *kpme;
1617*0Sstevel@tonic-gate 	uint_t	prot;
1618*0Sstevel@tonic-gate 	caddr_t base;
1619*0Sstevel@tonic-gate 	page_t	*pl[MAXPPB + 1];
1620*0Sstevel@tonic-gate 	int	error;
1621*0Sstevel@tonic-gate 	int	is_kpm = 1;
1622*0Sstevel@tonic-gate 
1623*0Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
1624*0Sstevel@tonic-gate 	ASSERT(seg == segkmap);
1625*0Sstevel@tonic-gate 
1626*0Sstevel@tonic-gate 	baseoff = off & (offset_t)MAXBMASK;
1627*0Sstevel@tonic-gate 	if (off + len > baseoff + MAXBSIZE) {
1628*0Sstevel@tonic-gate 		panic("segmap_getmap bad len");
1629*0Sstevel@tonic-gate 		/*NOTREACHED*/
1630*0Sstevel@tonic-gate 	}
1631*0Sstevel@tonic-gate 
1632*0Sstevel@tonic-gate 	/*
1633*0Sstevel@tonic-gate 	 * If this is a block device we have to be sure to use the
1634*0Sstevel@tonic-gate 	 * "common" block device vnode for the mapping.
1635*0Sstevel@tonic-gate 	 */
1636*0Sstevel@tonic-gate 	if (vp->v_type == VBLK)
1637*0Sstevel@tonic-gate 		vp = common_specvp(vp);
1638*0Sstevel@tonic-gate 
1639*0Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
1640*0Sstevel@tonic-gate 
1641*0Sstevel@tonic-gate 	if (segmap_kpm == 0 ||
1642*0Sstevel@tonic-gate 	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
1643*0Sstevel@tonic-gate 		is_kpm = 0;
1644*0Sstevel@tonic-gate 	}
1645*0Sstevel@tonic-gate 
1646*0Sstevel@tonic-gate 	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
1647*0Sstevel@tonic-gate 	hashmtx = SHASHMTX(hashid);
1648*0Sstevel@tonic-gate 
1649*0Sstevel@tonic-gate retry_hash:
1650*0Sstevel@tonic-gate 	mutex_enter(hashmtx);
1651*0Sstevel@tonic-gate 	for (smp = smd_hash[hashid].sh_hash_list;
1652*0Sstevel@tonic-gate 	    smp != NULL; smp = smp->sm_hash)
1653*0Sstevel@tonic-gate 		if (smp->sm_vp == vp && smp->sm_off == baseoff)
1654*0Sstevel@tonic-gate 			break;
1655*0Sstevel@tonic-gate 	mutex_exit(hashmtx);
1656*0Sstevel@tonic-gate 
1657*0Sstevel@tonic-gate vrfy_smp:
1658*0Sstevel@tonic-gate 	if (smp != NULL) {
1659*0Sstevel@tonic-gate 
1660*0Sstevel@tonic-gate 		ASSERT(vp->v_count != 0);
1661*0Sstevel@tonic-gate 
1662*0Sstevel@tonic-gate 		/*
1663*0Sstevel@tonic-gate 		 * Get smap lock and recheck its tag. The hash lock
1664*0Sstevel@tonic-gate 		 * is dropped since the hash is based on (vp, off)
1665*0Sstevel@tonic-gate 		 * and (vp, off) won't change when we have smap mtx.
1666*0Sstevel@tonic-gate 		 */
1667*0Sstevel@tonic-gate 		smapmtx = SMAPMTX(smp);
1668*0Sstevel@tonic-gate 		mutex_enter(smapmtx);
1669*0Sstevel@tonic-gate 		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
1670*0Sstevel@tonic-gate 			mutex_exit(smapmtx);
1671*0Sstevel@tonic-gate 			goto retry_hash;
1672*0Sstevel@tonic-gate 		}
1673*0Sstevel@tonic-gate 
1674*0Sstevel@tonic-gate 		if (smp->sm_refcnt == 0) {
1675*0Sstevel@tonic-gate 
1676*0Sstevel@tonic-gate 			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
1677*0Sstevel@tonic-gate 
1678*0Sstevel@tonic-gate 			/*
1679*0Sstevel@tonic-gate 			 * Could still be on the free list. However, this
1680*0Sstevel@tonic-gate 			 * could also be an smp that is transitioning from
1681*0Sstevel@tonic-gate 			 * the free list when we have too much contention
1682*0Sstevel@tonic-gate 			 * for the smapmtx's. In this case, we have an
1683*0Sstevel@tonic-gate 			 * unlocked smp that is not on the free list any
1684*0Sstevel@tonic-gate 			 * longer, but still has a 0 refcnt.  The only way
1685*0Sstevel@tonic-gate 			 * to be sure is to check the freelist pointers.
1686*0Sstevel@tonic-gate 			 * Since we now have the smapmtx, we are guaranteed
1687*0Sstevel@tonic-gate 			 * that the (vp, off) won't change, so we are safe
1688*0Sstevel@tonic-gate 			 * to reclaim it.  get_free_smp() knows that this
1689*0Sstevel@tonic-gate 			 * can happen, and it will check the refcnt.
1690*0Sstevel@tonic-gate 			 */
1691*0Sstevel@tonic-gate 
1692*0Sstevel@tonic-gate 			if (smp->sm_next != NULL) {
1693*0Sstevel@tonic-gate 				struct sm_freeq *freeq;
1694*0Sstevel@tonic-gate 
1695*0Sstevel@tonic-gate 				ASSERT(smp->sm_prev != NULL);
1696*0Sstevel@tonic-gate 				sm = &smd_free[smp->sm_free_ndx];
1697*0Sstevel@tonic-gate 
1698*0Sstevel@tonic-gate 				if (smp->sm_flags & SM_QNDX_ZERO)
1699*0Sstevel@tonic-gate 					freeq = &sm->sm_freeq[0];
1700*0Sstevel@tonic-gate 				else
1701*0Sstevel@tonic-gate 					freeq = &sm->sm_freeq[1];
1702*0Sstevel@tonic-gate 
1703*0Sstevel@tonic-gate 				mutex_enter(&freeq->smq_mtx);
1704*0Sstevel@tonic-gate 				if (freeq->smq_free != smp) {
1705*0Sstevel@tonic-gate 					/*
1706*0Sstevel@tonic-gate 					 * fastpath normal case
1707*0Sstevel@tonic-gate 					 */
1708*0Sstevel@tonic-gate 					smp->sm_prev->sm_next = smp->sm_next;
1709*0Sstevel@tonic-gate 					smp->sm_next->sm_prev = smp->sm_prev;
1710*0Sstevel@tonic-gate 				} else if (smp == smp->sm_next) {
1711*0Sstevel@tonic-gate 					/*
1712*0Sstevel@tonic-gate 					 * Taking the last smap on freelist
1713*0Sstevel@tonic-gate 					 */
1714*0Sstevel@tonic-gate 					freeq->smq_free = NULL;
1715*0Sstevel@tonic-gate 				} else {
1716*0Sstevel@tonic-gate 					/*
1717*0Sstevel@tonic-gate 					 * Reclaiming 1st smap on list
1718*0Sstevel@tonic-gate 					 */
1719*0Sstevel@tonic-gate 					freeq->smq_free = smp->sm_next;
1720*0Sstevel@tonic-gate 					smp->sm_prev->sm_next = smp->sm_next;
1721*0Sstevel@tonic-gate 					smp->sm_next->sm_prev = smp->sm_prev;
1722*0Sstevel@tonic-gate 				}
1723*0Sstevel@tonic-gate 				mutex_exit(&freeq->smq_mtx);
1724*0Sstevel@tonic-gate 				smp->sm_prev = smp->sm_next = NULL;
1725*0Sstevel@tonic-gate 			} else {
1726*0Sstevel@tonic-gate 				ASSERT(smp->sm_prev == NULL);
1727*0Sstevel@tonic-gate 				segmapcnt.smp_stolen.value.ul++;
1728*0Sstevel@tonic-gate 			}
1729*0Sstevel@tonic-gate 
1730*0Sstevel@tonic-gate 		} else {
1731*0Sstevel@tonic-gate 			segmapcnt.smp_get_use.value.ul++;
1732*0Sstevel@tonic-gate 		}
1733*0Sstevel@tonic-gate 		smp->sm_refcnt++;		/* another user */
1734*0Sstevel@tonic-gate 
1735*0Sstevel@tonic-gate 		/*
1736*0Sstevel@tonic-gate 		 * We don't invoke segmap_fault via TLB miss, so we set ref
1737*0Sstevel@tonic-gate 		 * and mod bits in advance. For S_OTHER we set them in
1738*0Sstevel@tonic-gate 		 * segmap_fault F_SOFTUNLOCK.
1739*0Sstevel@tonic-gate 		 */
1740*0Sstevel@tonic-gate 		if (is_kpm) {
1741*0Sstevel@tonic-gate 			if (rw == S_WRITE) {
1742*0Sstevel@tonic-gate 				smp->sm_flags |= SM_WRITE_DATA;
1743*0Sstevel@tonic-gate 			} else if (rw == S_READ) {
1744*0Sstevel@tonic-gate 				smp->sm_flags |= SM_READ_DATA;
1745*0Sstevel@tonic-gate 			}
1746*0Sstevel@tonic-gate 		}
1747*0Sstevel@tonic-gate 		mutex_exit(smapmtx);
1748*0Sstevel@tonic-gate 
1749*0Sstevel@tonic-gate 		newslot = 0;
1750*0Sstevel@tonic-gate 	} else {
1751*0Sstevel@tonic-gate 
1752*0Sstevel@tonic-gate 		uint32_t free_ndx, *free_ndxp;
1753*0Sstevel@tonic-gate 		union segmap_cpu *scpu;
1754*0Sstevel@tonic-gate 
1755*0Sstevel@tonic-gate 		/*
1756*0Sstevel@tonic-gate 		 * On a PAC machine or a machine with anti-alias
1757*0Sstevel@tonic-gate 		 * hardware, smd_colormsk will be zero.
1758*0Sstevel@tonic-gate 		 *
1759*0Sstevel@tonic-gate 		 * On a VAC machine, pick color by offset in the file
1760*0Sstevel@tonic-gate 		 * so we won't get VAC conflicts on elf files.
1761*0Sstevel@tonic-gate 		 * On data files, color does not matter but we
1762*0Sstevel@tonic-gate 		 * don't know what kind of file it is so we always
1763*0Sstevel@tonic-gate 		 * pick color by offset. This causes color
1764*0Sstevel@tonic-gate 		 * corresponding to file offset zero to be used more
1765*0Sstevel@tonic-gate 		 * heavily.
1766*0Sstevel@tonic-gate 		 */
1767*0Sstevel@tonic-gate 		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
1768*0Sstevel@tonic-gate 		scpu = smd_cpu+CPU->cpu_seqid;
1769*0Sstevel@tonic-gate 		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
1770*0Sstevel@tonic-gate 		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
1771*0Sstevel@tonic-gate #ifdef DEBUG
1772*0Sstevel@tonic-gate 		colors_used[free_ndx]++;
1773*0Sstevel@tonic-gate #endif /* DEBUG */
1774*0Sstevel@tonic-gate 
1775*0Sstevel@tonic-gate 		/*
1776*0Sstevel@tonic-gate 		 * Get a locked smp slot from the free list.
1777*0Sstevel@tonic-gate 		 */
1778*0Sstevel@tonic-gate 		smp = get_free_smp(free_ndx);
1779*0Sstevel@tonic-gate 		smapmtx = SMAPMTX(smp);
1780*0Sstevel@tonic-gate 
1781*0Sstevel@tonic-gate 		ASSERT(smp->sm_vp == NULL);
1782*0Sstevel@tonic-gate 
1783*0Sstevel@tonic-gate 		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
1784*0Sstevel@tonic-gate 			/*
1785*0Sstevel@tonic-gate 			 * Failed to hashin, there exists one now.
1786*0Sstevel@tonic-gate 			 * Return the smp we just allocated.
1787*0Sstevel@tonic-gate 			 */
1788*0Sstevel@tonic-gate 			segmap_smapadd(smp);
1789*0Sstevel@tonic-gate 			mutex_exit(smapmtx);
1790*0Sstevel@tonic-gate 
1791*0Sstevel@tonic-gate 			smp = nsmp;
1792*0Sstevel@tonic-gate 			goto vrfy_smp;
1793*0Sstevel@tonic-gate 		}
1794*0Sstevel@tonic-gate 		smp->sm_refcnt++;		/* another user */
1795*0Sstevel@tonic-gate 
1796*0Sstevel@tonic-gate 		/*
1797*0Sstevel@tonic-gate 		 * We don't invoke segmap_fault via TLB miss, so we set ref
1798*0Sstevel@tonic-gate 		 * and mod bits in advance. For S_OTHER we set them in
1799*0Sstevel@tonic-gate 		 * segmap_fault F_SOFTUNLOCK.
1800*0Sstevel@tonic-gate 		 */
1801*0Sstevel@tonic-gate 		if (is_kpm) {
1802*0Sstevel@tonic-gate 			if (rw == S_WRITE) {
1803*0Sstevel@tonic-gate 				smp->sm_flags |= SM_WRITE_DATA;
1804*0Sstevel@tonic-gate 			} else if (rw == S_READ) {
1805*0Sstevel@tonic-gate 				smp->sm_flags |= SM_READ_DATA;
1806*0Sstevel@tonic-gate 			}
1807*0Sstevel@tonic-gate 		}
1808*0Sstevel@tonic-gate 		mutex_exit(smapmtx);
1809*0Sstevel@tonic-gate 
1810*0Sstevel@tonic-gate 		newslot = 1;
1811*0Sstevel@tonic-gate 	}
1812*0Sstevel@tonic-gate 
1813*0Sstevel@tonic-gate 	if (!is_kpm)
1814*0Sstevel@tonic-gate 		goto use_segmap_range;
1815*0Sstevel@tonic-gate 
1816*0Sstevel@tonic-gate 	/*
1817*0Sstevel@tonic-gate 	 * Use segkpm
1818*0Sstevel@tonic-gate 	 */
1819*0Sstevel@tonic-gate 	ASSERT(PAGESIZE == MAXBSIZE);
1820*0Sstevel@tonic-gate 
1821*0Sstevel@tonic-gate 	/*
1822*0Sstevel@tonic-gate 	 * remember the last smp faulted on this cpu.
1823*0Sstevel@tonic-gate 	 */
1824*0Sstevel@tonic-gate 	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
1825*0Sstevel@tonic-gate 
1826*0Sstevel@tonic-gate 	if (forcefault == SM_PAGECREATE) {
1827*0Sstevel@tonic-gate 		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
1828*0Sstevel@tonic-gate 		return (baseaddr);
1829*0Sstevel@tonic-gate 	}
1830*0Sstevel@tonic-gate 
1831*0Sstevel@tonic-gate 	if (newslot == 0 &&
1832*0Sstevel@tonic-gate 	    (pp = GET_KPME(smp)->kpe_page) != NULL) {
1833*0Sstevel@tonic-gate 
1834*0Sstevel@tonic-gate 		/* fastpath */
1835*0Sstevel@tonic-gate 		switch (rw) {
1836*0Sstevel@tonic-gate 		case S_READ:
1837*0Sstevel@tonic-gate 		case S_WRITE:
1838*0Sstevel@tonic-gate 			if (page_trylock(pp, SE_SHARED)) {
1839*0Sstevel@tonic-gate 				if (PP_ISFREE(pp) ||
1840*0Sstevel@tonic-gate 				    !(pp->p_vnode == vp &&
1841*0Sstevel@tonic-gate 				    pp->p_offset == baseoff)) {
1842*0Sstevel@tonic-gate 					page_unlock(pp);
1843*0Sstevel@tonic-gate 					pp = page_lookup(vp, baseoff,
1844*0Sstevel@tonic-gate 						SE_SHARED);
1845*0Sstevel@tonic-gate 				}
1846*0Sstevel@tonic-gate 			} else {
1847*0Sstevel@tonic-gate 				pp = page_lookup(vp, baseoff, SE_SHARED);
1848*0Sstevel@tonic-gate 			}
1849*0Sstevel@tonic-gate 
1850*0Sstevel@tonic-gate 			if (pp == NULL) {
1851*0Sstevel@tonic-gate 				ASSERT(GET_KPME(smp)->kpe_page == NULL);
1852*0Sstevel@tonic-gate 				break;
1853*0Sstevel@tonic-gate 			}
1854*0Sstevel@tonic-gate 
1855*0Sstevel@tonic-gate 			if (rw == S_WRITE &&
1856*0Sstevel@tonic-gate 			    hat_page_getattr(pp, P_MOD | P_REF) !=
1857*0Sstevel@tonic-gate 			    (P_MOD | P_REF)) {
1858*0Sstevel@tonic-gate 				page_unlock(pp);
1859*0Sstevel@tonic-gate 				break;
1860*0Sstevel@tonic-gate 			}
1861*0Sstevel@tonic-gate 
1862*0Sstevel@tonic-gate 			/*
1863*0Sstevel@tonic-gate 			 * We have the p_selock as reader, grab_smp
1864*0Sstevel@tonic-gate 			 * can't hit us, we have bumped the smap
1865*0Sstevel@tonic-gate 			 * refcnt and hat_pageunload needs the
1866*0Sstevel@tonic-gate 			 * p_selock exclusive.
1867*0Sstevel@tonic-gate 			 */
1868*0Sstevel@tonic-gate 			kpme = GET_KPME(smp);
1869*0Sstevel@tonic-gate 			if (kpme->kpe_page == pp) {
1870*0Sstevel@tonic-gate 				baseaddr = hat_kpm_page2va(pp, 0);
1871*0Sstevel@tonic-gate 			} else if (kpme->kpe_page == NULL) {
1872*0Sstevel@tonic-gate 				baseaddr = hat_kpm_mapin(pp, kpme);
1873*0Sstevel@tonic-gate 			} else {
1874*0Sstevel@tonic-gate 				panic("segmap_getmapflt: stale "
1875*0Sstevel@tonic-gate 				    "kpme page, kpme %p", (void *)kpme);
1876*0Sstevel@tonic-gate 				/*NOTREACHED*/
1877*0Sstevel@tonic-gate 			}
1878*0Sstevel@tonic-gate 
1879*0Sstevel@tonic-gate 			/*
1880*0Sstevel@tonic-gate 			 * We don't invoke segmap_fault via TLB miss,
1881*0Sstevel@tonic-gate 			 * so we set ref and mod bits in advance.
1882*0Sstevel@tonic-gate 			 * For S_OTHER we set them in segmap_fault
1883*0Sstevel@tonic-gate 			 * F_SOFTUNLOCK.
1884*0Sstevel@tonic-gate 			 */
1885*0Sstevel@tonic-gate 			if (rw == S_READ && !hat_isref(pp))
1886*0Sstevel@tonic-gate 				hat_setref(pp);
1887*0Sstevel@tonic-gate 
1888*0Sstevel@tonic-gate 			return (baseaddr);
1889*0Sstevel@tonic-gate 		default:
1890*0Sstevel@tonic-gate 			break;
1891*0Sstevel@tonic-gate 		}
1892*0Sstevel@tonic-gate 	}
1893*0Sstevel@tonic-gate 
1894*0Sstevel@tonic-gate 	base = segkpm_create_va(baseoff);
1895*0Sstevel@tonic-gate 	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1896*0Sstevel@tonic-gate 	    seg, base, rw, CRED());
1897*0Sstevel@tonic-gate 
1898*0Sstevel@tonic-gate 	pp = pl[0];
1899*0Sstevel@tonic-gate 	if (error || pp == NULL) {
1900*0Sstevel@tonic-gate 		/*
1901*0Sstevel@tonic-gate 		 * Use segmap address slot and let segmap_fault deal
1902*0Sstevel@tonic-gate 		 * with the error cases. There is no error return
1903*0Sstevel@tonic-gate 		 * possible here.
1904*0Sstevel@tonic-gate 		 */
1905*0Sstevel@tonic-gate 		goto use_segmap_range;
1906*0Sstevel@tonic-gate 	}
1907*0Sstevel@tonic-gate 
1908*0Sstevel@tonic-gate 	ASSERT(pl[1] == NULL);
1909*0Sstevel@tonic-gate 
1910*0Sstevel@tonic-gate 	/*
1911*0Sstevel@tonic-gate 	 * When prot is not returned with PROT_ALL, the returned pages
1912*0Sstevel@tonic-gate 	 * are not backed by fs blocks. For most segmap users
1913*0Sstevel@tonic-gate 	 * this is no problem; they don't write to the pages in the
1914*0Sstevel@tonic-gate 	 * same request and therefore don't rely on a following
1915*0Sstevel@tonic-gate 	 * trap-driven segmap_fault. For SM_LOCKPROTO users it
1916*0Sstevel@tonic-gate 	 * is safer to use segkmap addresses so that protection
1917*0Sstevel@tonic-gate 	 * faults can be handled by segmap_fault.
1918*0Sstevel@tonic-gate 	 */
1919*0Sstevel@tonic-gate 	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
1920*0Sstevel@tonic-gate 		/*
1921*0Sstevel@tonic-gate 		 * Use segmap address slot and let segmap_fault
1922*0Sstevel@tonic-gate 		 * do the error return.
1923*0Sstevel@tonic-gate 		 */
1924*0Sstevel@tonic-gate 		ASSERT(rw != S_WRITE);
1925*0Sstevel@tonic-gate 		ASSERT(PAGE_LOCKED(pp));
1926*0Sstevel@tonic-gate 		page_unlock(pp);
1927*0Sstevel@tonic-gate 		forcefault = 0;
1928*0Sstevel@tonic-gate 		goto use_segmap_range;
1929*0Sstevel@tonic-gate 	}
1930*0Sstevel@tonic-gate 
1931*0Sstevel@tonic-gate 	/*
1932*0Sstevel@tonic-gate 	 * We have the p_selock as reader, grab_smp can't hit us, we
1933*0Sstevel@tonic-gate 	 * have bumped the smap refcnt and hat_pageunload needs the
1934*0Sstevel@tonic-gate 	 * p_selock exclusive.
1935*0Sstevel@tonic-gate 	 */
1936*0Sstevel@tonic-gate 	kpme = GET_KPME(smp);
1937*0Sstevel@tonic-gate 	if (kpme->kpe_page == pp) {
1938*0Sstevel@tonic-gate 		baseaddr = hat_kpm_page2va(pp, 0);
1939*0Sstevel@tonic-gate 	} else if (kpme->kpe_page == NULL) {
1940*0Sstevel@tonic-gate 		baseaddr = hat_kpm_mapin(pp, kpme);
1941*0Sstevel@tonic-gate 	} else {
1942*0Sstevel@tonic-gate 		panic("segmap_getmapflt: stale kpme page after "
1943*0Sstevel@tonic-gate 		    "VOP_GETPAGE, kpme %p", (void *)kpme);
1944*0Sstevel@tonic-gate 		/*NOTREACHED*/
1945*0Sstevel@tonic-gate 	}
1946*0Sstevel@tonic-gate 
1947*0Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
1948*0Sstevel@tonic-gate 
1949*0Sstevel@tonic-gate 	return (baseaddr);
1950*0Sstevel@tonic-gate 
1951*0Sstevel@tonic-gate 
1952*0Sstevel@tonic-gate use_segmap_range:
1953*0Sstevel@tonic-gate 	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
1954*0Sstevel@tonic-gate 	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
1955*0Sstevel@tonic-gate 	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
1956*0Sstevel@tonic-gate 	    seg, baseaddr, vp, baseoff);
1957*0Sstevel@tonic-gate 
1958*0Sstevel@tonic-gate 	/*
1959*0Sstevel@tonic-gate 	 * Prefault the translations
1960*0Sstevel@tonic-gate 	 */
1961*0Sstevel@tonic-gate 	vaddr = baseaddr + (off - baseoff);
1962*0Sstevel@tonic-gate 	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
1963*0Sstevel@tonic-gate 
1964*0Sstevel@tonic-gate 		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
1965*0Sstevel@tonic-gate 		    (uintptr_t)PAGEMASK);
1966*0Sstevel@tonic-gate 
1967*0Sstevel@tonic-gate 		(void) segmap_fault(kas.a_hat, seg, pgaddr,
1968*0Sstevel@tonic-gate 		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
1969*0Sstevel@tonic-gate 		    F_INVAL, rw);
1970*0Sstevel@tonic-gate 	}
1971*0Sstevel@tonic-gate 
1972*0Sstevel@tonic-gate 	return (baseaddr);
1973*0Sstevel@tonic-gate }
1974*0Sstevel@tonic-gate 
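/*
 * Sketch of the conventional read path built on segmap_getmapflt():
 * map one MAXBSIZE window at a time, copy out of it, then release it.
 * Illustrative only (the guard macro is never defined); "rd_vp" and
 * "rd_uio" are hypothetical arguments, and limiting "n" to the file
 * size (EOF handling) is omitted.
 */
#ifdef SEGMAP_EXAMPLE_CODE
static int
example_segmap_read(struct vnode *rd_vp, struct uio *rd_uio)
{
	int error = 0;

	while (rd_uio->uio_resid > 0 && error == 0) {
		u_offset_t off = (u_offset_t)rd_uio->uio_loffset;
		size_t mapon = (size_t)(off & MAXBOFFSET);
		size_t n = MIN(MAXBSIZE - mapon, (size_t)rd_uio->uio_resid);
		caddr_t base;

		/* forcefault != 0: prefault the translations now */
		base = segmap_getmapflt(segkmap, rd_vp, off, n, 1, S_READ);
		error = uiomove(base + mapon, n, UIO_READ, rd_uio);
		if (error == 0)
			error = segmap_release(segkmap, base, 0);
		else
			(void) segmap_release(segkmap, base, 0);
	}
	return (error);
}
#endif	/* SEGMAP_EXAMPLE_CODE */
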
1975*0Sstevel@tonic-gate int
1976*0Sstevel@tonic-gate segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
1977*0Sstevel@tonic-gate {
1978*0Sstevel@tonic-gate 	struct smap	*smp;
1979*0Sstevel@tonic-gate 	int 		error;
1980*0Sstevel@tonic-gate 	int		bflags = 0;
1981*0Sstevel@tonic-gate 	struct vnode	*vp;
1982*0Sstevel@tonic-gate 	u_offset_t	offset;
1983*0Sstevel@tonic-gate 	kmutex_t	*smtx;
1984*0Sstevel@tonic-gate 	int		is_kpm = 0;
1985*0Sstevel@tonic-gate 	page_t		*pp;
1986*0Sstevel@tonic-gate 
1987*0Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
1988*0Sstevel@tonic-gate 
1989*0Sstevel@tonic-gate 		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
1990*0Sstevel@tonic-gate 			panic("segmap_release: addr %p not "
1991*0Sstevel@tonic-gate 			    "MAXBSIZE aligned", (void *)addr);
1992*0Sstevel@tonic-gate 			/*NOTREACHED*/
1993*0Sstevel@tonic-gate 		}
1994*0Sstevel@tonic-gate 
1995*0Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
1996*0Sstevel@tonic-gate 			panic("segmap_release: smap not found "
1997*0Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
1998*0Sstevel@tonic-gate 			/*NOTREACHED*/
1999*0Sstevel@tonic-gate 		}
2000*0Sstevel@tonic-gate 
2001*0Sstevel@tonic-gate 		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2002*0Sstevel@tonic-gate 			"segmap_relmap:seg %p addr %p smp %p",
2003*0Sstevel@tonic-gate 			seg, addr, smp);
2004*0Sstevel@tonic-gate 
2005*0Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
2006*0Sstevel@tonic-gate 
2007*0Sstevel@tonic-gate 		/*
2008*0Sstevel@tonic-gate 		 * For compatibility reasons segmap_pagecreate_kpm sets this
2009*0Sstevel@tonic-gate 		 * flag to allow a following segmap_pagecreate to return
2010*0Sstevel@tonic-gate 		 * this as the "newpage" flag. When segmap_pagecreate is not
2011*0Sstevel@tonic-gate 		 * called at all, we clear it now.
2012*0Sstevel@tonic-gate 		 */
2013*0Sstevel@tonic-gate 		smp->sm_flags &= ~SM_KPM_NEWPAGE;
2014*0Sstevel@tonic-gate 		is_kpm = 1;
2015*0Sstevel@tonic-gate 		if (smp->sm_flags & SM_WRITE_DATA) {
2016*0Sstevel@tonic-gate 			hat_setrefmod(pp);
2017*0Sstevel@tonic-gate 		} else if (smp->sm_flags & SM_READ_DATA) {
2018*0Sstevel@tonic-gate 			hat_setref(pp);
2019*0Sstevel@tonic-gate 		}
2020*0Sstevel@tonic-gate 	} else {
2021*0Sstevel@tonic-gate 		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
2022*0Sstevel@tonic-gate 		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
2023*0Sstevel@tonic-gate 			panic("segmap_release: bad addr %p", (void *)addr);
2024*0Sstevel@tonic-gate 			/*NOTREACHED*/
2025*0Sstevel@tonic-gate 		}
2026*0Sstevel@tonic-gate 		smp = GET_SMAP(seg, addr);
2027*0Sstevel@tonic-gate 
2028*0Sstevel@tonic-gate 		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2029*0Sstevel@tonic-gate 			"segmap_relmap:seg %p addr %p smp %p",
2030*0Sstevel@tonic-gate 			seg, addr, smp);
2031*0Sstevel@tonic-gate 
2032*0Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
2033*0Sstevel@tonic-gate 		mutex_enter(smtx);
2034*0Sstevel@tonic-gate 		smp->sm_flags |= SM_NOTKPM_RELEASED;
2035*0Sstevel@tonic-gate 	}
2036*0Sstevel@tonic-gate 
2037*0Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
2038*0Sstevel@tonic-gate 
2039*0Sstevel@tonic-gate 	/*
2040*0Sstevel@tonic-gate 	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2041*0Sstevel@tonic-gate 	 * are set.
2042*0Sstevel@tonic-gate 	 */
2043*0Sstevel@tonic-gate 	if ((flags & ~SM_DONTNEED) != 0) {
2044*0Sstevel@tonic-gate 		if (flags & SM_WRITE)
2045*0Sstevel@tonic-gate 			segmapcnt.smp_rel_write.value.ul++;
2046*0Sstevel@tonic-gate 		if (flags & SM_ASYNC) {
2047*0Sstevel@tonic-gate 			bflags |= B_ASYNC;
2048*0Sstevel@tonic-gate 			segmapcnt.smp_rel_async.value.ul++;
2049*0Sstevel@tonic-gate 		}
2050*0Sstevel@tonic-gate 		if (flags & SM_INVAL) {
2051*0Sstevel@tonic-gate 			bflags |= B_INVAL;
2052*0Sstevel@tonic-gate 			segmapcnt.smp_rel_abort.value.ul++;
2053*0Sstevel@tonic-gate 		}
2054*0Sstevel@tonic-gate 		if (flags & SM_DESTROY) {
2055*0Sstevel@tonic-gate 			bflags |= (B_INVAL|B_TRUNC);
2056*0Sstevel@tonic-gate 			segmapcnt.smp_rel_abort.value.ul++;
2057*0Sstevel@tonic-gate 		}
2058*0Sstevel@tonic-gate 		if (smp->sm_refcnt == 1) {
2059*0Sstevel@tonic-gate 			/*
2060*0Sstevel@tonic-gate 			 * We only bother doing the FREE and DONTNEED flags
2061*0Sstevel@tonic-gate 			 * if no one else is still referencing this mapping.
2062*0Sstevel@tonic-gate 			 */
2063*0Sstevel@tonic-gate 			if (flags & SM_FREE) {
2064*0Sstevel@tonic-gate 				bflags |= B_FREE;
2065*0Sstevel@tonic-gate 				segmapcnt.smp_rel_free.value.ul++;
2066*0Sstevel@tonic-gate 			}
2067*0Sstevel@tonic-gate 			if (flags & SM_DONTNEED) {
2068*0Sstevel@tonic-gate 				bflags |= B_DONTNEED;
2069*0Sstevel@tonic-gate 				segmapcnt.smp_rel_dontneed.value.ul++;
2070*0Sstevel@tonic-gate 			}
2071*0Sstevel@tonic-gate 		}
2072*0Sstevel@tonic-gate 	} else {
2073*0Sstevel@tonic-gate 		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
2074*0Sstevel@tonic-gate 	}
2075*0Sstevel@tonic-gate 
2076*0Sstevel@tonic-gate 	vp = smp->sm_vp;
2077*0Sstevel@tonic-gate 	offset = smp->sm_off;
2078*0Sstevel@tonic-gate 
2079*0Sstevel@tonic-gate 	if (--smp->sm_refcnt == 0) {
2080*0Sstevel@tonic-gate 
2081*0Sstevel@tonic-gate 		if (is_kpm) {
2082*0Sstevel@tonic-gate 			smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2083*0Sstevel@tonic-gate 		}
2084*0Sstevel@tonic-gate 		if (flags & (SM_INVAL|SM_DESTROY)) {
2085*0Sstevel@tonic-gate 			segmap_hashout(smp);	/* remove map info */
2086*0Sstevel@tonic-gate 			if (is_kpm) {
2087*0Sstevel@tonic-gate 				hat_kpm_mapout(pp, GET_KPME(smp), addr);
2088*0Sstevel@tonic-gate 				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
2089*0Sstevel@tonic-gate 					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2090*0Sstevel@tonic-gate 					hat_unload(kas.a_hat, addr, MAXBSIZE,
2091*0Sstevel@tonic-gate 						HAT_UNLOAD);
2092*0Sstevel@tonic-gate 				}
2093*0Sstevel@tonic-gate 
2094*0Sstevel@tonic-gate 			} else {
2095*0Sstevel@tonic-gate 				if (segmap_kpm)
2096*0Sstevel@tonic-gate 					segkpm_mapout_validkpme(GET_KPME(smp));
2097*0Sstevel@tonic-gate 
2098*0Sstevel@tonic-gate 				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2099*0Sstevel@tonic-gate 				hat_unload(kas.a_hat, addr, MAXBSIZE,
2100*0Sstevel@tonic-gate 					HAT_UNLOAD);
2101*0Sstevel@tonic-gate 			}
2102*0Sstevel@tonic-gate 		}
2103*0Sstevel@tonic-gate 		segmap_smapadd(smp);	/* add to free list */
2104*0Sstevel@tonic-gate 	}
2105*0Sstevel@tonic-gate 
2106*0Sstevel@tonic-gate 	mutex_exit(smtx);
2107*0Sstevel@tonic-gate 
2108*0Sstevel@tonic-gate 	if (is_kpm)
2109*0Sstevel@tonic-gate 		page_unlock(pp);
2110*0Sstevel@tonic-gate 	/*
2111*0Sstevel@tonic-gate 	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2112*0Sstevel@tonic-gate 	 * are set.
2113*0Sstevel@tonic-gate 	 */
2114*0Sstevel@tonic-gate 	if ((flags & ~SM_DONTNEED) != 0) {
2115*0Sstevel@tonic-gate 		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
2116*0Sstevel@tonic-gate 		    bflags, CRED());
2117*0Sstevel@tonic-gate 	} else {
2118*0Sstevel@tonic-gate 		error = 0;
2119*0Sstevel@tonic-gate 	}
2120*0Sstevel@tonic-gate 
2121*0Sstevel@tonic-gate 	return (error);
2122*0Sstevel@tonic-gate }
2123*0Sstevel@tonic-gate 
2124*0Sstevel@tonic-gate /*
2125*0Sstevel@tonic-gate  * Dump the pages belonging to this segmap segment.
2126*0Sstevel@tonic-gate  */
2127*0Sstevel@tonic-gate static void
2128*0Sstevel@tonic-gate segmap_dump(struct seg *seg)
2129*0Sstevel@tonic-gate {
2130*0Sstevel@tonic-gate 	struct segmap_data *smd;
2131*0Sstevel@tonic-gate 	struct smap *smp, *smp_end;
2132*0Sstevel@tonic-gate 	page_t *pp;
2133*0Sstevel@tonic-gate 	pfn_t pfn;
2134*0Sstevel@tonic-gate 	u_offset_t off;
2135*0Sstevel@tonic-gate 	caddr_t addr;
2136*0Sstevel@tonic-gate 
2137*0Sstevel@tonic-gate 	smd = (struct segmap_data *)seg->s_data;
2138*0Sstevel@tonic-gate 	addr = seg->s_base;
2139*0Sstevel@tonic-gate 	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
2140*0Sstevel@tonic-gate 	    smp < smp_end; smp++) {
2141*0Sstevel@tonic-gate 
2142*0Sstevel@tonic-gate 		if (smp->sm_refcnt) {
2143*0Sstevel@tonic-gate 			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
2144*0Sstevel@tonic-gate 				int we_own_it = 0;
2145*0Sstevel@tonic-gate 
2146*0Sstevel@tonic-gate 				/*
2147*0Sstevel@tonic-gate 				 * page_lookup_nowait() returns NULL if the
2148*0Sstevel@tonic-gate 				 * page either does not exist or is exclusively
2149*0Sstevel@tonic-gate 				 * locked, so fall back to page_exists() to
2150*0Sstevel@tonic-gate 				 * determine whether the page exists at all.
2151*0Sstevel@tonic-gate 				 */
2152*0Sstevel@tonic-gate 				if ((pp = page_lookup_nowait(smp->sm_vp,
2153*0Sstevel@tonic-gate 				    smp->sm_off + off, SE_SHARED)))
2154*0Sstevel@tonic-gate 					we_own_it = 1;
2155*0Sstevel@tonic-gate 				else
2156*0Sstevel@tonic-gate 					pp = page_exists(smp->sm_vp,
2157*0Sstevel@tonic-gate 					    smp->sm_off + off);
2158*0Sstevel@tonic-gate 
2159*0Sstevel@tonic-gate 				if (pp) {
2160*0Sstevel@tonic-gate 					pfn = page_pptonum(pp);
2161*0Sstevel@tonic-gate 					dump_addpage(seg->s_as,
2162*0Sstevel@tonic-gate 						addr + off, pfn);
2163*0Sstevel@tonic-gate 					if (we_own_it)
2164*0Sstevel@tonic-gate 						page_unlock(pp);
2165*0Sstevel@tonic-gate 				}
2166*0Sstevel@tonic-gate 				dump_timeleft = dump_timeout;
2167*0Sstevel@tonic-gate 			}
2168*0Sstevel@tonic-gate 		}
2169*0Sstevel@tonic-gate 		addr += MAXBSIZE;
2170*0Sstevel@tonic-gate 	}
2171*0Sstevel@tonic-gate }
2172*0Sstevel@tonic-gate 
2173*0Sstevel@tonic-gate /*ARGSUSED*/
2174*0Sstevel@tonic-gate static int
2175*0Sstevel@tonic-gate segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
2176*0Sstevel@tonic-gate     struct page ***ppp, enum lock_type type, enum seg_rw rw)
2177*0Sstevel@tonic-gate {
2178*0Sstevel@tonic-gate 	return (ENOTSUP);
2179*0Sstevel@tonic-gate }
2180*0Sstevel@tonic-gate 
2181*0Sstevel@tonic-gate static int
2182*0Sstevel@tonic-gate segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2183*0Sstevel@tonic-gate {
2184*0Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
2185*0Sstevel@tonic-gate 
2186*0Sstevel@tonic-gate 	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
2187*0Sstevel@tonic-gate 	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
2188*0Sstevel@tonic-gate 	return (0);
2189*0Sstevel@tonic-gate }
2190*0Sstevel@tonic-gate 
2191*0Sstevel@tonic-gate /*ARGSUSED*/
2192*0Sstevel@tonic-gate static lgrp_mem_policy_info_t *
2193*0Sstevel@tonic-gate segmap_getpolicy(struct seg *seg, caddr_t addr)
2194*0Sstevel@tonic-gate {
2195*0Sstevel@tonic-gate 	return (NULL);
2196*0Sstevel@tonic-gate }
2197*0Sstevel@tonic-gate 
2198*0Sstevel@tonic-gate 
2199*0Sstevel@tonic-gate #ifdef	SEGKPM_SUPPORT
2200*0Sstevel@tonic-gate 
2201*0Sstevel@tonic-gate /*
2202*0Sstevel@tonic-gate  * segkpm support routines
2203*0Sstevel@tonic-gate  */
2204*0Sstevel@tonic-gate 
2205*0Sstevel@tonic-gate static caddr_t
2206*0Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2207*0Sstevel@tonic-gate 	struct smap *smp, enum seg_rw rw)
2208*0Sstevel@tonic-gate {
2209*0Sstevel@tonic-gate 	caddr_t	base;
2210*0Sstevel@tonic-gate 	page_t	*pp;
2211*0Sstevel@tonic-gate 	int	newpage = 0;
2212*0Sstevel@tonic-gate 	struct kpme	*kpme;
2213*0Sstevel@tonic-gate 
2214*0Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
2215*0Sstevel@tonic-gate 
2216*0Sstevel@tonic-gate 	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
2217*0Sstevel@tonic-gate 		kmutex_t *smtx;
2218*0Sstevel@tonic-gate 
2219*0Sstevel@tonic-gate 		base = segkpm_create_va(off);
2220*0Sstevel@tonic-gate 
2221*0Sstevel@tonic-gate 		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
2222*0Sstevel@tonic-gate 		    seg, base)) == NULL) {
2223*0Sstevel@tonic-gate 			panic("segmap_pagecreate_kpm: "
2224*0Sstevel@tonic-gate 			    "page_create failed");
2225*0Sstevel@tonic-gate 			/*NOTREACHED*/
2226*0Sstevel@tonic-gate 		}
2227*0Sstevel@tonic-gate 
2228*0Sstevel@tonic-gate 		newpage = 1;
2229*0Sstevel@tonic-gate 		page_io_unlock(pp);
2230*0Sstevel@tonic-gate 		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
2231*0Sstevel@tonic-gate 
2232*0Sstevel@tonic-gate 		/*
2233*0Sstevel@tonic-gate 		 * Mark the smap here; the flag persists until the following
2234*0Sstevel@tonic-gate 		 * segmap_pagecreate or segmap_release clears it.
2235*0Sstevel@tonic-gate 		 */
2236*0Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
2237*0Sstevel@tonic-gate 		mutex_enter(smtx);
2238*0Sstevel@tonic-gate 		smp->sm_flags |= SM_KPM_NEWPAGE;
2239*0Sstevel@tonic-gate 		mutex_exit(smtx);
2240*0Sstevel@tonic-gate 	}
2241*0Sstevel@tonic-gate 
2242*0Sstevel@tonic-gate 	kpme = GET_KPME(smp);
2243*0Sstevel@tonic-gate 	if (!newpage && kpme->kpe_page == pp)
2244*0Sstevel@tonic-gate 		base = hat_kpm_page2va(pp, 0);
2245*0Sstevel@tonic-gate 	else
2246*0Sstevel@tonic-gate 		base = hat_kpm_mapin(pp, kpme);
2247*0Sstevel@tonic-gate 
2248*0Sstevel@tonic-gate 	/*
2249*0Sstevel@tonic-gate 	 * FS code may decide not to call segmap_pagecreate and we
2250*0Sstevel@tonic-gate 	 * don't invoke segmap_fault via TLB miss, so we have to set
2251*0Sstevel@tonic-gate 	 * ref and mod bits in advance.
2252*0Sstevel@tonic-gate 	 */
2253*0Sstevel@tonic-gate 	if (rw == S_WRITE) {
2254*0Sstevel@tonic-gate 		hat_setrefmod(pp);
2255*0Sstevel@tonic-gate 	} else {
2256*0Sstevel@tonic-gate 		ASSERT(rw == S_READ);
2257*0Sstevel@tonic-gate 		hat_setref(pp);
2258*0Sstevel@tonic-gate 	}
2259*0Sstevel@tonic-gate 
2260*0Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
2261*0Sstevel@tonic-gate 
2262*0Sstevel@tonic-gate 	return (base);
2263*0Sstevel@tonic-gate }
2264*0Sstevel@tonic-gate 
2265*0Sstevel@tonic-gate /*
2266*0Sstevel@tonic-gate  * Find the smap structure corresponding to the
2267*0Sstevel@tonic-gate  * KPM addr and return it locked.
2268*0Sstevel@tonic-gate  */
2269*0Sstevel@tonic-gate struct smap *
2270*0Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp)
2271*0Sstevel@tonic-gate {
2272*0Sstevel@tonic-gate 	struct smap	*smp;
2273*0Sstevel@tonic-gate 	struct vnode	*vp;
2274*0Sstevel@tonic-gate 	u_offset_t	offset;
2275*0Sstevel@tonic-gate 	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
2276*0Sstevel@tonic-gate 	int		hashid;
2277*0Sstevel@tonic-gate 	kmutex_t	*hashmtx;
2278*0Sstevel@tonic-gate 	page_t		*pp;
2279*0Sstevel@tonic-gate 	union segmap_cpu *scpu;
2280*0Sstevel@tonic-gate 
2281*0Sstevel@tonic-gate 	pp = hat_kpm_vaddr2page(baseaddr);
2282*0Sstevel@tonic-gate 
2283*0Sstevel@tonic-gate 	ASSERT(pp && !PP_ISFREE(pp));
2284*0Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(pp));
2285*0Sstevel@tonic-gate 	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
2286*0Sstevel@tonic-gate 
2287*0Sstevel@tonic-gate 	vp = pp->p_vnode;
2288*0Sstevel@tonic-gate 	offset = pp->p_offset;
2289*0Sstevel@tonic-gate 	ASSERT(vp != NULL);
2290*0Sstevel@tonic-gate 
2291*0Sstevel@tonic-gate 	/*
2292*0Sstevel@tonic-gate 	 * Assume the last smap used on this cpu is the one needed.
2293*0Sstevel@tonic-gate 	 */
2294*0Sstevel@tonic-gate 	scpu = smd_cpu+CPU->cpu_seqid;
2295*0Sstevel@tonic-gate 	smp = scpu->scpu.scpu_last_smap;
2296*0Sstevel@tonic-gate 	mutex_enter(&smp->sm_mtx);
2297*0Sstevel@tonic-gate 	if (smp->sm_vp == vp && smp->sm_off == offset) {
2298*0Sstevel@tonic-gate 		ASSERT(smp->sm_refcnt > 0);
2299*0Sstevel@tonic-gate 	} else {
2300*0Sstevel@tonic-gate 		/*
2301*0Sstevel@tonic-gate 		 * Assumption wrong, find the smap on the hash chain.
2302*0Sstevel@tonic-gate 		 */
2303*0Sstevel@tonic-gate 		mutex_exit(&smp->sm_mtx);
2304*0Sstevel@tonic-gate 		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
2305*0Sstevel@tonic-gate 		hashmtx = SHASHMTX(hashid);
2306*0Sstevel@tonic-gate 
2307*0Sstevel@tonic-gate 		mutex_enter(hashmtx);
2308*0Sstevel@tonic-gate 		smp = smd_hash[hashid].sh_hash_list;
2309*0Sstevel@tonic-gate 		for (; smp != NULL; smp = smp->sm_hash) {
2310*0Sstevel@tonic-gate 			if (smp->sm_vp == vp && smp->sm_off == offset)
2311*0Sstevel@tonic-gate 				break;
2312*0Sstevel@tonic-gate 		}
2313*0Sstevel@tonic-gate 		mutex_exit(hashmtx);
2314*0Sstevel@tonic-gate 		if (smp) {
2315*0Sstevel@tonic-gate 			mutex_enter(&smp->sm_mtx);
2316*0Sstevel@tonic-gate 			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
2317*0Sstevel@tonic-gate 		}
2318*0Sstevel@tonic-gate 	}
2319*0Sstevel@tonic-gate 
2320*0Sstevel@tonic-gate 	if (ppp)
2321*0Sstevel@tonic-gate 		*ppp = smp ? pp : NULL;
2322*0Sstevel@tonic-gate 
2323*0Sstevel@tonic-gate 	return (smp);
2324*0Sstevel@tonic-gate }
2325*0Sstevel@tonic-gate 
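/*
 * get_smap_kpm() above first probes a one-entry per-cpu hint
 * (scpu_last_smap, remembered in segmap_getmapflt) and validates it
 * under the smap mutex before falling back to the hash chain, so a
 * stale hint costs only one lock round trip.  A minimal sketch of the
 * hint-then-verify step (illustrative only, guard macro never defined):
 */
#ifdef SEGMAP_EXAMPLE_CODE
static struct smap *
example_smap_hint_lookup(struct vnode *vp, u_offset_t off)
{
	struct smap *smp = (smd_cpu + CPU->cpu_seqid)->scpu.scpu_last_smap;

	mutex_enter(&smp->sm_mtx);
	if (smp->sm_vp == vp && smp->sm_off == off)
		return (smp);		/* hint valid; returned locked */
	mutex_exit(&smp->sm_mtx);
	return (NULL);			/* caller falls back to the hash */
}
#endif	/* SEGMAP_EXAMPLE_CODE */
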
2326*0Sstevel@tonic-gate #else	/* SEGKPM_SUPPORT */
2327*0Sstevel@tonic-gate 
2328*0Sstevel@tonic-gate /* segkpm stubs */
2329*0Sstevel@tonic-gate 
2330*0Sstevel@tonic-gate /*ARGSUSED*/
2331*0Sstevel@tonic-gate static caddr_t
2332*0Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2333*0Sstevel@tonic-gate 	struct smap *smp, enum seg_rw rw)
2334*0Sstevel@tonic-gate {
2335*0Sstevel@tonic-gate 	return (NULL);
2336*0Sstevel@tonic-gate }
2337*0Sstevel@tonic-gate 
2338*0Sstevel@tonic-gate /*ARGSUSED*/
2339*0Sstevel@tonic-gate struct smap *
2340*0Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp)
2341*0Sstevel@tonic-gate {
2342*0Sstevel@tonic-gate 	return (NULL);
2343*0Sstevel@tonic-gate }
2344*0Sstevel@tonic-gate 
2345*0Sstevel@tonic-gate #endif	/* SEGKPM_SUPPORT */
2346