/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

/*
 * VM - generic vnode mapping segment.
 *
 * The segmap driver is used only by the kernel to get faster (than seg_vn)
 * mappings [lower routine overhead; more persistent cache] to random
 * vnode/offsets.  Note that the kernel may (and does) use seg_vn as well.
 */
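
/*
 * A minimal sketch of the typical consumer pattern, as used by file
 * system read/write paths (illustrative only; the buffer names and the
 * error handling here are assumptions, not part of this file):
 *
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off & (offset_t)MAXBMASK,
 *	    n, 1, S_READ);
 *	error = uiomove(base + (off & MAXBOFFSET), n, UIO_READ, uio);
 *	(void) segmap_release(segkmap, base, 0);
 */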

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/buf.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/mman.h>
#include <sys/errno.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/vtrace.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/thread.h>
#include <sys/dumphdr.h>
#include <sys/bitmap.h>
#include <sys/lgrp.h>

#include <vm/seg_kmem.h>
#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/rm.h>

/*
 * Private seg op routines.
 */
static void	segmap_free(struct seg *seg);
faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum fault_type type, enum seg_rw rw);
static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
static int	segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t prot);
static int	segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
static int	segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
			uint_t *protv);
static u_offset_t	segmap_getoffset(struct seg *seg, caddr_t addr);
static int	segmap_gettype(struct seg *seg, caddr_t addr);
static int	segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void	segmap_dump(struct seg *seg);
static int	segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
			struct page ***ppp, enum lock_type type,
			enum seg_rw rw);
static void	segmap_badop(void);
static int	segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t	*segmap_getpolicy(struct seg *seg,
    caddr_t addr);
static int	segmap_capable(struct seg *seg, segcapability_t capability);

/* segkpm support */
static caddr_t	segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
			struct smap *, enum seg_rw);
struct smap	*get_smap_kpm(caddr_t, page_t **);

#define	SEGMAP_BADOP(t)	(t(*)())segmap_badop

static struct seg_ops segmap_ops = {
	SEGMAP_BADOP(int),	/* dup */
	SEGMAP_BADOP(int),	/* unmap */
	segmap_free,
	segmap_fault,
	segmap_faulta,
	SEGMAP_BADOP(int),	/* setprot */
	segmap_checkprot,
	segmap_kluster,
	SEGMAP_BADOP(size_t),	/* swapout */
	SEGMAP_BADOP(int),	/* sync */
	SEGMAP_BADOP(size_t),	/* incore */
	SEGMAP_BADOP(int),	/* lockop */
	segmap_getprot,
	segmap_getoffset,
	segmap_gettype,
	segmap_getvp,
	SEGMAP_BADOP(int),	/* advise */
	segmap_dump,
	segmap_pagelock,	/* pagelock */
	SEGMAP_BADOP(int),	/* setpgsz */
	segmap_getmemid,	/* getmemid */
	segmap_getpolicy,	/* getpolicy */
	segmap_capable,		/* capable */
};

/*
 * Private segmap routines.
 */
static void	segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
			size_t len, enum seg_rw rw, struct smap *smp);
static void	segmap_smapadd(struct smap *smp);
static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
			u_offset_t off, int hashid);
static void	segmap_hashout(struct smap *smp);


/*
 * Statistics for segmap operations.
 *
 * No explicit locking to protect these stats.
 */
struct segmapcnt segmapcnt = {
	{ "fault",		KSTAT_DATA_ULONG },
	{ "faulta",		KSTAT_DATA_ULONG },
	{ "getmap",		KSTAT_DATA_ULONG },
	{ "get_use",		KSTAT_DATA_ULONG },
	{ "get_reclaim",	KSTAT_DATA_ULONG },
	{ "get_reuse",		KSTAT_DATA_ULONG },
	{ "get_unused",		KSTAT_DATA_ULONG },
	{ "get_nofree",		KSTAT_DATA_ULONG },
	{ "rel_async",		KSTAT_DATA_ULONG },
	{ "rel_write",		KSTAT_DATA_ULONG },
	{ "rel_free",		KSTAT_DATA_ULONG },
	{ "rel_abort",		KSTAT_DATA_ULONG },
	{ "rel_dontneed",	KSTAT_DATA_ULONG },
	{ "release",		KSTAT_DATA_ULONG },
	{ "pagecreate",		KSTAT_DATA_ULONG },
	{ "free_notfree",	KSTAT_DATA_ULONG },
	{ "free_dirty",		KSTAT_DATA_ULONG },
	{ "free",		KSTAT_DATA_ULONG },
	{ "stolen",		KSTAT_DATA_ULONG },
	{ "get_nomtx",		KSTAT_DATA_ULONG }
};

kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);

/*
 * Return number of map pages in segment.
 */
#define	MAP_PAGES(seg)		((seg)->s_size >> MAXBSHIFT)

/*
 * Translate addr into smap number within segment.
 */
#define	MAP_PAGE(seg, addr)  (((addr) - (seg)->s_base) >> MAXBSHIFT)

/*
 * Translate addr in seg into struct smap pointer.
 */
#define	GET_SMAP(seg, addr)	\
	&(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])

/*
 * Bit in map (16 bit bitmap).
 */
#define	SMAP_BIT_MASK(bitindex)	(1 << ((bitindex) & 0xf))
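
/*
 * Worked example (illustrative; MAXBSIZE and MAXBSHIFT are platform
 * constants): with MAXBSIZE == 8192 (MAXBSHIFT == 13), an addr that is
 * 0x6000 bytes past seg->s_base falls in smap slot 3 (0x6000 >> 13),
 * and with 4k pages the page at byte offset 0x1000 within that slot
 * corresponds to bitmap bit (0x1000 >> PAGESHIFT) == 1.
 */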

static int smd_colormsk = 0;
static int smd_ncolor = 0;
static int smd_nfree = 0;
static int smd_freemsk = 0;
#ifdef DEBUG
static int *colors_used;
#endif
static struct smap *smd_smap;
static struct smaphash *smd_hash;
#ifdef SEGMAP_HASHSTATS
static unsigned int *smd_hash_len;
#endif
static struct smfree *smd_free;
static ulong_t smd_hashmsk = 0;

#define	SEGMAP_MAXCOLOR		2
#define	SEGMAP_CACHE_PAD	64

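/*
 * Per-cpu statistics and rotor state.  The union below is padded to
 * SEGMAP_CACHE_PAD bytes so that each cpu's hot counters occupy
 * distinct cache lines, avoiding false sharing on concurrent updates.
 */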
union segmap_cpu {
	struct {
		uint32_t	scpu_free_ndx[SEGMAP_MAXCOLOR];
		struct smap	*scpu_last_smap;
		ulong_t		scpu_getmap;
		ulong_t		scpu_release;
		ulong_t		scpu_get_reclaim;
		ulong_t		scpu_fault;
		ulong_t		scpu_pagecreate;
		ulong_t		scpu_get_reuse;
	} scpu;
	char	scpu_pad[SEGMAP_CACHE_PAD];
};
static union segmap_cpu *smd_cpu;

/*
 * There are three locks in seg_map:
 *	- per freelist mutexes
 *	- per hashchain mutexes
 *	- per smap mutexes
 *
 * The lock ordering is to get the smap mutex to lock down the slot
 * first, then the hash lock (for hash in/out of the (vp, off) list) or
 * the freelist lock to put the slot back on the free list.
 *
 * The hash search is done holding only the hashchain lock; when a wanted
 * slot is found, we drop the hashchain lock and then lock the slot, so
 * there is no overlapping of hashchain and smap locks.  After the slot
 * is locked, we verify again that the slot is still what we are looking
 * for.
 *
 * Allocation of a free slot is done by holding the freelist lock and
 * then locking the smap slot at the head of the freelist.  This is
 * in reversed lock order, so mutex_tryenter() is used.
 *
 * The smap lock protects all fields in the smap structure except for
 * the link fields for the hash/free lists, which are protected by the
 * hashchain and freelist locks.
 */
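
/*
 * Illustrative lock-order sketch (not actual code):
 *
 *	mutex_enter(SMAPMTX(smp));		slot lock first
 *	mutex_enter(SHASHMTX(hashid));		then hash chain lock
 *	...
 *
 * Allocation from the head of a freelist encounters the smap last, so
 * it must take the slot lock in reversed order via
 * mutex_tryenter(SMAPMTX(smp)) and skip the slot if that fails.
 */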

#define	SHASHMTX(hashid)	(&smd_hash[hashid].sh_mtx)

#define	SMP2SMF(smp)		(&smd_free[(smp - smd_smap) & smd_freemsk])
#define	SMP2SMF_NDX(smp)	(ushort_t)((smp - smd_smap) & smd_freemsk)

#define	SMAPMTX(smp) (&smp->sm_mtx)

#define	SMAP_HASHFUNC(vp, off, hashid) \
	{ \
	hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
		((off) >> MAXBSHIFT)) & smd_hashmsk); \
	}
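
/*
 * The hash folds two shifted copies of the vnode address (the low bits
 * of a kmem-allocated vnode pointer carry little entropy) with the
 * MAXBSIZE block number of the offset, then masks the sum to the
 * hash table size.
 */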

/*
 * The most frequently updated kstat counters are kept in the
 * per cpu array to avoid hot cache blocks. The update function
 * sums the cpu local counters to update the global counters.
 */

/* ARGSUSED */
int
segmap_kstat_update(kstat_t *ksp, int rw)
{
	int i;
	ulong_t	getmap, release, get_reclaim;
	ulong_t	fault, pagecreate, get_reuse;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	getmap = release = get_reclaim = (ulong_t)0;
	fault = pagecreate = get_reuse = (ulong_t)0;
	for (i = 0; i < max_ncpus; i++) {
		getmap += smd_cpu[i].scpu.scpu_getmap;
		release += smd_cpu[i].scpu.scpu_release;
		get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
		fault += smd_cpu[i].scpu.scpu_fault;
		pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
		get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
	}
	segmapcnt.smp_getmap.value.ul = getmap;
	segmapcnt.smp_release.value.ul = release;
	segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
	segmapcnt.smp_fault.value.ul = fault;
	segmapcnt.smp_pagecreate.value.ul = pagecreate;
	segmapcnt.smp_get_reuse.value.ul = get_reuse;
	return (0);
}

int
segmap_create(struct seg *seg, void *argsp)
{
	struct segmap_data *smd;
	struct smap *smp;
	struct smfree *sm;
	struct segmap_crargs *a = (struct segmap_crargs *)argsp;
	struct smaphash *shashp;
	union segmap_cpu *scpu;
	long i, npages;
	size_t hashsz;
	uint_t nfreelist;
	extern void prefetch_smap_w(void *);
	extern int max_ncpus;

	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));

	if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
		panic("segkmap not MAXBSIZE aligned");
		/*NOTREACHED*/
	}

	smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);

	seg->s_data = (void *)smd;
	seg->s_ops = &segmap_ops;
	smd->smd_prot = a->prot;

	/*
	 * Scale the number of smap freelists to be
	 * proportional to max_ncpus * number of virtual colors.
	 * The caller can override this scaling by providing
	 * a non-zero a->nfreelist argument.
	 */
	nfreelist = a->nfreelist;
	if (nfreelist == 0)
		nfreelist = max_ncpus;
	else if (nfreelist > 4 * max_ncpus) {
		cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
		    "%d, using %d", nfreelist, max_ncpus);
		nfreelist = max_ncpus;
	}
	if (nfreelist & (nfreelist - 1)) {
		/* round up nfreelist to the next power of two. */
		nfreelist = 1 << (highbit(nfreelist));
	}

	/*
	 * Get the number of virtual colors - must be a power of 2.
	 */
	if (a->shmsize)
		smd_ncolor = a->shmsize >> MAXBSHIFT;
	else
		smd_ncolor = 1;
	ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
	ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
	smd_colormsk = smd_ncolor - 1;
	smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
	smd_freemsk = smd_nfree - 1;

	/*
	 * Allocate and initialize the freelist headers.
	 * Note that sm_freeq[1] starts out as the release queue. This
	 * is known when the smap structures are initialized below.
	 */
	smd_free = smd->smd_free =
	    kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
	for (i = 0; i < smd_nfree; i++) {
		sm = &smd->smd_free[i];
		mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
		sm->sm_allocq = &sm->sm_freeq[0];
		sm->sm_releq = &sm->sm_freeq[1];
	}
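
	/*
	 * The two queues per freelist reduce lock contention: releases
	 * are added to the release queue while allocations drain the
	 * alloc queue, and the queues are toggled when the alloc queue
	 * empties (see segmap_smapadd() and get_free_smp()).
	 */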

	/*
	 * Allocate and initialize the smap hash chain headers.
	 * Compute hash size, rounding down to a power of two.
	 */
	npages = MAP_PAGES(seg);
	smd->smd_npages = npages;
	hashsz = npages / SMAP_HASHAVELEN;
	hashsz = 1 << (highbit(hashsz) - 1);
	smd_hashmsk = hashsz - 1;
	smd_hash = smd->smd_hash =
	    kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
#ifdef SEGMAP_HASHSTATS
	smd_hash_len =
	    kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
#endif
	for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
		shashp->sh_hash_list = NULL;
		mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
	}

	/*
	 * Allocate and initialize the smap structures.
	 * Link all slots onto the appropriate freelist.
	 * The smap array is large enough to affect boot time
	 * on large systems, so use memory prefetching and only
	 * go through the array once. Inline an optimized version
	 * of segmap_smapadd to add structures to freelists with
	 * knowledge that no locks are needed here.
	 */
	smd_smap = smd->smd_sm =
	    kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);

	for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
	    smp >= smd->smd_sm; smp--) {
		struct smap *smpfreelist;
		struct sm_freeq *releq;

		prefetch_smap_w((char *)smp);

		smp->sm_vp = NULL;
		smp->sm_hash = NULL;
		smp->sm_off = 0;
		smp->sm_bitmap = 0;
		smp->sm_refcnt = 0;
		mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
		smp->sm_free_ndx = SMP2SMF_NDX(smp);

		sm = SMP2SMF(smp);
		releq = sm->sm_releq;

		smpfreelist = releq->smq_free;
		if (smpfreelist == 0) {
			releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		} else {
			smp->sm_next = smpfreelist;
			smp->sm_prev = smpfreelist->sm_prev;
			smpfreelist->sm_prev = smp;
			smp->sm_prev->sm_next = smp;
			releq->smq_free = smp->sm_next;
		}

		/*
		 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
		 */
		smp->sm_flags = 0;

#ifdef	SEGKPM_SUPPORT
		/*
		 * Due to the fragile prefetch loop no
		 * separate function is used here.
		 */
		smp->sm_kpme_next = NULL;
		smp->sm_kpme_prev = NULL;
		smp->sm_kpme_page = NULL;
#endif
	}

	/*
	 * Allocate the per color indices that distribute allocation
	 * requests over the free lists. Each cpu will have a private
	 * rotor index to spread the allocations evenly across the
	 * available smap freelists. Init the scpu_last_smap field to
	 * the first smap element so there is no need to check for NULL.
	 */
	smd_cpu =
	    kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
	for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
		int j;
		for (j = 0; j < smd_ncolor; j++)
			scpu->scpu.scpu_free_ndx[j] = j;
		scpu->scpu.scpu_last_smap = smd_smap;
	}

	vpm_init();

#ifdef DEBUG
	/*
	 * Keep track of which colors are used more often.
	 */
	colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
#endif /* DEBUG */

	return (0);
}

static void
segmap_free(struct seg *seg)
{
	ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
}

/*
 * Do a F_SOFTUNLOCK call over the range requested.
 * The range must have already been F_SOFTLOCK'ed.
 */
static void
segmap_unlock(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum seg_rw rw,
	struct smap *smp)
{
	page_t *pp;
	caddr_t adr;
	u_offset_t off;
	struct vnode *vp;
	kmutex_t *smtx;

	ASSERT(smp->sm_refcnt > 0);

#ifdef lint
	seg = seg;
#endif

	if (segmap_kpm && IS_KPM_ADDR(addr)) {

		/*
		 * We're called only from segmap_fault, and for a kpm-based
		 * smap that call is a NOP, so dangerous things must have
		 * happened in the meantime. Pages are prefaulted and locked
		 * in segmap_getmapflt and will not be unlocked until
		 * segmap_release.
		 */
		panic("segmap_unlock: called with kpm addr %p", (void *)addr);
		/*NOTREACHED*/
	}

	vp = smp->sm_vp;
	off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);

	hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
	for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
		ushort_t bitmask;

		/*
		 * Use page_find() instead of page_lookup() to
		 * find the page since we know that it has
		 * a "shared" lock.
		 */
		pp = page_find(vp, off);
		if (pp == NULL) {
			panic("segmap_unlock: page not found");
			/*NOTREACHED*/
		}

		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else if (rw != S_OTHER) {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		/*
		 * Clear the bit in the bitmap corresponding to "off", if
		 * set, since the page and translation are being unlocked.
		 */
		bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);

		/*
		 * Large Files: The following assertion verifies that
		 * (off - smp->sm_off) fits in an int, as assumed by the
		 * bit index computation above.
		 */
		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
		smtx = SMAPMTX(smp);
		mutex_enter(smtx);
		if (smp->sm_bitmap & bitmask) {
			smp->sm_bitmap &= ~bitmask;
		}
		mutex_exit(smtx);

		page_unlock(pp);
	}
}

#define	MAXPPB	(MAXBSIZE/4096)	/* assumes minimum page size of 4k */

/*
 * This routine is called via a machine specific fault handling
 * routine.  It is also called by software routines wishing to
 * lock or unlock a range of addresses.
 *
 * Note that this routine expects a page-aligned "addr".
 */
faultcode_t
segmap_fault(
	struct hat *hat,
	struct seg *seg,
	caddr_t addr,
	size_t len,
	enum fault_type type,
	enum seg_rw rw)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	struct smap *smp;
	page_t *pp, **ppp;
	struct vnode *vp;
	u_offset_t off;
	page_t *pl[MAXPPB + 1];
	uint_t prot;
	u_offset_t addroff;
	caddr_t adr;
	int err;
	u_offset_t sm_off;
	int hat_flag;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifndef DEBUG
		if (type != F_SOFTUNLOCK)
			return (0);
#endif

		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_fault: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
#ifdef	DEBUG
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		if (newpage) {
			cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
			    (void *)smp);
		}

		if (type != F_SOFTUNLOCK) {
			mutex_exit(smtx);
			return (0);
		}
#endif
		mutex_exit(smtx);
		vp = smp->sm_vp;
		sm_off = smp->sm_off;

		if (vp == NULL)
			return (FC_MAKE_ERR(EIO));

		ASSERT(smp->sm_refcnt > 0);

		addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
		if (addroff + len > MAXBSIZE)
			panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
			    (void *)(addr + len));

		off = sm_off + addroff;

		pp = page_find(vp, off);

		if (pp == NULL)
			panic("segmap_fault: softunlock page not found");

		/*
		 * Set ref bit also here in case of S_OTHER to avoid the
		 * overhead of supporting other cases than F_SOFTUNLOCK
		 * with segkpm. We can do this because the underlying
		 * pages are locked anyway.
		 */
		if (rw == S_WRITE) {
			hat_setrefmod(pp);
		} else {
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, off);
			hat_setref(pp);
		}

		return (0);
	}

	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
	smp = GET_SMAP(seg, addr);
	vp = smp->sm_vp;
	sm_off = smp->sm_off;

	if (vp == NULL)
		return (FC_MAKE_ERR(EIO));

	ASSERT(smp->sm_refcnt > 0);

	addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
	if (addroff + len > MAXBSIZE) {
		panic("segmap_fault: endaddr %p "
		    "exceeds MAXBSIZE chunk", (void *)(addr + len));
		/*NOTREACHED*/
	}
	off = sm_off + addroff;

	/*
	 * First handle the easy stuff
	 */
	if (type == F_SOFTUNLOCK) {
		segmap_unlock(hat, seg, addr, len, rw, smp);
		return (0);
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
	err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
	    seg, addr, rw, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));

	prot &= smd->smd_prot;

	/*
	 * Handle all pages returned in the pl[] array.
	 * This loop is coded on the assumption that if
	 * there was no error from the VOP_GETPAGE routine,
	 * then the page list returned will contain all the
	 * needed pages for the vp from [off..off + len].
	 */
	ppp = pl;
	while ((pp = *ppp++) != NULL) {
		u_offset_t poff;
		ASSERT(pp->p_vnode == vp);
		hat_flag = HAT_LOAD;

		/*
		 * Verify that the pages returned are within the range
		 * of this segmap region.  Note that it is theoretically
		 * possible for pages outside this range to be returned,
		 * but it is not very likely.  If we cannot use the
		 * page here, just release it and go on to the next one.
		 */
		if (pp->p_offset < sm_off ||
		    pp->p_offset >= sm_off + MAXBSIZE) {
			(void) page_release(pp, 1);
			continue;
		}

		ASSERT(hat == kas.a_hat);
		poff = pp->p_offset;
		adr = addr + (poff - off);
		if (adr >= addr && adr < addr + len) {
			hat_setref(pp);
			TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
			    "segmap_fault:pp %p vp %p offset %llx",
			    pp, vp, poff);
			if (type == F_SOFTLOCK)
				hat_flag = HAT_LOAD_LOCK;
		}

		/*
		 * Deal with VMODSORT pages here. If we know this is a write
		 * do the setmod now and allow write protection.
		 * As long as it's modified or not S_OTHER, remove write
		 * protection. With S_OTHER it's up to the FS to deal with this.
		 */
		if (IS_VMODSORT(vp)) {
			if (rw == S_WRITE)
				hat_setmod(pp);
			else if (rw != S_OTHER && !hat_ismod(pp))
				prot &= ~PROT_WRITE;
		}

		hat_memload(hat, adr, pp, prot, hat_flag);
		if (hat_flag != HAT_LOAD_LOCK)
			page_unlock(pp);
	}
	return (0);
}

/*
 * This routine is used to start I/O on pages asynchronously.
 */
static faultcode_t
segmap_faulta(struct seg *seg, caddr_t addr)
{
	struct smap *smp;
	struct vnode *vp;
	u_offset_t off;
	int err;

	if (segmap_kpm && IS_KPM_ADDR(addr)) {
		int	newpage;
		kmutex_t *smtx;

		/*
		 * Pages are successfully prefaulted and locked in
		 * segmap_getmapflt and can't be unlocked until
		 * segmap_release. No hat mappings have to be locked
		 * and they also can't be unlocked as long as the
		 * caller owns an active kpm addr.
		 */
#ifdef	DEBUG
		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
			panic("segmap_faulta: smap not found "
			    "for addr %p", (void *)addr);
			/*NOTREACHED*/
		}

		smtx = SMAPMTX(smp);
		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
		mutex_exit(smtx);
		if (newpage)
			cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
			    (void *)smp);
#endif
		return (0);
	}

	segmapcnt.smp_faulta.value.ul++;
	smp = GET_SMAP(seg, addr);

	ASSERT(smp->sm_refcnt > 0);

	vp = smp->sm_vp;
	off = smp->sm_off;

	if (vp == NULL) {
		cmn_err(CE_WARN, "segmap_faulta - no vp");
		return (FC_MAKE_ERR(EIO));
	}

	TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
	    "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);

	err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
	    & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
	    seg, addr, S_READ, CRED(), NULL);

	if (err)
		return (FC_MAKE_ERR(err));
	return (0);
}

/*ARGSUSED*/
static int
segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));

	/*
	 * Need not acquire the segment lock since
	 * "smd_prot" is a read-only field.
	 */
	return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
}

static int
segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	if (pgno != 0) {
		do {
			protv[--pgno] = smd->smd_prot;
		} while (pgno != 0);
	}
	return (0);
}

static u_offset_t
segmap_getoffset(struct seg *seg, caddr_t addr)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
}

/*ARGSUSED*/
static int
segmap_gettype(struct seg *seg, caddr_t addr)
{
	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	return (MAP_SHARED);
}

/*ARGSUSED*/
static int
segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
	struct segmap_data *smd = (struct segmap_data *)seg->s_data;

	ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));

	/* XXX - This doesn't make any sense */
	*vpp = smd->smd_sm->sm_vp;
	return (0);
}

/*
 * Check to see if it makes sense to do kluster/read ahead to
 * addr + delta relative to the mapping at addr.  We assume here
 * that delta is a signed PAGESIZE'd multiple (which can be negative).
 *
 * For segmap we always "approve" of this action from our standpoint.
 */
/*ARGSUSED*/
static int
segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
{
	return (0);
}

static void
segmap_badop(void)
{
	panic("segmap_badop");
	/*NOTREACHED*/
}

/*
 * Special private segmap operations
 */

/*
 * Add smap to the appropriate free list.
 */
static void
segmap_smapadd(struct smap *smp)
{
	struct smfree *sm;
	struct smap *smpfreelist;
	struct sm_freeq *releq;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	if (smp->sm_refcnt != 0) {
		panic("segmap_smapadd");
		/*NOTREACHED*/
	}

	sm = &smd_free[smp->sm_free_ndx];
	/*
	 * Add to the tail of the release queue.
	 * Note that sm_releq and sm_allocq could toggle
	 * before we get the lock. This does not affect
	 * correctness as the 2 queues are only maintained
	 * to reduce lock pressure.
	 */
	releq = sm->sm_releq;
	if (releq == &sm->sm_freeq[0])
		smp->sm_flags |= SM_QNDX_ZERO;
	else
		smp->sm_flags &= ~SM_QNDX_ZERO;
	mutex_enter(&releq->smq_mtx);
	smpfreelist = releq->smq_free;
	if (smpfreelist == 0) {
		int want;

		releq->smq_free = smp->sm_next = smp->sm_prev = smp;
		/*
		 * Both queue mutexes are held to set sm_want;
		 * snapshot the value before dropping the releq mutex.
		 * If sm_want appears after the releq mutex is dropped,
		 * then the smap just freed is already gone.
		 */
		want = sm->sm_want;
		mutex_exit(&releq->smq_mtx);
		/*
		 * See if there was a waiter before dropping the releq mutex,
		 * then recheck after obtaining the sm_freeq[0] mutex, as
		 * another thread may have already signaled.
		 */
		if (want) {
			mutex_enter(&sm->sm_freeq[0].smq_mtx);
			if (sm->sm_want)
				cv_signal(&sm->sm_free_cv);
			mutex_exit(&sm->sm_freeq[0].smq_mtx);
		}
	} else {
		smp->sm_next = smpfreelist;
		smp->sm_prev = smpfreelist->sm_prev;
		smpfreelist->sm_prev = smp;
		smp->sm_prev->sm_next = smp;
		mutex_exit(&releq->smq_mtx);
	}
}


static struct smap *
segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
{
	struct smap **hpp;
	struct smap *tmp;
	kmutex_t *hmtx;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_vp == NULL);
	ASSERT(smp->sm_hash == NULL);
	ASSERT(smp->sm_prev == NULL);
	ASSERT(smp->sm_next == NULL);
	ASSERT(hashid >= 0 && hashid <= smd_hashmsk);

	hmtx = SHASHMTX(hashid);

	mutex_enter(hmtx);
	/*
	 * First we need to verify that no one has created a smp
	 * with (vp, off) as its tag before we did.
	 */
	for (tmp = smd_hash[hashid].sh_hash_list;
	    tmp != NULL; tmp = tmp->sm_hash)
		if (tmp->sm_vp == vp && tmp->sm_off == off)
			break;

	if (tmp == NULL) {
		/*
		 * No one created one yet.
		 *
		 * Funniness here - we don't increment the ref count on the
		 * vnode * even though we have another pointer to it here.
		 * The reason for this is that we don't want the fact that
		 * a seg_map entry somewhere refers to a vnode to prevent the
		 * vnode * itself from going away.  This is because this
		 * reference to the vnode is a "soft one".  In the case where
		 * a mapping is being used by a rdwr [or directory routine?]
		 * there already has to be a non-zero ref count on the vnode.
		 * In the case where the vp has been freed and the smap
		 * structure is on the free list, there are no pages in memory
		 * that can refer to the vnode.  Thus even if we reuse the same
		 * vnode/smap structure for a vnode which has the same
		 * address but represents a different object, we are ok.
		 */
		smp->sm_vp = vp;
		smp->sm_off = off;

		hpp = &smd_hash[hashid].sh_hash_list;
		smp->sm_hash = *hpp;
		*hpp = smp;
#ifdef SEGMAP_HASHSTATS
		smd_hash_len[hashid]++;
#endif
	}
	mutex_exit(hmtx);

	return (tmp);
}

static void
segmap_hashout(struct smap *smp)
{
	struct smap **hpp, *hp;
	struct vnode *vp;
	kmutex_t *mtx;
	int hashid;
	u_offset_t off;

	ASSERT(MUTEX_HELD(SMAPMTX(smp)));

	vp = smp->sm_vp;
	off = smp->sm_off;

	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
	mtx = SHASHMTX(hashid);
	mutex_enter(mtx);

	hpp = &smd_hash[hashid].sh_hash_list;
	for (;;) {
		hp = *hpp;
		if (hp == NULL) {
			panic("segmap_hashout");
			/*NOTREACHED*/
		}
		if (hp == smp)
			break;
		hpp = &hp->sm_hash;
	}

	*hpp = smp->sm_hash;
	smp->sm_hash = NULL;
#ifdef SEGMAP_HASHSTATS
	smd_hash_len[hashid]--;
#endif
	mutex_exit(mtx);

	smp->sm_vp = NULL;
	smp->sm_off = (u_offset_t)0;
}

/*
 * Attempt to free unmodified, unmapped, and unlocked segmap
 * pages.
 */
void
segmap_pagefree(struct vnode *vp, u_offset_t off)
{
	u_offset_t pgoff;
	page_t  *pp;

	for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {

		if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
			continue;

		switch (page_release(pp, 1)) {
		case PGREL_NOTREL:
			segmapcnt.smp_free_notfree.value.ul++;
			break;
		case PGREL_MOD:
			segmapcnt.smp_free_dirty.value.ul++;
			break;
		case PGREL_CLEAN:
			segmapcnt.smp_free.value.ul++;
			break;
		}
	}
}

/*
 * Locks held on entry: smap lock
 * Locks held on exit:  smap lock
 */

static void
grab_smp(struct smap *smp, page_t *pp)
{
	ASSERT(MUTEX_HELD(SMAPMTX(smp)));
	ASSERT(smp->sm_refcnt == 0);

	if (smp->sm_vp != (struct vnode *)NULL) {
		struct vnode	*vp = smp->sm_vp;
		u_offset_t	off = smp->sm_off;
		/*
		 * Destroy old vnode association and
		 * unload any hardware translations to
		 * the old object.
		 */
		smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
		segmap_hashout(smp);

		/*
		 * This node is off freelist and hashlist,
		 * so there is no reason to drop/reacquire sm_mtx
		 * across calls to hat_unload.
		 */
		if (segmap_kpm) {
			caddr_t vaddr;
			int hat_unload_needed = 0;

			/*
			 * unload kpm mapping
			 */
			if (pp != NULL) {
				vaddr = hat_kpm_page2va(pp, 1);
				hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
				page_unlock(pp);
			}

			/*
			 * Check if we have (also) the rare case of a
			 * non kpm mapping.
			 */
			if (smp->sm_flags & SM_NOTKPM_RELEASED) {
				hat_unload_needed = 1;
				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			}

			if (hat_unload_needed) {
				hat_unload(kas.a_hat, segkmap->s_base +
				    ((smp - smd_smap) * MAXBSIZE),
				    MAXBSIZE, HAT_UNLOAD);
			}

		} else {
			ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
			smp->sm_flags &= ~SM_NOTKPM_RELEASED;
			hat_unload(kas.a_hat, segkmap->s_base +
			    ((smp - smd_smap) * MAXBSIZE),
			    MAXBSIZE, HAT_UNLOAD);
		}
		segmap_pagefree(vp, off);
	}
}
11820Sstevel@tonic-gate 
11830Sstevel@tonic-gate static struct smap *
11840Sstevel@tonic-gate get_free_smp(int free_ndx)
11850Sstevel@tonic-gate {
11860Sstevel@tonic-gate 	struct smfree *sm;
11870Sstevel@tonic-gate 	kmutex_t *smtx;
11880Sstevel@tonic-gate 	struct smap *smp, *first;
11890Sstevel@tonic-gate 	struct sm_freeq *allocq, *releq;
11900Sstevel@tonic-gate 	struct kpme *kpme;
11910Sstevel@tonic-gate 	page_t *pp = NULL;
11920Sstevel@tonic-gate 	int end_ndx, page_locked = 0;
11930Sstevel@tonic-gate 
11940Sstevel@tonic-gate 	end_ndx = free_ndx;
11950Sstevel@tonic-gate 	sm = &smd_free[free_ndx];
11960Sstevel@tonic-gate 
11970Sstevel@tonic-gate retry_queue:
11980Sstevel@tonic-gate 	allocq = sm->sm_allocq;
11990Sstevel@tonic-gate 	mutex_enter(&allocq->smq_mtx);
12000Sstevel@tonic-gate 
12010Sstevel@tonic-gate 	if ((smp = allocq->smq_free) == NULL) {
12020Sstevel@tonic-gate 
12030Sstevel@tonic-gate skip_queue:
12040Sstevel@tonic-gate 		/*
12050Sstevel@tonic-gate 		 * The alloc list is empty or this queue is being skipped;
12060Sstevel@tonic-gate 		 * first see if the allocq toggled.
12070Sstevel@tonic-gate 		 */
12080Sstevel@tonic-gate 		if (sm->sm_allocq != allocq) {
12090Sstevel@tonic-gate 			/* queue changed */
12100Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
12110Sstevel@tonic-gate 			goto retry_queue;
12120Sstevel@tonic-gate 		}
12130Sstevel@tonic-gate 		releq = sm->sm_releq;
12140Sstevel@tonic-gate 		if (!mutex_tryenter(&releq->smq_mtx)) {
12150Sstevel@tonic-gate 			/* cannot get releq; a free smp may be there now */
12160Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
12170Sstevel@tonic-gate 
12180Sstevel@tonic-gate 			/*
12190Sstevel@tonic-gate 			 * This loop could spin forever if this thread has
12200Sstevel@tonic-gate 			 * higher priority than the thread that is holding
12210Sstevel@tonic-gate 			 * releq->smq_mtx. In order to force the other thread
12220Sstevel@tonic-gate 			 * to run, we'll lock/unlock the mutex which is safe
12230Sstevel@tonic-gate 			 * since we just unlocked the allocq mutex.
12240Sstevel@tonic-gate 			 */
12250Sstevel@tonic-gate 			mutex_enter(&releq->smq_mtx);
12260Sstevel@tonic-gate 			mutex_exit(&releq->smq_mtx);
12270Sstevel@tonic-gate 			goto retry_queue;
12280Sstevel@tonic-gate 		}
12290Sstevel@tonic-gate 		if (releq->smq_free == NULL) {
12300Sstevel@tonic-gate 			/*
12310Sstevel@tonic-gate 			 * This freelist is empty.
12320Sstevel@tonic-gate 			 * This should not happen unless clients
12330Sstevel@tonic-gate 			 * are failing to release the segmap
12340Sstevel@tonic-gate 			 * window after accessing the data.
12350Sstevel@tonic-gate 			 * Before resorting to sleeping, try
12360Sstevel@tonic-gate 			 * the next list of the same color.
12370Sstevel@tonic-gate 			 */
12380Sstevel@tonic-gate 			free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
12390Sstevel@tonic-gate 			if (free_ndx != end_ndx) {
12400Sstevel@tonic-gate 				mutex_exit(&releq->smq_mtx);
12410Sstevel@tonic-gate 				mutex_exit(&allocq->smq_mtx);
12420Sstevel@tonic-gate 				sm = &smd_free[free_ndx];
12430Sstevel@tonic-gate 				goto retry_queue;
12440Sstevel@tonic-gate 			}
12450Sstevel@tonic-gate 			/*
12460Sstevel@tonic-gate 			 * Tried all freelists of the same color once,
12470Sstevel@tonic-gate 			 * wait on this list and hope something gets freed.
12480Sstevel@tonic-gate 			 */
12490Sstevel@tonic-gate 			segmapcnt.smp_get_nofree.value.ul++;
12500Sstevel@tonic-gate 			sm->sm_want++;
12510Sstevel@tonic-gate 			mutex_exit(&sm->sm_freeq[1].smq_mtx);
12520Sstevel@tonic-gate 			cv_wait(&sm->sm_free_cv,
12539281SPrakash.Sangappa@Sun.COM 			    &sm->sm_freeq[0].smq_mtx);
12540Sstevel@tonic-gate 			sm->sm_want--;
12550Sstevel@tonic-gate 			mutex_exit(&sm->sm_freeq[0].smq_mtx);
12560Sstevel@tonic-gate 			sm = &smd_free[free_ndx];
12570Sstevel@tonic-gate 			goto retry_queue;
12580Sstevel@tonic-gate 		} else {
12590Sstevel@tonic-gate 			/*
12600Sstevel@tonic-gate 			 * Something on the rele queue; flip the alloc
12610Sstevel@tonic-gate 			 * and rele queues and retry.
12620Sstevel@tonic-gate 			 */
12630Sstevel@tonic-gate 			sm->sm_allocq = releq;
12640Sstevel@tonic-gate 			sm->sm_releq = allocq;
12650Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
12660Sstevel@tonic-gate 			mutex_exit(&releq->smq_mtx);
12670Sstevel@tonic-gate 			if (page_locked) {
12680Sstevel@tonic-gate 				delay(hz >> 2);
12690Sstevel@tonic-gate 				page_locked = 0;
12700Sstevel@tonic-gate 			}
12710Sstevel@tonic-gate 			goto retry_queue;
12720Sstevel@tonic-gate 		}
12730Sstevel@tonic-gate 	} else {
12740Sstevel@tonic-gate 		/*
12750Sstevel@tonic-gate 		 * Fast path the case where we get the smap mutex
12760Sstevel@tonic-gate 		 * on the first try.
12770Sstevel@tonic-gate 		 */
12780Sstevel@tonic-gate 		first = smp;
12790Sstevel@tonic-gate next_smap:
12800Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
12810Sstevel@tonic-gate 		if (!mutex_tryenter(smtx)) {
12820Sstevel@tonic-gate 			/*
12830Sstevel@tonic-gate 			 * Another thread is trying to reclaim this slot.
12840Sstevel@tonic-gate 			 * Skip to the next queue or smap.
12850Sstevel@tonic-gate 			 */
12860Sstevel@tonic-gate 			if ((smp = smp->sm_next) == first) {
12870Sstevel@tonic-gate 				goto skip_queue;
12880Sstevel@tonic-gate 			} else {
12890Sstevel@tonic-gate 				goto next_smap;
12900Sstevel@tonic-gate 			}
12910Sstevel@tonic-gate 		} else {
12920Sstevel@tonic-gate 			/*
12930Sstevel@tonic-gate 			 * if kpme exists, get shared lock on the page
12940Sstevel@tonic-gate 			 */
12950Sstevel@tonic-gate 			if (segmap_kpm && smp->sm_vp != NULL) {
12960Sstevel@tonic-gate 
12970Sstevel@tonic-gate 				kpme = GET_KPME(smp);
12980Sstevel@tonic-gate 				pp = kpme->kpe_page;
12990Sstevel@tonic-gate 
13000Sstevel@tonic-gate 				if (pp != NULL) {
13010Sstevel@tonic-gate 					if (!page_trylock(pp, SE_SHARED)) {
13020Sstevel@tonic-gate 						smp = smp->sm_next;
13030Sstevel@tonic-gate 						mutex_exit(smtx);
13040Sstevel@tonic-gate 						page_locked = 1;
13050Sstevel@tonic-gate 
13060Sstevel@tonic-gate 						pp = NULL;
13070Sstevel@tonic-gate 
13080Sstevel@tonic-gate 						if (smp == first) {
13090Sstevel@tonic-gate 							goto skip_queue;
13100Sstevel@tonic-gate 						} else {
13110Sstevel@tonic-gate 							goto next_smap;
13120Sstevel@tonic-gate 						}
13130Sstevel@tonic-gate 					} else {
13140Sstevel@tonic-gate 						if (kpme->kpe_page == NULL) {
13150Sstevel@tonic-gate 							page_unlock(pp);
13160Sstevel@tonic-gate 							pp = NULL;
13170Sstevel@tonic-gate 						}
13180Sstevel@tonic-gate 					}
13190Sstevel@tonic-gate 				}
13200Sstevel@tonic-gate 			}
13210Sstevel@tonic-gate 
13220Sstevel@tonic-gate 			/*
13230Sstevel@tonic-gate 			 * At this point, we've selected smp.  Remove smp
13240Sstevel@tonic-gate 			 * from its freelist.  If smp is the first one in
13250Sstevel@tonic-gate 			 * the freelist, update the head of the freelist.
13260Sstevel@tonic-gate 			 */
13270Sstevel@tonic-gate 			if (first == smp) {
13280Sstevel@tonic-gate 				ASSERT(first == allocq->smq_free);
13290Sstevel@tonic-gate 				allocq->smq_free = smp->sm_next;
13300Sstevel@tonic-gate 			}
13310Sstevel@tonic-gate 
13320Sstevel@tonic-gate 			/*
13330Sstevel@tonic-gate 			 * if the head of the freelist still points to smp,
13340Sstevel@tonic-gate 			 * then there are no more free smaps in that list.
13350Sstevel@tonic-gate 			 */
13360Sstevel@tonic-gate 			if (allocq->smq_free == smp)
13370Sstevel@tonic-gate 				/*
13380Sstevel@tonic-gate 				 * Took the last one
13390Sstevel@tonic-gate 				 */
13400Sstevel@tonic-gate 				allocq->smq_free = NULL;
13410Sstevel@tonic-gate 			else {
13420Sstevel@tonic-gate 				smp->sm_prev->sm_next = smp->sm_next;
13430Sstevel@tonic-gate 				smp->sm_next->sm_prev = smp->sm_prev;
13440Sstevel@tonic-gate 			}
13450Sstevel@tonic-gate 			mutex_exit(&allocq->smq_mtx);
13460Sstevel@tonic-gate 			smp->sm_prev = smp->sm_next = NULL;
13470Sstevel@tonic-gate 
13480Sstevel@tonic-gate 			/*
13490Sstevel@tonic-gate 			 * if pp != NULL, pp must have been locked;
13500Sstevel@tonic-gate 			 * grab_smp() unlocks pp.
13510Sstevel@tonic-gate 			 */
13520Sstevel@tonic-gate 			ASSERT((pp == NULL) || PAGE_LOCKED(pp));
13530Sstevel@tonic-gate 			grab_smp(smp, pp);
13540Sstevel@tonic-gate 			/* return smp locked. */
13550Sstevel@tonic-gate 			ASSERT(SMAPMTX(smp) == smtx);
13560Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(smtx));
13570Sstevel@tonic-gate 			return (smp);
13580Sstevel@tonic-gate 		}
13590Sstevel@tonic-gate 	}
13600Sstevel@tonic-gate }
13610Sstevel@tonic-gate 
13620Sstevel@tonic-gate /*
13630Sstevel@tonic-gate  * Special public segmap operations
13640Sstevel@tonic-gate  */
13650Sstevel@tonic-gate 
13660Sstevel@tonic-gate /*
13675331Samw  * Create pages (without using VOP_GETPAGE) and load up translations to them.
13680Sstevel@tonic-gate  * If softlock is TRUE, then set things up so that it looks like a call
13690Sstevel@tonic-gate  * to segmap_fault with F_SOFTLOCK.
13700Sstevel@tonic-gate  *
13710Sstevel@tonic-gate  * Returns 1 if a page is created by calling page_create_va(), or 0 otherwise.
13720Sstevel@tonic-gate  *
13730Sstevel@tonic-gate  * All fields in the generic segment (struct seg) are considered to be
13740Sstevel@tonic-gate  * read-only for "segmap" even though the kernel address space (kas) may
13750Sstevel@tonic-gate  * not be locked, hence no lock is needed to access them.
13760Sstevel@tonic-gate  */
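/*
 * Illustrative sketch only (not taken from this file): a file system
 * write path typically pairs segmap_pagecreate with segmap_pageunlock
 * roughly as follows, assuming the transfer fits in one MAXBSIZE
 * window; vp, off, n and uio are hypothetical names:
 *
 *	size_t mapon = (size_t)(off & MAXBOFFSET);
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off & (offset_t)MAXBMASK,
 *	    mapon + n, 0, S_WRITE);
 *	(void) segmap_pagecreate(segkmap, base + mapon, n, 0);
 *	error = uiomove(base + mapon, n, UIO_WRITE, uio);
 *	segmap_pageunlock(segkmap, base + mapon, n, S_WRITE);
 *	error = segmap_release(segkmap, base, error ? SM_INVAL : SM_WRITE);
 *
 * segmap_pageunlock only drops the locks segmap_pagecreate took (the
 * sm_bitmap bits), so calling it over the whole range is safe even
 * when some of the pages already existed.
 */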
13770Sstevel@tonic-gate int
13780Sstevel@tonic-gate segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
13790Sstevel@tonic-gate {
13800Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
13810Sstevel@tonic-gate 	page_t *pp;
13820Sstevel@tonic-gate 	u_offset_t off;
13830Sstevel@tonic-gate 	struct smap *smp;
13840Sstevel@tonic-gate 	struct vnode *vp;
13850Sstevel@tonic-gate 	caddr_t eaddr;
13860Sstevel@tonic-gate 	int newpage = 0;
13870Sstevel@tonic-gate 	uint_t prot;
13880Sstevel@tonic-gate 	kmutex_t *smtx;
13890Sstevel@tonic-gate 	int hat_flag;
13900Sstevel@tonic-gate 
13910Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
13920Sstevel@tonic-gate 
13930Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
13940Sstevel@tonic-gate 		/*
13950Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
13960Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
13970Sstevel@tonic-gate 		 * segmap_release. The SM_KPM_NEWPAGE flag is set
13980Sstevel@tonic-gate 		 * in segmap_pagecreate_kpm when new pages are created,
13990Sstevel@tonic-gate 		 * and it is returned as the "newpage" indication here.
14000Sstevel@tonic-gate 		 */
14010Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
14020Sstevel@tonic-gate 			panic("segmap_pagecreate: smap not found "
14030Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
14040Sstevel@tonic-gate 			/*NOTREACHED*/
14050Sstevel@tonic-gate 		}
14060Sstevel@tonic-gate 
14070Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
14080Sstevel@tonic-gate 		newpage = smp->sm_flags & SM_KPM_NEWPAGE;
14090Sstevel@tonic-gate 		smp->sm_flags &= ~SM_KPM_NEWPAGE;
14100Sstevel@tonic-gate 		mutex_exit(smtx);
14110Sstevel@tonic-gate 
14120Sstevel@tonic-gate 		return (newpage);
14130Sstevel@tonic-gate 	}
14140Sstevel@tonic-gate 
14150Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
14160Sstevel@tonic-gate 
14170Sstevel@tonic-gate 	eaddr = addr + len;
14180Sstevel@tonic-gate 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
14190Sstevel@tonic-gate 
14200Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
14210Sstevel@tonic-gate 
14220Sstevel@tonic-gate 	/*
14230Sstevel@tonic-gate 	 * We don't grab the smp mutex here since we assume the smp
14240Sstevel@tonic-gate 	 * already has a refcnt set, which prevents the slot from
14250Sstevel@tonic-gate 	 * changing its id.
14260Sstevel@tonic-gate 	 */
14270Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
14280Sstevel@tonic-gate 
14290Sstevel@tonic-gate 	vp = smp->sm_vp;
14300Sstevel@tonic-gate 	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
14310Sstevel@tonic-gate 	prot = smd->smd_prot;
14320Sstevel@tonic-gate 
14330Sstevel@tonic-gate 	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
14340Sstevel@tonic-gate 		hat_flag = HAT_LOAD;
14350Sstevel@tonic-gate 		pp = page_lookup(vp, off, SE_SHARED);
14360Sstevel@tonic-gate 		if (pp == NULL) {
14370Sstevel@tonic-gate 			ushort_t bitindex;
14380Sstevel@tonic-gate 
14390Sstevel@tonic-gate 			if ((pp = page_create_va(vp, off,
14400Sstevel@tonic-gate 			    PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
14410Sstevel@tonic-gate 				panic("segmap_pagecreate: page_create failed");
14420Sstevel@tonic-gate 				/*NOTREACHED*/
14430Sstevel@tonic-gate 			}
14440Sstevel@tonic-gate 			newpage = 1;
14450Sstevel@tonic-gate 			page_io_unlock(pp);
14460Sstevel@tonic-gate 
14470Sstevel@tonic-gate 			/*
14480Sstevel@tonic-gate 			 * Since pages created here do not contain valid
14490Sstevel@tonic-gate 			 * data until the caller writes into them, the
14500Sstevel@tonic-gate 			 * "exclusive" lock is not dropped, in order to prevent
14510Sstevel@tonic-gate 			 * other users from accessing the page.  We also
14520Sstevel@tonic-gate 			 * have to lock the translation to prevent a fault
14535331Samw 			 * from occurring when the virtual address mapped by
14540Sstevel@tonic-gate 			 * this page is written into.  This is necessary to
14550Sstevel@tonic-gate 			 * avoid a deadlock since we haven't dropped the
14560Sstevel@tonic-gate 			 * "exclusive" lock.
14570Sstevel@tonic-gate 			 */
14580Sstevel@tonic-gate 			bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
14590Sstevel@tonic-gate 
14600Sstevel@tonic-gate 			/*
14610Sstevel@tonic-gate 			 * Large Files: The following assertion is to
14620Sstevel@tonic-gate 			 * verify the cast above.
14630Sstevel@tonic-gate 			 */
14640Sstevel@tonic-gate 			ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
14650Sstevel@tonic-gate 			smtx = SMAPMTX(smp);
14660Sstevel@tonic-gate 			mutex_enter(smtx);
14670Sstevel@tonic-gate 			smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
14680Sstevel@tonic-gate 			mutex_exit(smtx);
14690Sstevel@tonic-gate 
14700Sstevel@tonic-gate 			hat_flag = HAT_LOAD_LOCK;
14710Sstevel@tonic-gate 		} else if (softlock) {
14720Sstevel@tonic-gate 			hat_flag = HAT_LOAD_LOCK;
14730Sstevel@tonic-gate 		}
14740Sstevel@tonic-gate 
14750Sstevel@tonic-gate 		if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
14760Sstevel@tonic-gate 			hat_setmod(pp);
14770Sstevel@tonic-gate 
14780Sstevel@tonic-gate 		hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
14790Sstevel@tonic-gate 
14800Sstevel@tonic-gate 		if (hat_flag != HAT_LOAD_LOCK)
14810Sstevel@tonic-gate 			page_unlock(pp);
14820Sstevel@tonic-gate 
14830Sstevel@tonic-gate 		TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
14840Sstevel@tonic-gate 		    "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
14850Sstevel@tonic-gate 		    seg, addr, pp, vp, off);
14860Sstevel@tonic-gate 	}
14870Sstevel@tonic-gate 
14880Sstevel@tonic-gate 	return (newpage);
14890Sstevel@tonic-gate }
14900Sstevel@tonic-gate 
14910Sstevel@tonic-gate void
14920Sstevel@tonic-gate segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
14930Sstevel@tonic-gate {
14940Sstevel@tonic-gate 	struct smap	*smp;
14950Sstevel@tonic-gate 	ushort_t	bitmask;
14960Sstevel@tonic-gate 	page_t		*pp;
14970Sstevel@tonic-gate 	struct	vnode	*vp;
14980Sstevel@tonic-gate 	u_offset_t	off;
14990Sstevel@tonic-gate 	caddr_t		eaddr;
15000Sstevel@tonic-gate 	kmutex_t	*smtx;
15010Sstevel@tonic-gate 
15020Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
15030Sstevel@tonic-gate 
15040Sstevel@tonic-gate 	eaddr = addr + len;
15050Sstevel@tonic-gate 	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
15060Sstevel@tonic-gate 
15070Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
15080Sstevel@tonic-gate 		/*
15090Sstevel@tonic-gate 		 * Pages are successfully prefaulted and locked in
15100Sstevel@tonic-gate 		 * segmap_getmapflt and can't be unlocked until
15110Sstevel@tonic-gate 		 * segmap_release, so no pages or hat mappings have
15120Sstevel@tonic-gate 		 * to be unlocked at this point.
15130Sstevel@tonic-gate 		 */
15140Sstevel@tonic-gate #ifdef DEBUG
15150Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
15160Sstevel@tonic-gate 			panic("segmap_pageunlock: smap not found "
15170Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
15180Sstevel@tonic-gate 			/*NOTREACHED*/
15190Sstevel@tonic-gate 		}
15200Sstevel@tonic-gate 
15210Sstevel@tonic-gate 		ASSERT(smp->sm_refcnt > 0);
15220Sstevel@tonic-gate 		mutex_exit(SMAPMTX(smp));
15230Sstevel@tonic-gate #endif
15240Sstevel@tonic-gate 		return;
15250Sstevel@tonic-gate 	}
15260Sstevel@tonic-gate 
15270Sstevel@tonic-gate 	smp = GET_SMAP(seg, addr);
15280Sstevel@tonic-gate 	smtx = SMAPMTX(smp);
15290Sstevel@tonic-gate 
15300Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
15310Sstevel@tonic-gate 
15320Sstevel@tonic-gate 	vp = smp->sm_vp;
15330Sstevel@tonic-gate 	off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
15340Sstevel@tonic-gate 
15350Sstevel@tonic-gate 	for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
15360Sstevel@tonic-gate 		bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
15370Sstevel@tonic-gate 
15380Sstevel@tonic-gate 		/*
15390Sstevel@tonic-gate 		 * Large Files: The following assertion verifies
15400Sstevel@tonic-gate 		 * the correctness of the cast to (int) above.
15410Sstevel@tonic-gate 		 */
15420Sstevel@tonic-gate 		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
15430Sstevel@tonic-gate 
15440Sstevel@tonic-gate 		/*
15450Sstevel@tonic-gate 		 * If the bit corresponding to "off" is set,
15460Sstevel@tonic-gate 		 * clear this bit in the bitmap, unlock translations,
15470Sstevel@tonic-gate 		 * and release the "exclusive" lock on the page.
15480Sstevel@tonic-gate 		 */
15490Sstevel@tonic-gate 		if (smp->sm_bitmap & bitmask) {
15500Sstevel@tonic-gate 			mutex_enter(smtx);
15510Sstevel@tonic-gate 			smp->sm_bitmap &= ~bitmask;
15520Sstevel@tonic-gate 			mutex_exit(smtx);
15530Sstevel@tonic-gate 
15540Sstevel@tonic-gate 			hat_unlock(kas.a_hat, addr, PAGESIZE);
15550Sstevel@tonic-gate 
15560Sstevel@tonic-gate 			/*
15570Sstevel@tonic-gate 			 * Use page_find() instead of page_lookup() to
15580Sstevel@tonic-gate 			 * find the page since we know that it holds
15590Sstevel@tonic-gate 			 * the "exclusive" lock.
15600Sstevel@tonic-gate 			 */
15610Sstevel@tonic-gate 			pp = page_find(vp, off);
15620Sstevel@tonic-gate 			if (pp == NULL) {
15630Sstevel@tonic-gate 				panic("segmap_pageunlock: page not found");
15640Sstevel@tonic-gate 				/*NOTREACHED*/
15650Sstevel@tonic-gate 			}
15660Sstevel@tonic-gate 			if (rw == S_WRITE) {
15670Sstevel@tonic-gate 				hat_setrefmod(pp);
15680Sstevel@tonic-gate 			} else if (rw != S_OTHER) {
15690Sstevel@tonic-gate 				hat_setref(pp);
15700Sstevel@tonic-gate 			}
15710Sstevel@tonic-gate 
15720Sstevel@tonic-gate 			page_unlock(pp);
15730Sstevel@tonic-gate 		}
15740Sstevel@tonic-gate 	}
15750Sstevel@tonic-gate }
15760Sstevel@tonic-gate 
15770Sstevel@tonic-gate caddr_t
15780Sstevel@tonic-gate segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
15790Sstevel@tonic-gate {
15800Sstevel@tonic-gate 	return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
15810Sstevel@tonic-gate }
15820Sstevel@tonic-gate 
15830Sstevel@tonic-gate /*
15840Sstevel@tonic-gate  * This is the magic virtual address that offset 0 of an ELF
15850Sstevel@tonic-gate  * file gets mapped to in user space. This is used to pick
15860Sstevel@tonic-gate  * the VAC color on the freelist.
15870Sstevel@tonic-gate  */
15880Sstevel@tonic-gate #define	ELF_OFFZERO_VA	(0x10000)
15890Sstevel@tonic-gate /*
15900Sstevel@tonic-gate  * segmap_getmapflt allocates a MAXBSIZE-sized slot to map the vnode vp
15910Sstevel@tonic-gate  * in the range [off, off + len).  off doesn't need to be MAXBSIZE aligned.
15920Sstevel@tonic-gate  * The returned address is always MAXBSIZE aligned.
15930Sstevel@tonic-gate  *
15940Sstevel@tonic-gate  * If forcefault is nonzero and the MMU translations haven't yet been created,
15950Sstevel@tonic-gate  * segmap_getmapflt will call segmap_fault(..., F_INVAL, rw) to create them.
15960Sstevel@tonic-gate  */
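/*
 * Illustrative sketch only (not taken from this file): a typical file
 * system read path drives the window as follows, assuming the transfer
 * fits in one MAXBSIZE window; vp, off, n and uio are hypothetical
 * names:
 *
 *	size_t mapon = (size_t)(off & MAXBOFFSET);
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, off & (offset_t)MAXBMASK,
 *	    mapon + n, 1, S_READ);
 *	error = uiomove(base + mapon, n, UIO_READ, uio);
 *	error = segmap_release(segkmap, base, error ? 0 : SM_DONTNEED);
 *
 * With forcefault set to 1 the translations are prefaulted here, so
 * the uiomove copy does not take a fault per page.  The release flag
 * choice varies by caller.
 */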
15970Sstevel@tonic-gate caddr_t
15980Sstevel@tonic-gate segmap_getmapflt(
15990Sstevel@tonic-gate 	struct seg *seg,
16000Sstevel@tonic-gate 	struct vnode *vp,
16010Sstevel@tonic-gate 	u_offset_t off,
16020Sstevel@tonic-gate 	size_t len,
16030Sstevel@tonic-gate 	int forcefault,
16040Sstevel@tonic-gate 	enum seg_rw rw)
16050Sstevel@tonic-gate {
16060Sstevel@tonic-gate 	struct smap *smp, *nsmp;
16070Sstevel@tonic-gate 	extern struct vnode *common_specvp();
16080Sstevel@tonic-gate 	caddr_t baseaddr;			/* MAXBSIZE aligned */
16090Sstevel@tonic-gate 	u_offset_t baseoff;
16100Sstevel@tonic-gate 	int newslot;
16110Sstevel@tonic-gate 	caddr_t vaddr;
16120Sstevel@tonic-gate 	int color, hashid;
16130Sstevel@tonic-gate 	kmutex_t *hashmtx, *smapmtx;
16140Sstevel@tonic-gate 	struct smfree *sm;
16150Sstevel@tonic-gate 	page_t	*pp;
16160Sstevel@tonic-gate 	struct kpme *kpme;
16170Sstevel@tonic-gate 	uint_t	prot;
16180Sstevel@tonic-gate 	caddr_t base;
16190Sstevel@tonic-gate 	page_t	*pl[MAXPPB + 1];
16200Sstevel@tonic-gate 	int	error;
16210Sstevel@tonic-gate 	int	is_kpm = 1;
16220Sstevel@tonic-gate 
16230Sstevel@tonic-gate 	ASSERT(seg->s_as == &kas);
16240Sstevel@tonic-gate 	ASSERT(seg == segkmap);
16250Sstevel@tonic-gate 
16260Sstevel@tonic-gate 	baseoff = off & (offset_t)MAXBMASK;
16270Sstevel@tonic-gate 	if (off + len > baseoff + MAXBSIZE) {
16280Sstevel@tonic-gate 		panic("segmap_getmap bad len");
16290Sstevel@tonic-gate 		/*NOTREACHED*/
16300Sstevel@tonic-gate 	}
16310Sstevel@tonic-gate 
16320Sstevel@tonic-gate 	/*
16330Sstevel@tonic-gate 	 * If this is a block device we have to be sure to use the
16340Sstevel@tonic-gate 	 * "common" block device vnode for the mapping.
16350Sstevel@tonic-gate 	 */
16360Sstevel@tonic-gate 	if (vp->v_type == VBLK)
16370Sstevel@tonic-gate 		vp = common_specvp(vp);
16380Sstevel@tonic-gate 
16390Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
16400Sstevel@tonic-gate 
16410Sstevel@tonic-gate 	if (segmap_kpm == 0 ||
16420Sstevel@tonic-gate 	    (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
16430Sstevel@tonic-gate 		is_kpm = 0;
16440Sstevel@tonic-gate 	}
16450Sstevel@tonic-gate 
16460Sstevel@tonic-gate 	SMAP_HASHFUNC(vp, off, hashid);	/* macro assigns hashid */
16470Sstevel@tonic-gate 	hashmtx = SHASHMTX(hashid);
16480Sstevel@tonic-gate 
16490Sstevel@tonic-gate retry_hash:
16500Sstevel@tonic-gate 	mutex_enter(hashmtx);
16510Sstevel@tonic-gate 	for (smp = smd_hash[hashid].sh_hash_list;
16520Sstevel@tonic-gate 	    smp != NULL; smp = smp->sm_hash)
16530Sstevel@tonic-gate 		if (smp->sm_vp == vp && smp->sm_off == baseoff)
16540Sstevel@tonic-gate 			break;
16550Sstevel@tonic-gate 	mutex_exit(hashmtx);
16560Sstevel@tonic-gate 
16570Sstevel@tonic-gate vrfy_smp:
16580Sstevel@tonic-gate 	if (smp != NULL) {
16590Sstevel@tonic-gate 
16600Sstevel@tonic-gate 		ASSERT(vp->v_count != 0);
16610Sstevel@tonic-gate 
16620Sstevel@tonic-gate 		/*
16630Sstevel@tonic-gate 		 * Get smap lock and recheck its tag. The hash lock
16640Sstevel@tonic-gate 		 * is dropped since the hash is based on (vp, off)
16650Sstevel@tonic-gate 		 * and (vp, off) won't change while we hold the smap mtx.
16660Sstevel@tonic-gate 		 */
16670Sstevel@tonic-gate 		smapmtx = SMAPMTX(smp);
16680Sstevel@tonic-gate 		mutex_enter(smapmtx);
16690Sstevel@tonic-gate 		if (smp->sm_vp != vp || smp->sm_off != baseoff) {
16700Sstevel@tonic-gate 			mutex_exit(smapmtx);
16710Sstevel@tonic-gate 			goto retry_hash;
16720Sstevel@tonic-gate 		}
16730Sstevel@tonic-gate 
16740Sstevel@tonic-gate 		if (smp->sm_refcnt == 0) {
16750Sstevel@tonic-gate 
16760Sstevel@tonic-gate 			smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
16770Sstevel@tonic-gate 
16780Sstevel@tonic-gate 			/*
16790Sstevel@tonic-gate 			 * Could still be on the free list. However, this
16800Sstevel@tonic-gate 			 * could also be an smp that is transitioning from
16810Sstevel@tonic-gate 			 * the free list when we have too much contention
16820Sstevel@tonic-gate 			 * for the smapmtx's. In this case, we have an
16830Sstevel@tonic-gate 			 * unlocked smp that is not on the free list any
16840Sstevel@tonic-gate 			 * longer, but still has a 0 refcnt.  The only way
16850Sstevel@tonic-gate 			 * to be sure is to check the freelist pointers.
16860Sstevel@tonic-gate 			 * Since we now have the smapmtx, we are guaranteed
16870Sstevel@tonic-gate 			 * that the (vp, off) won't change, so we are safe
16880Sstevel@tonic-gate 			 * to reclaim it.  get_free_smp() knows that this
16890Sstevel@tonic-gate 			 * can happen, and it will check the refcnt.
16900Sstevel@tonic-gate 			 */
16910Sstevel@tonic-gate 
16920Sstevel@tonic-gate 			if (smp->sm_next != NULL) {
16930Sstevel@tonic-gate 				struct sm_freeq *freeq;
16940Sstevel@tonic-gate 
16950Sstevel@tonic-gate 				ASSERT(smp->sm_prev != NULL);
16960Sstevel@tonic-gate 				sm = &smd_free[smp->sm_free_ndx];
16970Sstevel@tonic-gate 
16980Sstevel@tonic-gate 				if (smp->sm_flags & SM_QNDX_ZERO)
16990Sstevel@tonic-gate 					freeq = &sm->sm_freeq[0];
17000Sstevel@tonic-gate 				else
17010Sstevel@tonic-gate 					freeq = &sm->sm_freeq[1];
17020Sstevel@tonic-gate 
17030Sstevel@tonic-gate 				mutex_enter(&freeq->smq_mtx);
17040Sstevel@tonic-gate 				if (freeq->smq_free != smp) {
17050Sstevel@tonic-gate 					/*
17060Sstevel@tonic-gate 					 * fastpath normal case
17070Sstevel@tonic-gate 					 */
17080Sstevel@tonic-gate 					smp->sm_prev->sm_next = smp->sm_next;
17090Sstevel@tonic-gate 					smp->sm_next->sm_prev = smp->sm_prev;
17100Sstevel@tonic-gate 				} else if (smp == smp->sm_next) {
17110Sstevel@tonic-gate 					/*
17120Sstevel@tonic-gate 					 * Taking the last smap on freelist
17130Sstevel@tonic-gate 					 */
17140Sstevel@tonic-gate 					freeq->smq_free = NULL;
17150Sstevel@tonic-gate 				} else {
17160Sstevel@tonic-gate 					/*
17170Sstevel@tonic-gate 					 * Reclaiming 1st smap on list
17180Sstevel@tonic-gate 					 */
17190Sstevel@tonic-gate 					freeq->smq_free = smp->sm_next;
17200Sstevel@tonic-gate 					smp->sm_prev->sm_next = smp->sm_next;
17210Sstevel@tonic-gate 					smp->sm_next->sm_prev = smp->sm_prev;
17220Sstevel@tonic-gate 				}
17230Sstevel@tonic-gate 				mutex_exit(&freeq->smq_mtx);
17240Sstevel@tonic-gate 				smp->sm_prev = smp->sm_next = NULL;
17250Sstevel@tonic-gate 			} else {
17260Sstevel@tonic-gate 				ASSERT(smp->sm_prev == NULL);
17270Sstevel@tonic-gate 				segmapcnt.smp_stolen.value.ul++;
17280Sstevel@tonic-gate 			}
17290Sstevel@tonic-gate 
17300Sstevel@tonic-gate 		} else {
17310Sstevel@tonic-gate 			segmapcnt.smp_get_use.value.ul++;
17320Sstevel@tonic-gate 		}
17330Sstevel@tonic-gate 		smp->sm_refcnt++;		/* another user */
17340Sstevel@tonic-gate 
17350Sstevel@tonic-gate 		/*
17360Sstevel@tonic-gate 		 * We don't invoke segmap_fault via TLB miss, so we set ref
17370Sstevel@tonic-gate 		 * and mod bits in advance. For S_OTHER we set them in
17380Sstevel@tonic-gate 		 * segmap_fault F_SOFTUNLOCK.
17390Sstevel@tonic-gate 		 */
17400Sstevel@tonic-gate 		if (is_kpm) {
17410Sstevel@tonic-gate 			if (rw == S_WRITE) {
17420Sstevel@tonic-gate 				smp->sm_flags |= SM_WRITE_DATA;
17430Sstevel@tonic-gate 			} else if (rw == S_READ) {
17440Sstevel@tonic-gate 				smp->sm_flags |= SM_READ_DATA;
17450Sstevel@tonic-gate 			}
17460Sstevel@tonic-gate 		}
17470Sstevel@tonic-gate 		mutex_exit(smapmtx);
17480Sstevel@tonic-gate 
17490Sstevel@tonic-gate 		newslot = 0;
17500Sstevel@tonic-gate 	} else {
17510Sstevel@tonic-gate 
17520Sstevel@tonic-gate 		uint32_t free_ndx, *free_ndxp;
17530Sstevel@tonic-gate 		union segmap_cpu *scpu;
17540Sstevel@tonic-gate 
17550Sstevel@tonic-gate 		/*
17560Sstevel@tonic-gate 		 * On a PAC machine or a machine with anti-alias
17570Sstevel@tonic-gate 		 * hardware, smd_colormsk will be zero.
17580Sstevel@tonic-gate 		 *
17590Sstevel@tonic-gate 		 * On a VAC machine, pick color by offset in the file
17600Sstevel@tonic-gate 		 * so we won't get VAC conflicts on ELF files.
17610Sstevel@tonic-gate 		 * On data files, color does not matter but we
17620Sstevel@tonic-gate 		 * don't know what kind of file it is so we always
17630Sstevel@tonic-gate 		 * pick color by offset. This causes color
17640Sstevel@tonic-gate 		 * corresponding to file offset zero to be used more
17650Sstevel@tonic-gate 		 * heavily.
17660Sstevel@tonic-gate 		 */
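		/*
		 * Worked example (illustrative numbers only): with an
		 * 8K MAXBSIZE and smd_colormsk == 3, baseoffs of 0,
		 * 8K, 16K and 24K select colors 0, 1, 2 and 3, and a
		 * baseoff of 32K wraps around to color 0 again.
		 */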
17670Sstevel@tonic-gate 		color = (baseoff >> MAXBSHIFT) & smd_colormsk;
17680Sstevel@tonic-gate 		scpu = smd_cpu+CPU->cpu_seqid;
17690Sstevel@tonic-gate 		free_ndxp = &scpu->scpu.scpu_free_ndx[color];
17700Sstevel@tonic-gate 		free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
17710Sstevel@tonic-gate #ifdef DEBUG
17720Sstevel@tonic-gate 		colors_used[free_ndx]++;
17730Sstevel@tonic-gate #endif /* DEBUG */
17740Sstevel@tonic-gate 
17750Sstevel@tonic-gate 		/*
17760Sstevel@tonic-gate 		 * Get a locked smp slot from the free list.
17770Sstevel@tonic-gate 		 */
17780Sstevel@tonic-gate 		smp = get_free_smp(free_ndx);
17790Sstevel@tonic-gate 		smapmtx = SMAPMTX(smp);
17800Sstevel@tonic-gate 
17810Sstevel@tonic-gate 		ASSERT(smp->sm_vp == NULL);
17820Sstevel@tonic-gate 
17830Sstevel@tonic-gate 		if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
17840Sstevel@tonic-gate 			/*
17850Sstevel@tonic-gate 			 * Failed to hashin; one already exists.  Put the
17860Sstevel@tonic-gate 			 * smp we just allocated back on the free list.
17870Sstevel@tonic-gate 			 */
17880Sstevel@tonic-gate 			segmap_smapadd(smp);
17890Sstevel@tonic-gate 			mutex_exit(smapmtx);
17900Sstevel@tonic-gate 
17910Sstevel@tonic-gate 			smp = nsmp;
17920Sstevel@tonic-gate 			goto vrfy_smp;
17930Sstevel@tonic-gate 		}
17940Sstevel@tonic-gate 		smp->sm_refcnt++;		/* another user */
17950Sstevel@tonic-gate 
17960Sstevel@tonic-gate 		/*
17970Sstevel@tonic-gate 		 * We don't invoke segmap_fault via TLB miss, so we set ref
17980Sstevel@tonic-gate 		 * and mod bits in advance. For S_OTHER we set them in
17990Sstevel@tonic-gate 		 * segmap_fault F_SOFTUNLOCK.
18000Sstevel@tonic-gate 		 */
18010Sstevel@tonic-gate 		if (is_kpm) {
18020Sstevel@tonic-gate 			if (rw == S_WRITE) {
18030Sstevel@tonic-gate 				smp->sm_flags |= SM_WRITE_DATA;
18040Sstevel@tonic-gate 			} else if (rw == S_READ) {
18050Sstevel@tonic-gate 				smp->sm_flags |= SM_READ_DATA;
18060Sstevel@tonic-gate 			}
18070Sstevel@tonic-gate 		}
18080Sstevel@tonic-gate 		mutex_exit(smapmtx);
18090Sstevel@tonic-gate 
18100Sstevel@tonic-gate 		newslot = 1;
18110Sstevel@tonic-gate 	}
18120Sstevel@tonic-gate 
18130Sstevel@tonic-gate 	if (!is_kpm)
18140Sstevel@tonic-gate 		goto use_segmap_range;
18150Sstevel@tonic-gate 
18160Sstevel@tonic-gate 	/*
18170Sstevel@tonic-gate 	 * Use segkpm
18180Sstevel@tonic-gate 	 */
18197632SNick.Todd@Sun.COM 	/* Lint directive required until 6746211 is fixed */
18207632SNick.Todd@Sun.COM 	/*CONSTCOND*/
18210Sstevel@tonic-gate 	ASSERT(PAGESIZE == MAXBSIZE);
18220Sstevel@tonic-gate 
18230Sstevel@tonic-gate 	/*
18240Sstevel@tonic-gate 	 * remember the last smp faulted on this cpu.
18250Sstevel@tonic-gate 	 */
18260Sstevel@tonic-gate 	(smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
18270Sstevel@tonic-gate 
18280Sstevel@tonic-gate 	if (forcefault == SM_PAGECREATE) {
18290Sstevel@tonic-gate 		baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
18300Sstevel@tonic-gate 		return (baseaddr);
18310Sstevel@tonic-gate 	}
18320Sstevel@tonic-gate 
18330Sstevel@tonic-gate 	if (newslot == 0 &&
18340Sstevel@tonic-gate 	    (pp = GET_KPME(smp)->kpe_page) != NULL) {
18350Sstevel@tonic-gate 
18360Sstevel@tonic-gate 		/* fastpath */
18370Sstevel@tonic-gate 		switch (rw) {
18380Sstevel@tonic-gate 		case S_READ:
18390Sstevel@tonic-gate 		case S_WRITE:
18400Sstevel@tonic-gate 			if (page_trylock(pp, SE_SHARED)) {
18410Sstevel@tonic-gate 				if (PP_ISFREE(pp) ||
18420Sstevel@tonic-gate 				    !(pp->p_vnode == vp &&
18430Sstevel@tonic-gate 				    pp->p_offset == baseoff)) {
18440Sstevel@tonic-gate 					page_unlock(pp);
18450Sstevel@tonic-gate 					pp = page_lookup(vp, baseoff,
18469281SPrakash.Sangappa@Sun.COM 					    SE_SHARED);
18470Sstevel@tonic-gate 				}
18480Sstevel@tonic-gate 			} else {
18490Sstevel@tonic-gate 				pp = page_lookup(vp, baseoff, SE_SHARED);
18500Sstevel@tonic-gate 			}
18510Sstevel@tonic-gate 
18520Sstevel@tonic-gate 			if (pp == NULL) {
18530Sstevel@tonic-gate 				ASSERT(GET_KPME(smp)->kpe_page == NULL);
18540Sstevel@tonic-gate 				break;
18550Sstevel@tonic-gate 			}
18560Sstevel@tonic-gate 
18570Sstevel@tonic-gate 			if (rw == S_WRITE &&
18580Sstevel@tonic-gate 			    hat_page_getattr(pp, P_MOD | P_REF) !=
18590Sstevel@tonic-gate 			    (P_MOD | P_REF)) {
18600Sstevel@tonic-gate 				page_unlock(pp);
18610Sstevel@tonic-gate 				break;
18620Sstevel@tonic-gate 			}
18630Sstevel@tonic-gate 
18640Sstevel@tonic-gate 			/*
18650Sstevel@tonic-gate 			 * We have the p_selock as reader, so grab_smp
18660Sstevel@tonic-gate 			 * can't hit us: we have bumped the smap
18670Sstevel@tonic-gate 			 * refcnt and hat_pageunload needs the
18680Sstevel@tonic-gate 			 * p_selock exclusive.
18690Sstevel@tonic-gate 			 */
18700Sstevel@tonic-gate 			kpme = GET_KPME(smp);
18710Sstevel@tonic-gate 			if (kpme->kpe_page == pp) {
18720Sstevel@tonic-gate 				baseaddr = hat_kpm_page2va(pp, 0);
18730Sstevel@tonic-gate 			} else if (kpme->kpe_page == NULL) {
18740Sstevel@tonic-gate 				baseaddr = hat_kpm_mapin(pp, kpme);
18750Sstevel@tonic-gate 			} else {
18760Sstevel@tonic-gate 				panic("segmap_getmapflt: stale "
18770Sstevel@tonic-gate 				    "kpme page, kpme %p", (void *)kpme);
18780Sstevel@tonic-gate 				/*NOTREACHED*/
18790Sstevel@tonic-gate 			}
18800Sstevel@tonic-gate 
18810Sstevel@tonic-gate 			/*
18820Sstevel@tonic-gate 			 * We don't invoke segmap_fault via TLB miss,
18830Sstevel@tonic-gate 			 * so we set ref and mod bits in advance.
18840Sstevel@tonic-gate 			 * For S_OTHER we set them in segmap_fault
18850Sstevel@tonic-gate 			 * F_SOFTUNLOCK.
18860Sstevel@tonic-gate 			 */
18870Sstevel@tonic-gate 			if (rw == S_READ && !hat_isref(pp))
18880Sstevel@tonic-gate 				hat_setref(pp);
18890Sstevel@tonic-gate 
18900Sstevel@tonic-gate 			return (baseaddr);
18910Sstevel@tonic-gate 		default:
18920Sstevel@tonic-gate 			break;
18930Sstevel@tonic-gate 		}
18940Sstevel@tonic-gate 	}
18950Sstevel@tonic-gate 
18960Sstevel@tonic-gate 	base = segkpm_create_va(baseoff);
18970Sstevel@tonic-gate 	error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
18985331Samw 	    seg, base, rw, CRED(), NULL);
18990Sstevel@tonic-gate 
19000Sstevel@tonic-gate 	pp = pl[0];
19010Sstevel@tonic-gate 	if (error || pp == NULL) {
19020Sstevel@tonic-gate 		/*
19030Sstevel@tonic-gate 		 * Use segmap address slot and let segmap_fault deal
19040Sstevel@tonic-gate 		 * with the error cases. There is no error return
19050Sstevel@tonic-gate 		 * possible here.
19060Sstevel@tonic-gate 		 */
19070Sstevel@tonic-gate 		goto use_segmap_range;
19080Sstevel@tonic-gate 	}
19090Sstevel@tonic-gate 
19100Sstevel@tonic-gate 	ASSERT(pl[1] == NULL);
19110Sstevel@tonic-gate 
19120Sstevel@tonic-gate 	/*
19130Sstevel@tonic-gate 	 * When prot is not returned with PROT_ALL, the returned pages
19140Sstevel@tonic-gate 	 * are not backed by fs blocks. For most segmap users this is
19150Sstevel@tonic-gate 	 * no problem: they don't write to the pages in the same
19160Sstevel@tonic-gate 	 * request and therefore don't rely on a following trap-driven
19170Sstevel@tonic-gate 	 * segmap_fault. For SM_LOCKPROTO users it is safer to use
19180Sstevel@tonic-gate 	 * segkmap addresses so that segmap_fault can enforce the
19190Sstevel@tonic-gate 	 * protections.
19200Sstevel@tonic-gate 	 */
19210Sstevel@tonic-gate 	if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
19220Sstevel@tonic-gate 		/*
19230Sstevel@tonic-gate 		 * Use segmap address slot and let segmap_fault
19240Sstevel@tonic-gate 		 * do the error return.
19250Sstevel@tonic-gate 		 */
19260Sstevel@tonic-gate 		ASSERT(rw != S_WRITE);
19270Sstevel@tonic-gate 		ASSERT(PAGE_LOCKED(pp));
19280Sstevel@tonic-gate 		page_unlock(pp);
19290Sstevel@tonic-gate 		forcefault = 0;
19300Sstevel@tonic-gate 		goto use_segmap_range;
19310Sstevel@tonic-gate 	}
19320Sstevel@tonic-gate 
19330Sstevel@tonic-gate 	/*
19340Sstevel@tonic-gate 	 * We have the p_selock as reader, so grab_smp can't hit us: we
19350Sstevel@tonic-gate 	 * have bumped the smap refcnt and hat_pageunload needs the
19360Sstevel@tonic-gate 	 * p_selock exclusive.
19370Sstevel@tonic-gate 	 */
19380Sstevel@tonic-gate 	kpme = GET_KPME(smp);
19390Sstevel@tonic-gate 	if (kpme->kpe_page == pp) {
19400Sstevel@tonic-gate 		baseaddr = hat_kpm_page2va(pp, 0);
19410Sstevel@tonic-gate 	} else if (kpme->kpe_page == NULL) {
19420Sstevel@tonic-gate 		baseaddr = hat_kpm_mapin(pp, kpme);
19430Sstevel@tonic-gate 	} else {
19440Sstevel@tonic-gate 		panic("segmap_getmapflt: stale kpme page after "
19450Sstevel@tonic-gate 		    "VOP_GETPAGE, kpme %p", (void *)kpme);
19460Sstevel@tonic-gate 		/*NOTREACHED*/
19470Sstevel@tonic-gate 	}
19480Sstevel@tonic-gate 
19490Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
19500Sstevel@tonic-gate 
19510Sstevel@tonic-gate 	return (baseaddr);
19520Sstevel@tonic-gate 
19540Sstevel@tonic-gate use_segmap_range:
19550Sstevel@tonic-gate 	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
19560Sstevel@tonic-gate 	TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
19570Sstevel@tonic-gate 	    "segmap_getmap:seg %p addr %p vp %p offset %llx",
19580Sstevel@tonic-gate 	    seg, baseaddr, vp, baseoff);
19590Sstevel@tonic-gate 
19600Sstevel@tonic-gate 	/*
19610Sstevel@tonic-gate 	 * Prefault the translations
19620Sstevel@tonic-gate 	 */
19630Sstevel@tonic-gate 	vaddr = baseaddr + (off - baseoff);
19640Sstevel@tonic-gate 	if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
19650Sstevel@tonic-gate 
19660Sstevel@tonic-gate 		caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
19670Sstevel@tonic-gate 		    (uintptr_t)PAGEMASK);
19680Sstevel@tonic-gate 
19690Sstevel@tonic-gate 		(void) segmap_fault(kas.a_hat, seg, pgaddr,
19700Sstevel@tonic-gate 		    (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
19710Sstevel@tonic-gate 		    F_INVAL, rw);
19720Sstevel@tonic-gate 	}
19730Sstevel@tonic-gate 
19740Sstevel@tonic-gate 	return (baseaddr);
19750Sstevel@tonic-gate }
19760Sstevel@tonic-gate 
19770Sstevel@tonic-gate int
19780Sstevel@tonic-gate segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
19790Sstevel@tonic-gate {
19800Sstevel@tonic-gate 	struct smap	*smp;
19810Sstevel@tonic-gate 	int 		error;
19820Sstevel@tonic-gate 	int		bflags = 0;
19830Sstevel@tonic-gate 	struct vnode	*vp;
19840Sstevel@tonic-gate 	u_offset_t	offset;
19850Sstevel@tonic-gate 	kmutex_t	*smtx;
19860Sstevel@tonic-gate 	int		is_kpm = 0;
19870Sstevel@tonic-gate 	page_t		*pp;
19880Sstevel@tonic-gate 
19890Sstevel@tonic-gate 	if (segmap_kpm && IS_KPM_ADDR(addr)) {
19900Sstevel@tonic-gate 
19910Sstevel@tonic-gate 		if (((uintptr_t)addr & MAXBOFFSET) != 0) {
19920Sstevel@tonic-gate 			panic("segmap_release: addr %p not "
19930Sstevel@tonic-gate 			    "MAXBSIZE aligned", (void *)addr);
19940Sstevel@tonic-gate 			/*NOTREACHED*/
19950Sstevel@tonic-gate 		}
19960Sstevel@tonic-gate 
19970Sstevel@tonic-gate 		if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
19980Sstevel@tonic-gate 			panic("segmap_release: smap not found "
19990Sstevel@tonic-gate 			    "for addr %p", (void *)addr);
20000Sstevel@tonic-gate 			/*NOTREACHED*/
20010Sstevel@tonic-gate 		}
20020Sstevel@tonic-gate 
20030Sstevel@tonic-gate 		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
20049281SPrakash.Sangappa@Sun.COM 		    "segmap_relmap:seg %p addr %p smp %p",
20059281SPrakash.Sangappa@Sun.COM 		    seg, addr, smp);
20060Sstevel@tonic-gate 
20070Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
20080Sstevel@tonic-gate 
20090Sstevel@tonic-gate 		/*
20105331Samw 		 * For compatibility reasons segmap_pagecreate_kpm sets this
20110Sstevel@tonic-gate 		 * flag to allow a following segmap_pagecreate to return
20120Sstevel@tonic-gate 		 * this as the "newpage" flag. When segmap_pagecreate is
20130Sstevel@tonic-gate 		 * not called at all, we clear it now.
20140Sstevel@tonic-gate 		 */
20150Sstevel@tonic-gate 		smp->sm_flags &= ~SM_KPM_NEWPAGE;
20160Sstevel@tonic-gate 		is_kpm = 1;
20170Sstevel@tonic-gate 		if (smp->sm_flags & SM_WRITE_DATA) {
20180Sstevel@tonic-gate 			hat_setrefmod(pp);
20190Sstevel@tonic-gate 		} else if (smp->sm_flags & SM_READ_DATA) {
20200Sstevel@tonic-gate 			hat_setref(pp);
20210Sstevel@tonic-gate 		}
20220Sstevel@tonic-gate 	} else {
20230Sstevel@tonic-gate 		if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
20240Sstevel@tonic-gate 		    ((uintptr_t)addr & MAXBOFFSET) != 0) {
20250Sstevel@tonic-gate 			panic("segmap_release: bad addr %p", (void *)addr);
20260Sstevel@tonic-gate 			/*NOTREACHED*/
20270Sstevel@tonic-gate 		}
20280Sstevel@tonic-gate 		smp = GET_SMAP(seg, addr);
20290Sstevel@tonic-gate 
20300Sstevel@tonic-gate 		TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
20319281SPrakash.Sangappa@Sun.COM 		    "segmap_relmap:seg %p addr %p smp %p",
20329281SPrakash.Sangappa@Sun.COM 		    seg, addr, smp);
20330Sstevel@tonic-gate 
20340Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
20350Sstevel@tonic-gate 		mutex_enter(smtx);
20360Sstevel@tonic-gate 		smp->sm_flags |= SM_NOTKPM_RELEASED;
20370Sstevel@tonic-gate 	}
20380Sstevel@tonic-gate 
20390Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
20400Sstevel@tonic-gate 
20410Sstevel@tonic-gate 	/*
20420Sstevel@tonic-gate 	 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
20430Sstevel@tonic-gate 	 * are set.
20440Sstevel@tonic-gate 	 */
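	/*
	 * Summary of the flag translation implemented below:
	 * SM_ASYNC -> B_ASYNC, SM_INVAL -> B_INVAL, and
	 * SM_DESTROY -> B_INVAL|B_TRUNC; only when this is the last
	 * reference (sm_refcnt == 1) do SM_FREE -> B_FREE and
	 * SM_DONTNEED -> B_DONTNEED apply.  SM_WRITE adds no bflags of
	 * its own; any flag other than SM_DONTNEED already forces the
	 * VOP_PUTPAGE.
	 */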
20450Sstevel@tonic-gate 	if ((flags & ~SM_DONTNEED) != 0) {
20460Sstevel@tonic-gate 		if (flags & SM_WRITE)
20470Sstevel@tonic-gate 			segmapcnt.smp_rel_write.value.ul++;
20480Sstevel@tonic-gate 		if (flags & SM_ASYNC) {
20490Sstevel@tonic-gate 			bflags |= B_ASYNC;
20500Sstevel@tonic-gate 			segmapcnt.smp_rel_async.value.ul++;
20510Sstevel@tonic-gate 		}
20520Sstevel@tonic-gate 		if (flags & SM_INVAL) {
20530Sstevel@tonic-gate 			bflags |= B_INVAL;
20540Sstevel@tonic-gate 			segmapcnt.smp_rel_abort.value.ul++;
20550Sstevel@tonic-gate 		}
20560Sstevel@tonic-gate 		if (flags & SM_DESTROY) {
20570Sstevel@tonic-gate 			bflags |= (B_INVAL|B_TRUNC);
20580Sstevel@tonic-gate 			segmapcnt.smp_rel_abort.value.ul++;
20590Sstevel@tonic-gate 		}
20600Sstevel@tonic-gate 		if (smp->sm_refcnt == 1) {
20610Sstevel@tonic-gate 			/*
20620Sstevel@tonic-gate 			 * We only bother doing the FREE and DONTNEED flags
20630Sstevel@tonic-gate 			 * if no one else is still referencing this mapping.
20640Sstevel@tonic-gate 			 */
20650Sstevel@tonic-gate 			if (flags & SM_FREE) {
20660Sstevel@tonic-gate 				bflags |= B_FREE;
20670Sstevel@tonic-gate 				segmapcnt.smp_rel_free.value.ul++;
20680Sstevel@tonic-gate 			}
20690Sstevel@tonic-gate 			if (flags & SM_DONTNEED) {
20700Sstevel@tonic-gate 				bflags |= B_DONTNEED;
20710Sstevel@tonic-gate 				segmapcnt.smp_rel_dontneed.value.ul++;
20720Sstevel@tonic-gate 			}
20730Sstevel@tonic-gate 		}
20740Sstevel@tonic-gate 	} else {
20750Sstevel@tonic-gate 		smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
20760Sstevel@tonic-gate 	}
20770Sstevel@tonic-gate 
20780Sstevel@tonic-gate 	vp = smp->sm_vp;
20790Sstevel@tonic-gate 	offset = smp->sm_off;
20800Sstevel@tonic-gate 
20810Sstevel@tonic-gate 	if (--smp->sm_refcnt == 0) {
20820Sstevel@tonic-gate 
2083848Sstans 		smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2084848Sstans 
20850Sstevel@tonic-gate 		if (flags & (SM_INVAL|SM_DESTROY)) {
20860Sstevel@tonic-gate 			segmap_hashout(smp);	/* remove map info */
20870Sstevel@tonic-gate 			if (is_kpm) {
20880Sstevel@tonic-gate 				hat_kpm_mapout(pp, GET_KPME(smp), addr);
20890Sstevel@tonic-gate 				if (smp->sm_flags & SM_NOTKPM_RELEASED) {
20900Sstevel@tonic-gate 					smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2091*10010SPrakash.Sangappa@Sun.COM 					hat_unload(kas.a_hat, segkmap->s_base +
2092*10010SPrakash.Sangappa@Sun.COM 					    ((smp - smd_smap) * MAXBSIZE),
2093*10010SPrakash.Sangappa@Sun.COM 					    MAXBSIZE, HAT_UNLOAD);
20940Sstevel@tonic-gate 				}
20950Sstevel@tonic-gate 
20960Sstevel@tonic-gate 			} else {
20970Sstevel@tonic-gate 				if (segmap_kpm)
20980Sstevel@tonic-gate 					segkpm_mapout_validkpme(GET_KPME(smp));
20990Sstevel@tonic-gate 
21000Sstevel@tonic-gate 				smp->sm_flags &= ~SM_NOTKPM_RELEASED;
21010Sstevel@tonic-gate 				hat_unload(kas.a_hat, addr, MAXBSIZE,
21029281SPrakash.Sangappa@Sun.COM 				    HAT_UNLOAD);
21030Sstevel@tonic-gate 			}
21040Sstevel@tonic-gate 		}
21050Sstevel@tonic-gate 		segmap_smapadd(smp);	/* add to free list */
21060Sstevel@tonic-gate 	}
21070Sstevel@tonic-gate 
21080Sstevel@tonic-gate 	mutex_exit(smtx);
21090Sstevel@tonic-gate 
21100Sstevel@tonic-gate 	if (is_kpm)
21110Sstevel@tonic-gate 		page_unlock(pp);
21120Sstevel@tonic-gate 	/*
21130Sstevel@tonic-gate 	 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
21140Sstevel@tonic-gate 	 * are set.
21150Sstevel@tonic-gate 	 */
21160Sstevel@tonic-gate 	if ((flags & ~SM_DONTNEED) != 0) {
21170Sstevel@tonic-gate 		error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
21185331Samw 		    bflags, CRED(), NULL);
21190Sstevel@tonic-gate 	} else {
21200Sstevel@tonic-gate 		error = 0;
21210Sstevel@tonic-gate 	}
21220Sstevel@tonic-gate 
21230Sstevel@tonic-gate 	return (error);
21240Sstevel@tonic-gate }
21250Sstevel@tonic-gate 
21260Sstevel@tonic-gate /*
21270Sstevel@tonic-gate  * Dump the pages belonging to this segmap segment.
21280Sstevel@tonic-gate  */
21290Sstevel@tonic-gate static void
21300Sstevel@tonic-gate segmap_dump(struct seg *seg)
21310Sstevel@tonic-gate {
21320Sstevel@tonic-gate 	struct segmap_data *smd;
21330Sstevel@tonic-gate 	struct smap *smp, *smp_end;
21340Sstevel@tonic-gate 	page_t *pp;
21350Sstevel@tonic-gate 	pfn_t pfn;
21360Sstevel@tonic-gate 	u_offset_t off;
21370Sstevel@tonic-gate 	caddr_t addr;
21380Sstevel@tonic-gate 
21390Sstevel@tonic-gate 	smd = (struct segmap_data *)seg->s_data;
21400Sstevel@tonic-gate 	addr = seg->s_base;
21410Sstevel@tonic-gate 	for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
21420Sstevel@tonic-gate 	    smp < smp_end; smp++) {
21430Sstevel@tonic-gate 
21440Sstevel@tonic-gate 		if (smp->sm_refcnt) {
21450Sstevel@tonic-gate 			for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
21460Sstevel@tonic-gate 				int we_own_it = 0;
21470Sstevel@tonic-gate 
21480Sstevel@tonic-gate 				 * page_lookup_nowait() returns NULL if the
21490Sstevel@tonic-gate 				 * page does not exist or is exclusively
21500Sstevel@tonic-gate 				 * locked, so fall back to page_exists()
21510Sstevel@tonic-gate 				 * to find it without taking a lock.
21520Sstevel@tonic-gate 				 * searching for it.
21530Sstevel@tonic-gate 				 */
21540Sstevel@tonic-gate 				if ((pp = page_lookup_nowait(smp->sm_vp,
21550Sstevel@tonic-gate 				    smp->sm_off + off, SE_SHARED)))
21560Sstevel@tonic-gate 					we_own_it = 1;
21570Sstevel@tonic-gate 				else
21580Sstevel@tonic-gate 					pp = page_exists(smp->sm_vp,
21590Sstevel@tonic-gate 					    smp->sm_off + off);
21600Sstevel@tonic-gate 
21610Sstevel@tonic-gate 				if (pp) {
21620Sstevel@tonic-gate 					pfn = page_pptonum(pp);
21630Sstevel@tonic-gate 					dump_addpage(seg->s_as,
21649281SPrakash.Sangappa@Sun.COM 					    addr + off, pfn);
21650Sstevel@tonic-gate 					if (we_own_it)
21660Sstevel@tonic-gate 						page_unlock(pp);
21670Sstevel@tonic-gate 				}
21680Sstevel@tonic-gate 				dump_timeleft = dump_timeout;
21690Sstevel@tonic-gate 			}
21700Sstevel@tonic-gate 		}
21710Sstevel@tonic-gate 		addr += MAXBSIZE;
21720Sstevel@tonic-gate 	}
21730Sstevel@tonic-gate }
21740Sstevel@tonic-gate 
21750Sstevel@tonic-gate /*ARGSUSED*/
21760Sstevel@tonic-gate static int
21770Sstevel@tonic-gate segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
21780Sstevel@tonic-gate     struct page ***ppp, enum lock_type type, enum seg_rw rw)
21790Sstevel@tonic-gate {
21800Sstevel@tonic-gate 	return (ENOTSUP);
21810Sstevel@tonic-gate }
21820Sstevel@tonic-gate 
21830Sstevel@tonic-gate static int
21840Sstevel@tonic-gate segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
21850Sstevel@tonic-gate {
21860Sstevel@tonic-gate 	struct segmap_data *smd = (struct segmap_data *)seg->s_data;
21870Sstevel@tonic-gate 
21880Sstevel@tonic-gate 	memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
21890Sstevel@tonic-gate 	memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
21900Sstevel@tonic-gate 	return (0);
21910Sstevel@tonic-gate }
21920Sstevel@tonic-gate 
21930Sstevel@tonic-gate /*ARGSUSED*/
21940Sstevel@tonic-gate static lgrp_mem_policy_info_t *
21950Sstevel@tonic-gate segmap_getpolicy(struct seg *seg, caddr_t addr)
21960Sstevel@tonic-gate {
21970Sstevel@tonic-gate 	return (NULL);
21980Sstevel@tonic-gate }
21990Sstevel@tonic-gate 
2200670Selowe /*ARGSUSED*/
2201670Selowe static int
2202670Selowe segmap_capable(struct seg *seg, segcapability_t capability)
2203670Selowe {
2204670Selowe 	return (0);
2205670Selowe }
2206670Selowe 
22080Sstevel@tonic-gate #ifdef	SEGKPM_SUPPORT
22090Sstevel@tonic-gate 
22100Sstevel@tonic-gate /*
22110Sstevel@tonic-gate  * segkpm support routines
22120Sstevel@tonic-gate  */
22130Sstevel@tonic-gate 
22140Sstevel@tonic-gate static caddr_t
22150Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
22160Sstevel@tonic-gate 	struct smap *smp, enum seg_rw rw)
22170Sstevel@tonic-gate {
22180Sstevel@tonic-gate 	caddr_t	base;
22190Sstevel@tonic-gate 	page_t	*pp;
22200Sstevel@tonic-gate 	int	newpage = 0;
22210Sstevel@tonic-gate 	struct kpme	*kpme;
22220Sstevel@tonic-gate 
22230Sstevel@tonic-gate 	ASSERT(smp->sm_refcnt > 0);
22240Sstevel@tonic-gate 
22250Sstevel@tonic-gate 	if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
22260Sstevel@tonic-gate 		kmutex_t *smtx;
22270Sstevel@tonic-gate 
22280Sstevel@tonic-gate 		base = segkpm_create_va(off);
22290Sstevel@tonic-gate 
22300Sstevel@tonic-gate 		if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
22310Sstevel@tonic-gate 		    seg, base)) == NULL) {
22320Sstevel@tonic-gate 			panic("segmap_pagecreate_kpm: "
22330Sstevel@tonic-gate 			    "page_create failed");
22340Sstevel@tonic-gate 			/*NOTREACHED*/
22350Sstevel@tonic-gate 		}
22360Sstevel@tonic-gate 
22370Sstevel@tonic-gate 		newpage = 1;
22380Sstevel@tonic-gate 		page_io_unlock(pp);
22390Sstevel@tonic-gate 		ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
22400Sstevel@tonic-gate 
22410Sstevel@tonic-gate 		/*
22420Sstevel@tonic-gate 		 * Mark this here; the flag stays set until the
22430Sstevel@tonic-gate 		 * following segmap_pagecreate or segmap_release.
22440Sstevel@tonic-gate 		 */
22450Sstevel@tonic-gate 		smtx = SMAPMTX(smp);
22460Sstevel@tonic-gate 		mutex_enter(smtx);
22470Sstevel@tonic-gate 		smp->sm_flags |= SM_KPM_NEWPAGE;
22480Sstevel@tonic-gate 		mutex_exit(smtx);
22490Sstevel@tonic-gate 	}
22500Sstevel@tonic-gate 
22510Sstevel@tonic-gate 	kpme = GET_KPME(smp);
22520Sstevel@tonic-gate 	if (!newpage && kpme->kpe_page == pp)
22530Sstevel@tonic-gate 		base = hat_kpm_page2va(pp, 0);
22540Sstevel@tonic-gate 	else
22550Sstevel@tonic-gate 		base = hat_kpm_mapin(pp, kpme);
22560Sstevel@tonic-gate 
22570Sstevel@tonic-gate 	/*
22580Sstevel@tonic-gate 	 * FS code may decide not to call segmap_pagecreate and we
22590Sstevel@tonic-gate 	 * don't invoke segmap_fault via TLB miss, so we have to set
22600Sstevel@tonic-gate 	 * ref and mod bits in advance.
22610Sstevel@tonic-gate 	 */
22620Sstevel@tonic-gate 	if (rw == S_WRITE) {
22630Sstevel@tonic-gate 		hat_setrefmod(pp);
22640Sstevel@tonic-gate 	} else {
22650Sstevel@tonic-gate 		ASSERT(rw == S_READ);
22660Sstevel@tonic-gate 		hat_setref(pp);
22670Sstevel@tonic-gate 	}
22680Sstevel@tonic-gate 
22690Sstevel@tonic-gate 	smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
22700Sstevel@tonic-gate 
22710Sstevel@tonic-gate 	return (base);
22720Sstevel@tonic-gate }
22730Sstevel@tonic-gate 
22740Sstevel@tonic-gate /*
22750Sstevel@tonic-gate  * Find the smap structure corresponding to the
22760Sstevel@tonic-gate  * KPM addr and return it locked.
22770Sstevel@tonic-gate  */
22780Sstevel@tonic-gate struct smap *
22790Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp)
22800Sstevel@tonic-gate {
22810Sstevel@tonic-gate 	struct smap	*smp;
22820Sstevel@tonic-gate 	struct vnode	*vp;
22830Sstevel@tonic-gate 	u_offset_t	offset;
22840Sstevel@tonic-gate 	caddr_t		baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
22850Sstevel@tonic-gate 	int		hashid;
22860Sstevel@tonic-gate 	kmutex_t	*hashmtx;
22870Sstevel@tonic-gate 	page_t		*pp;
22880Sstevel@tonic-gate 	union segmap_cpu *scpu;
22890Sstevel@tonic-gate 
22900Sstevel@tonic-gate 	pp = hat_kpm_vaddr2page(baseaddr);
22910Sstevel@tonic-gate 
22920Sstevel@tonic-gate 	ASSERT(pp && !PP_ISFREE(pp));
22930Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(pp));
22940Sstevel@tonic-gate 	ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
22950Sstevel@tonic-gate 
22960Sstevel@tonic-gate 	vp = pp->p_vnode;
22970Sstevel@tonic-gate 	offset = pp->p_offset;
22980Sstevel@tonic-gate 	ASSERT(vp != NULL);
22990Sstevel@tonic-gate 
23000Sstevel@tonic-gate 	/*
23010Sstevel@tonic-gate 	 * Assume the last smap used on this cpu is the one needed.
23020Sstevel@tonic-gate 	 */
23030Sstevel@tonic-gate 	scpu = smd_cpu+CPU->cpu_seqid;
23040Sstevel@tonic-gate 	smp = scpu->scpu.scpu_last_smap;
23050Sstevel@tonic-gate 	mutex_enter(&smp->sm_mtx);
23060Sstevel@tonic-gate 	if (smp->sm_vp == vp && smp->sm_off == offset) {
23070Sstevel@tonic-gate 		ASSERT(smp->sm_refcnt > 0);
23080Sstevel@tonic-gate 	} else {
23090Sstevel@tonic-gate 		/*
23100Sstevel@tonic-gate 		 * Assumption wrong, find the smap on the hash chain.
23110Sstevel@tonic-gate 		 */
23120Sstevel@tonic-gate 		mutex_exit(&smp->sm_mtx);
23130Sstevel@tonic-gate 		SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
23140Sstevel@tonic-gate 		hashmtx = SHASHMTX(hashid);
23150Sstevel@tonic-gate 
23160Sstevel@tonic-gate 		mutex_enter(hashmtx);
23170Sstevel@tonic-gate 		smp = smd_hash[hashid].sh_hash_list;
23180Sstevel@tonic-gate 		for (; smp != NULL; smp = smp->sm_hash) {
23190Sstevel@tonic-gate 			if (smp->sm_vp == vp && smp->sm_off == offset)
23200Sstevel@tonic-gate 				break;
23210Sstevel@tonic-gate 		}
23220Sstevel@tonic-gate 		mutex_exit(hashmtx);
23230Sstevel@tonic-gate 		if (smp) {
23240Sstevel@tonic-gate 			mutex_enter(&smp->sm_mtx);
23250Sstevel@tonic-gate 			ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
23260Sstevel@tonic-gate 		}
23270Sstevel@tonic-gate 	}
23280Sstevel@tonic-gate 
23290Sstevel@tonic-gate 	if (ppp)
23300Sstevel@tonic-gate 		*ppp = smp ? pp : NULL;
23310Sstevel@tonic-gate 
23320Sstevel@tonic-gate 	return (smp);
23330Sstevel@tonic-gate }
23340Sstevel@tonic-gate 
23350Sstevel@tonic-gate #else	/* SEGKPM_SUPPORT */
23360Sstevel@tonic-gate 
23370Sstevel@tonic-gate /* segkpm stubs */
23380Sstevel@tonic-gate 
23390Sstevel@tonic-gate /*ARGSUSED*/
23400Sstevel@tonic-gate static caddr_t
23410Sstevel@tonic-gate segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
23420Sstevel@tonic-gate 	struct smap *smp, enum seg_rw rw)
23430Sstevel@tonic-gate {
23440Sstevel@tonic-gate 	return (NULL);
23450Sstevel@tonic-gate }
23460Sstevel@tonic-gate 
23470Sstevel@tonic-gate /*ARGSUSED*/
23480Sstevel@tonic-gate struct smap *
23490Sstevel@tonic-gate get_smap_kpm(caddr_t addr, page_t **ppp)
23500Sstevel@tonic-gate {
23510Sstevel@tonic-gate 	return (NULL);
23520Sstevel@tonic-gate }
23530Sstevel@tonic-gate 
23540Sstevel@tonic-gate #endif	/* SEGKPM_SUPPORT */
2355