xref: /onnv-gate/usr/src/uts/common/os/grow.c (revision 9351:f85876ac403e)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52414Saguzovsk  * Common Development and Distribution License (the "License").
62414Saguzovsk  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
219200SRoger.Faulkner@Sun.COM 
220Sstevel@tonic-gate /*
239200SRoger.Faulkner@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
280Sstevel@tonic-gate /*	  All Rights Reserved  	*/
290Sstevel@tonic-gate 
300Sstevel@tonic-gate #include <sys/types.h>
310Sstevel@tonic-gate #include <sys/inttypes.h>
320Sstevel@tonic-gate #include <sys/param.h>
330Sstevel@tonic-gate #include <sys/sysmacros.h>
340Sstevel@tonic-gate #include <sys/systm.h>
350Sstevel@tonic-gate #include <sys/signal.h>
360Sstevel@tonic-gate #include <sys/user.h>
370Sstevel@tonic-gate #include <sys/errno.h>
380Sstevel@tonic-gate #include <sys/var.h>
390Sstevel@tonic-gate #include <sys/proc.h>
400Sstevel@tonic-gate #include <sys/tuneable.h>
410Sstevel@tonic-gate #include <sys/debug.h>
420Sstevel@tonic-gate #include <sys/cmn_err.h>
430Sstevel@tonic-gate #include <sys/cred.h>
440Sstevel@tonic-gate #include <sys/vnode.h>
450Sstevel@tonic-gate #include <sys/vfs.h>
460Sstevel@tonic-gate #include <sys/vm.h>
470Sstevel@tonic-gate #include <sys/file.h>
480Sstevel@tonic-gate #include <sys/mman.h>
490Sstevel@tonic-gate #include <sys/vmparam.h>
500Sstevel@tonic-gate #include <sys/fcntl.h>
510Sstevel@tonic-gate #include <sys/lwpchan_impl.h>
525331Samw #include <sys/nbmlock.h>
530Sstevel@tonic-gate 
540Sstevel@tonic-gate #include <vm/hat.h>
550Sstevel@tonic-gate #include <vm/as.h>
560Sstevel@tonic-gate #include <vm/seg.h>
570Sstevel@tonic-gate #include <vm/seg_dev.h>
580Sstevel@tonic-gate #include <vm/seg_vn.h>
590Sstevel@tonic-gate 
600Sstevel@tonic-gate int use_brk_lpg = 1;
610Sstevel@tonic-gate int use_stk_lpg = 1;
620Sstevel@tonic-gate 
630Sstevel@tonic-gate static int brk_lpg(caddr_t nva);
640Sstevel@tonic-gate static int grow_lpg(caddr_t sp);
650Sstevel@tonic-gate 
660Sstevel@tonic-gate int
brk(caddr_t nva)670Sstevel@tonic-gate brk(caddr_t nva)
680Sstevel@tonic-gate {
690Sstevel@tonic-gate 	int error;
700Sstevel@tonic-gate 	proc_t *p = curproc;
710Sstevel@tonic-gate 
720Sstevel@tonic-gate 	/*
730Sstevel@tonic-gate 	 * Serialize brk operations on an address space.
740Sstevel@tonic-gate 	 * This also serves as the lock protecting p_brksize
750Sstevel@tonic-gate 	 * and p_brkpageszc.
760Sstevel@tonic-gate 	 */
770Sstevel@tonic-gate 	as_rangelock(p->p_as);
780Sstevel@tonic-gate 	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
790Sstevel@tonic-gate 		error = brk_lpg(nva);
800Sstevel@tonic-gate 	} else {
810Sstevel@tonic-gate 		error = brk_internal(nva, p->p_brkpageszc);
820Sstevel@tonic-gate 	}
830Sstevel@tonic-gate 	as_rangeunlock(p->p_as);
840Sstevel@tonic-gate 	return ((error != 0 ? set_errno(error) : 0));
850Sstevel@tonic-gate }
860Sstevel@tonic-gate 
870Sstevel@tonic-gate /*
880Sstevel@tonic-gate  * Algorithm: call arch-specific map_pgsz to get best page size to use,
890Sstevel@tonic-gate  * then call brk_internal().
900Sstevel@tonic-gate  * Returns 0 on success.
910Sstevel@tonic-gate  */
920Sstevel@tonic-gate static int
brk_lpg(caddr_t nva)930Sstevel@tonic-gate brk_lpg(caddr_t nva)
940Sstevel@tonic-gate {
950Sstevel@tonic-gate 	struct proc *p = curproc;
960Sstevel@tonic-gate 	size_t pgsz, len;
972991Ssusans 	caddr_t addr, brkend;
980Sstevel@tonic-gate 	caddr_t bssbase = p->p_bssbase;
990Sstevel@tonic-gate 	caddr_t brkbase = p->p_brkbase;
1000Sstevel@tonic-gate 	int oszc, szc;
1010Sstevel@tonic-gate 	int err;
1020Sstevel@tonic-gate 
1030Sstevel@tonic-gate 	oszc = p->p_brkpageszc;
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	/*
1060Sstevel@tonic-gate 	 * If p_brkbase has not yet been set, the first call
1070Sstevel@tonic-gate 	 * to brk_internal() will initialize it.
1080Sstevel@tonic-gate 	 */
1090Sstevel@tonic-gate 	if (brkbase == 0) {
1100Sstevel@tonic-gate 		return (brk_internal(nva, oszc));
1110Sstevel@tonic-gate 	}
1120Sstevel@tonic-gate 
1130Sstevel@tonic-gate 	len = nva - bssbase;
1140Sstevel@tonic-gate 
1152991Ssusans 	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
1160Sstevel@tonic-gate 	szc = page_szc(pgsz);
1170Sstevel@tonic-gate 
1180Sstevel@tonic-gate 	/*
1190Sstevel@tonic-gate 	 * Covers two cases:
1200Sstevel@tonic-gate 	 * 1. page_szc() returns -1 for invalid page size, so we want to
1210Sstevel@tonic-gate 	 * ignore it in that case.
1220Sstevel@tonic-gate 	 * 2. By design we never decrease page size, as it is more stable.
1230Sstevel@tonic-gate 	 */
1240Sstevel@tonic-gate 	if (szc <= oszc) {
1250Sstevel@tonic-gate 		err = brk_internal(nva, oszc);
1260Sstevel@tonic-gate 		/* If failed, back off to base page size. */
1270Sstevel@tonic-gate 		if (err != 0 && oszc != 0) {
1280Sstevel@tonic-gate 			err = brk_internal(nva, 0);
1290Sstevel@tonic-gate 		}
1300Sstevel@tonic-gate 		return (err);
1310Sstevel@tonic-gate 	}
1320Sstevel@tonic-gate 
1330Sstevel@tonic-gate 	err = brk_internal(nva, szc);
1340Sstevel@tonic-gate 	/* If using szc failed, map with base page size and return. */
1350Sstevel@tonic-gate 	if (err != 0) {
1360Sstevel@tonic-gate 		if (szc != 0) {
1370Sstevel@tonic-gate 			err = brk_internal(nva, 0);
1380Sstevel@tonic-gate 		}
1390Sstevel@tonic-gate 		return (err);
1400Sstevel@tonic-gate 	}
1410Sstevel@tonic-gate 
1422991Ssusans 	/*
1432991Ssusans 	 * Round up brk base to a large page boundary and remap
1442991Ssusans 	 * anything in the segment already faulted in beyond that
1452991Ssusans 	 * point.
1462991Ssusans 	 */
1472991Ssusans 	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
1482991Ssusans 	brkend = brkbase + p->p_brksize;
1492991Ssusans 	len = brkend - addr;
1502991Ssusans 	/* Check that len is not negative. Update page size code for heap. */
1512991Ssusans 	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
1520Sstevel@tonic-gate 		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
1532991Ssusans 		p->p_brkpageszc = szc;
1540Sstevel@tonic-gate 	}
1550Sstevel@tonic-gate 
1560Sstevel@tonic-gate 	ASSERT(err == 0);
1570Sstevel@tonic-gate 	return (err);		/* should always be 0 */
1580Sstevel@tonic-gate }
1590Sstevel@tonic-gate 
1600Sstevel@tonic-gate /*
1610Sstevel@tonic-gate  * Returns 0 on success.
1620Sstevel@tonic-gate  */
1630Sstevel@tonic-gate int
brk_internal(caddr_t nva,uint_t brkszc)1640Sstevel@tonic-gate brk_internal(caddr_t nva, uint_t brkszc)
1650Sstevel@tonic-gate {
1660Sstevel@tonic-gate 	caddr_t ova;			/* current break address */
1670Sstevel@tonic-gate 	size_t size;
1680Sstevel@tonic-gate 	int	error;
1690Sstevel@tonic-gate 	struct proc *p = curproc;
1700Sstevel@tonic-gate 	struct as *as = p->p_as;
1710Sstevel@tonic-gate 	size_t pgsz;
1720Sstevel@tonic-gate 	uint_t szc;
1730Sstevel@tonic-gate 	rctl_qty_t as_rctl;
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 	/*
1760Sstevel@tonic-gate 	 * extend heap to brkszc alignment but use current p->p_brkpageszc
1770Sstevel@tonic-gate 	 * for the newly created segment. This allows the new extension
1780Sstevel@tonic-gate 	 * segment to be concatenated successfully with the existing brk
1790Sstevel@tonic-gate 	 * segment.
1800Sstevel@tonic-gate 	 */
1810Sstevel@tonic-gate 	if ((szc = brkszc) != 0) {
1820Sstevel@tonic-gate 		pgsz = page_get_pagesize(szc);
1830Sstevel@tonic-gate 		ASSERT(pgsz > PAGESIZE);
1840Sstevel@tonic-gate 	} else {
1850Sstevel@tonic-gate 		pgsz = PAGESIZE;
1860Sstevel@tonic-gate 	}
1870Sstevel@tonic-gate 
1880Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
1890Sstevel@tonic-gate 	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
1900Sstevel@tonic-gate 	    p->p_rctls, p);
1910Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
1920Sstevel@tonic-gate 
1930Sstevel@tonic-gate 	/*
1940Sstevel@tonic-gate 	 * If p_brkbase has not yet been set, the first call
1950Sstevel@tonic-gate 	 * to brk() will initialize it.
1960Sstevel@tonic-gate 	 */
1970Sstevel@tonic-gate 	if (p->p_brkbase == 0)
1980Sstevel@tonic-gate 		p->p_brkbase = nva;
1990Sstevel@tonic-gate 
2000Sstevel@tonic-gate 	/*
2010Sstevel@tonic-gate 	 * Before multiple page size support existed p_brksize was the value
2020Sstevel@tonic-gate 	 * not rounded to the pagesize (i.e. it stored the exact user request
2030Sstevel@tonic-gate 	 * for heap size). If pgsz is greater than PAGESIZE calculate the
2040Sstevel@tonic-gate 	 * heap size as the real new heap size by rounding it up to pgsz.
2050Sstevel@tonic-gate 	 * This is useful since we may want to know where the heap ends
2060Sstevel@tonic-gate 	 * without knowing heap pagesize (e.g. some old code) and also if
2070Sstevel@tonic-gate 	 * heap pagesize changes we can update p_brkpageszc but delay adding
2080Sstevel@tonic-gate 	 * new mapping yet still know from p_brksize where the heap really
2090Sstevel@tonic-gate 	 * ends. The user requested heap end is stored in libc variable.
2100Sstevel@tonic-gate 	 */
2110Sstevel@tonic-gate 	if (pgsz > PAGESIZE) {
2120Sstevel@tonic-gate 		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
2130Sstevel@tonic-gate 		size = tnva - p->p_brkbase;
2140Sstevel@tonic-gate 		if (tnva < p->p_brkbase || (size > p->p_brksize &&
2150Sstevel@tonic-gate 		    size > (size_t)as_rctl)) {
2160Sstevel@tonic-gate 			szc = 0;
2170Sstevel@tonic-gate 			pgsz = PAGESIZE;
2180Sstevel@tonic-gate 			size = nva - p->p_brkbase;
2190Sstevel@tonic-gate 		}
2200Sstevel@tonic-gate 	} else {
2210Sstevel@tonic-gate 		size = nva - p->p_brkbase;
2220Sstevel@tonic-gate 	}
2230Sstevel@tonic-gate 
2240Sstevel@tonic-gate 	/*
2250Sstevel@tonic-gate 	 * use PAGESIZE to roundup ova because we want to know the real value
2260Sstevel@tonic-gate 	 * of the current heap end in case p_brkpageszc changes since the last
2270Sstevel@tonic-gate 	 * p_brksize was computed.
2280Sstevel@tonic-gate 	 */
2290Sstevel@tonic-gate 	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
2300Sstevel@tonic-gate 	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
2316036Smec 	    PAGESIZE);
2320Sstevel@tonic-gate 
2330Sstevel@tonic-gate 	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
2340Sstevel@tonic-gate 	    size > as_rctl)) {
2350Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
2360Sstevel@tonic-gate 		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
2370Sstevel@tonic-gate 		    RCA_SAFE);
2380Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
2390Sstevel@tonic-gate 		return (ENOMEM);
2400Sstevel@tonic-gate 	}
2410Sstevel@tonic-gate 
2420Sstevel@tonic-gate 	if (nva > ova) {
2430Sstevel@tonic-gate 		struct segvn_crargs crargs =
2440Sstevel@tonic-gate 		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
2450Sstevel@tonic-gate 
2460Sstevel@tonic-gate 		if (!(p->p_datprot & PROT_EXEC)) {
2470Sstevel@tonic-gate 			crargs.prot &= ~PROT_EXEC;
2480Sstevel@tonic-gate 		}
2490Sstevel@tonic-gate 
2500Sstevel@tonic-gate 		/*
2510Sstevel@tonic-gate 		 * Add new zfod mapping to extend UNIX data segment
2522991Ssusans 		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
2532991Ssusans 		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
2542991Ssusans 		 * page sizes if ova is not aligned to szc's pgsz.
2550Sstevel@tonic-gate 		 */
2562991Ssusans 		if (szc > 0) {
2572991Ssusans 			caddr_t rbss;
2582991Ssusans 
2592991Ssusans 			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
2602991Ssusans 			    pgsz);
2612991Ssusans 			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
2622991Ssusans 				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
2632991Ssusans 				    AS_MAP_NO_LPOOB;
2642991Ssusans 			} else if (ova == rbss) {
2652991Ssusans 				crargs.szc = szc;
2662991Ssusans 			} else {
2672991Ssusans 				crargs.szc = AS_MAP_HEAP;
2682991Ssusans 			}
2692991Ssusans 		} else {
2702991Ssusans 			crargs.szc = AS_MAP_NO_LPOOB;
2712991Ssusans 		}
2720Sstevel@tonic-gate 		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
2730Sstevel@tonic-gate 		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
2740Sstevel@tonic-gate 		    &crargs);
2750Sstevel@tonic-gate 		if (error) {
2760Sstevel@tonic-gate 			return (error);
2770Sstevel@tonic-gate 		}
2780Sstevel@tonic-gate 
2790Sstevel@tonic-gate 	} else if (nva < ova) {
2800Sstevel@tonic-gate 		/*
2810Sstevel@tonic-gate 		 * Release mapping to shrink UNIX data segment.
2820Sstevel@tonic-gate 		 */
2830Sstevel@tonic-gate 		(void) as_unmap(as, nva, (size_t)(ova - nva));
2840Sstevel@tonic-gate 	}
2850Sstevel@tonic-gate 	p->p_brksize = size;
2860Sstevel@tonic-gate 	return (0);
2870Sstevel@tonic-gate }
2880Sstevel@tonic-gate 
2890Sstevel@tonic-gate /*
2900Sstevel@tonic-gate  * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
2910Sstevel@tonic-gate  * This routine assumes that the stack grows downward.
2920Sstevel@tonic-gate  */
2930Sstevel@tonic-gate int
grow(caddr_t sp)2940Sstevel@tonic-gate grow(caddr_t sp)
2950Sstevel@tonic-gate {
2960Sstevel@tonic-gate 	struct proc *p = curproc;
2972991Ssusans 	struct as *as = p->p_as;
2982991Ssusans 	size_t oldsize = p->p_stksize;
2992991Ssusans 	size_t newsize;
3000Sstevel@tonic-gate 	int err;
3010Sstevel@tonic-gate 
3020Sstevel@tonic-gate 	/*
3030Sstevel@tonic-gate 	 * Serialize grow operations on an address space.
3040Sstevel@tonic-gate 	 * This also serves as the lock protecting p_stksize
3050Sstevel@tonic-gate 	 * and p_stkpageszc.
3060Sstevel@tonic-gate 	 */
3072991Ssusans 	as_rangelock(as);
3080Sstevel@tonic-gate 	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
3090Sstevel@tonic-gate 		err = grow_lpg(sp);
3100Sstevel@tonic-gate 	} else {
3110Sstevel@tonic-gate 		err = grow_internal(sp, p->p_stkpageszc);
3120Sstevel@tonic-gate 	}
3132991Ssusans 	as_rangeunlock(as);
3142991Ssusans 
3152991Ssusans 	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
3162991Ssusans 		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
3172991Ssusans 		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
3182991Ssusans 		/*
3192991Ssusans 		 * Set up translations so the process doesn't have to fault in
3202991Ssusans 		 * the stack pages we just gave it.
3212991Ssusans 		 */
3222991Ssusans 		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
3232991Ssusans 		    newsize - oldsize, F_INVAL, S_WRITE);
3242991Ssusans 	}
3250Sstevel@tonic-gate 	return ((err == 0 ? 1 : 0));
3260Sstevel@tonic-gate }
3270Sstevel@tonic-gate 
3280Sstevel@tonic-gate /*
3290Sstevel@tonic-gate  * Algorithm: call arch-specific map_pgsz to get best page size to use,
3300Sstevel@tonic-gate  * then call grow_internal().
3310Sstevel@tonic-gate  * Returns 0 on success.
3320Sstevel@tonic-gate  */
3330Sstevel@tonic-gate static int
grow_lpg(caddr_t sp)3340Sstevel@tonic-gate grow_lpg(caddr_t sp)
3350Sstevel@tonic-gate {
3360Sstevel@tonic-gate 	struct proc *p = curproc;
3370Sstevel@tonic-gate 	size_t pgsz;
3380Sstevel@tonic-gate 	size_t len, newsize;
3392991Ssusans 	caddr_t addr, saddr;
3402991Ssusans 	caddr_t growend;
3410Sstevel@tonic-gate 	int oszc, szc;
3420Sstevel@tonic-gate 	int err;
3430Sstevel@tonic-gate 
3440Sstevel@tonic-gate 	newsize = p->p_usrstack - sp;
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate 	oszc = p->p_stkpageszc;
3472991Ssusans 	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
3480Sstevel@tonic-gate 	szc = page_szc(pgsz);
3490Sstevel@tonic-gate 
3500Sstevel@tonic-gate 	/*
3510Sstevel@tonic-gate 	 * Covers two cases:
3520Sstevel@tonic-gate 	 * 1. page_szc() returns -1 for invalid page size, so we want to
3530Sstevel@tonic-gate 	 * ignore it in that case.
3540Sstevel@tonic-gate 	 * 2. By design we never decrease page size, as it is more stable.
3550Sstevel@tonic-gate 	 * This shouldn't happen as the stack never shrinks.
3560Sstevel@tonic-gate 	 */
3570Sstevel@tonic-gate 	if (szc <= oszc) {
3580Sstevel@tonic-gate 		err = grow_internal(sp, oszc);
3590Sstevel@tonic-gate 		/* failed, fall back to base page size */
3600Sstevel@tonic-gate 		if (err != 0 && oszc != 0) {
3610Sstevel@tonic-gate 			err = grow_internal(sp, 0);
3620Sstevel@tonic-gate 		}
3630Sstevel@tonic-gate 		return (err);
3640Sstevel@tonic-gate 	}
3650Sstevel@tonic-gate 
3660Sstevel@tonic-gate 	/*
3670Sstevel@tonic-gate 	 * We've grown sufficiently to switch to a new page size.
3682991Ssusans 	 * So we are going to remap the whole segment with the new page size.
3690Sstevel@tonic-gate 	 */
3700Sstevel@tonic-gate 	err = grow_internal(sp, szc);
3710Sstevel@tonic-gate 	/* The grow with szc failed, so fall back to base page size. */
3720Sstevel@tonic-gate 	if (err != 0) {
3730Sstevel@tonic-gate 		if (szc != 0) {
3740Sstevel@tonic-gate 			err = grow_internal(sp, 0);
3750Sstevel@tonic-gate 		}
3760Sstevel@tonic-gate 		return (err);
3770Sstevel@tonic-gate 	}
3780Sstevel@tonic-gate 
3792991Ssusans 	/*
3802991Ssusans 	 * Round up stack pointer to a large page boundary and remap
3812991Ssusans 	 * any pgsz pages in the segment already faulted in beyond that
3822991Ssusans 	 * point.
3832991Ssusans 	 */
3842991Ssusans 	saddr = p->p_usrstack - p->p_stksize;
3852991Ssusans 	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
3862991Ssusans 	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
3872991Ssusans 	len = growend - addr;
3882991Ssusans 	/* Check that len is not negative. Update page size code for stack. */
3892991Ssusans 	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
3900Sstevel@tonic-gate 		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
3912991Ssusans 		p->p_stkpageszc = szc;
3920Sstevel@tonic-gate 	}
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate 	ASSERT(err == 0);
3950Sstevel@tonic-gate 	return (err);		/* should always be 0 */
3960Sstevel@tonic-gate }
3970Sstevel@tonic-gate 
3980Sstevel@tonic-gate /*
3990Sstevel@tonic-gate  * This routine assumes that the stack grows downward.
4000Sstevel@tonic-gate  * Returns 0 on success, errno on failure.
4010Sstevel@tonic-gate  */
4020Sstevel@tonic-gate int
grow_internal(caddr_t sp,uint_t growszc)4030Sstevel@tonic-gate grow_internal(caddr_t sp, uint_t growszc)
4040Sstevel@tonic-gate {
4050Sstevel@tonic-gate 	struct proc *p = curproc;
4062991Ssusans 	size_t newsize;
4070Sstevel@tonic-gate 	size_t oldsize;
4080Sstevel@tonic-gate 	int    error;
4090Sstevel@tonic-gate 	size_t pgsz;
4100Sstevel@tonic-gate 	uint_t szc;
4110Sstevel@tonic-gate 	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
4120Sstevel@tonic-gate 
4130Sstevel@tonic-gate 	ASSERT(sp < p->p_usrstack);
4142991Ssusans 	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
4150Sstevel@tonic-gate 
4160Sstevel@tonic-gate 	/*
4170Sstevel@tonic-gate 	 * grow to growszc alignment but use current p->p_stkpageszc for
4180Sstevel@tonic-gate 	 * the segvn_crargs szc passed to segvn_create. For memcntl to
4190Sstevel@tonic-gate 	 * increase the szc, this allows the new extension segment to be
4200Sstevel@tonic-gate 	 * concatenated successfully with the existing stack segment.
4210Sstevel@tonic-gate 	 */
4220Sstevel@tonic-gate 	if ((szc = growszc) != 0) {
4230Sstevel@tonic-gate 		pgsz = page_get_pagesize(szc);
4240Sstevel@tonic-gate 		ASSERT(pgsz > PAGESIZE);
4252991Ssusans 		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
4260Sstevel@tonic-gate 		if (newsize > (size_t)p->p_stk_ctl) {
4270Sstevel@tonic-gate 			szc = 0;
4280Sstevel@tonic-gate 			pgsz = PAGESIZE;
4290Sstevel@tonic-gate 			newsize = p->p_usrstack - sp;
4300Sstevel@tonic-gate 		}
4310Sstevel@tonic-gate 	} else {
4320Sstevel@tonic-gate 		pgsz = PAGESIZE;
4332991Ssusans 		newsize = p->p_usrstack - sp;
4340Sstevel@tonic-gate 	}
4350Sstevel@tonic-gate 
4360Sstevel@tonic-gate 	if (newsize > (size_t)p->p_stk_ctl) {
4370Sstevel@tonic-gate 		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
4380Sstevel@tonic-gate 		    RCA_UNSAFE_ALL);
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate 		return (ENOMEM);
4410Sstevel@tonic-gate 	}
4420Sstevel@tonic-gate 
4430Sstevel@tonic-gate 	oldsize = p->p_stksize;
4440Sstevel@tonic-gate 	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
4450Sstevel@tonic-gate 
4460Sstevel@tonic-gate 	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
4470Sstevel@tonic-gate 		return (0);
4480Sstevel@tonic-gate 	}
4490Sstevel@tonic-gate 
4500Sstevel@tonic-gate 	if (!(p->p_stkprot & PROT_EXEC)) {
4510Sstevel@tonic-gate 		crargs.prot &= ~PROT_EXEC;
4520Sstevel@tonic-gate 	}
4530Sstevel@tonic-gate 	/*
4542991Ssusans 	 * extend stack with the proposed new growszc, which is different
4552991Ssusans 	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
4562991Ssusans 	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
4572991Ssusans 	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
4582991Ssusans 	 * if not aligned to szc's pgsz.
4590Sstevel@tonic-gate 	 */
4602991Ssusans 	if (szc > 0) {
4612991Ssusans 		caddr_t oldsp = p->p_usrstack - oldsize;
4622991Ssusans 		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
4632991Ssusans 		    pgsz);
4642991Ssusans 
4652991Ssusans 		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
4662991Ssusans 			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
4672991Ssusans 			    AS_MAP_NO_LPOOB;
4682991Ssusans 		} else if (oldsp == austk) {
4692991Ssusans 			crargs.szc = szc;
4702991Ssusans 		} else {
4712991Ssusans 			crargs.szc = AS_MAP_STACK;
4722991Ssusans 		}
4732991Ssusans 	} else {
4742991Ssusans 		crargs.szc = AS_MAP_NO_LPOOB;
4752991Ssusans 	}
4760Sstevel@tonic-gate 	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
4770Sstevel@tonic-gate 
4782991Ssusans 	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
4790Sstevel@tonic-gate 	    segvn_create, &crargs)) != 0) {
4800Sstevel@tonic-gate 		if (error == EAGAIN) {
4810Sstevel@tonic-gate 			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
4823446Smrj 			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
4830Sstevel@tonic-gate 		}
4840Sstevel@tonic-gate 		return (error);
4850Sstevel@tonic-gate 	}
4860Sstevel@tonic-gate 	p->p_stksize = newsize;
4870Sstevel@tonic-gate 	return (0);
4880Sstevel@tonic-gate }
4890Sstevel@tonic-gate 
4900Sstevel@tonic-gate /*
4916036Smec  * Find address for user to map.
4926036Smec  * If MAP_FIXED is not specified, we can pick any address we want, but we will
4936036Smec  * first try the value in *addrp if it is non-NULL.  Thus this is implementing
4946036Smec  * a way to try and get a preferred address.
4956036Smec  */
4966036Smec int
choose_addr(struct as * as,caddr_t * addrp,size_t len,offset_t off,int vacalign,uint_t flags)4976036Smec choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
4986036Smec     int vacalign, uint_t flags)
4996036Smec {
5006036Smec 	caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
5016036Smec 	size_t lenp = len;
5026036Smec 
5036036Smec 	ASSERT(AS_ISCLAIMGAP(as));	/* searches should be serialized */
5046036Smec 	if (flags & MAP_FIXED) {
5056036Smec 		(void) as_unmap(as, *addrp, len);
5066036Smec 		return (0);
5076036Smec 	} else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
5086036Smec 	    !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
5096036Smec 		/* User supplied address was available */
5106036Smec 		*addrp = basep;
5116036Smec 	} else {
5126036Smec 		/*
5136036Smec 		 * No user supplied address or the address supplied was not
5146036Smec 		 * available.
5156036Smec 		 */
5166036Smec 		map_addr(addrp, len, off, vacalign, flags);
5176036Smec 	}
5186036Smec 	if (*addrp == NULL)
5196036Smec 		return (ENOMEM);
5206036Smec 	return (0);
5216036Smec }
5226036Smec 
5236036Smec 
5246036Smec /*
5250Sstevel@tonic-gate  * Used for MAP_ANON - fast way to get anonymous pages
5260Sstevel@tonic-gate  */
5270Sstevel@tonic-gate static int
zmap(struct as * as,caddr_t * addrp,size_t len,uint_t uprot,int flags,offset_t pos)5280Sstevel@tonic-gate zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
5290Sstevel@tonic-gate     offset_t pos)
5300Sstevel@tonic-gate {
5312991Ssusans 	struct segvn_crargs vn_a;
5326036Smec 	int error;
5330Sstevel@tonic-gate 
5340Sstevel@tonic-gate 	if (((PROT_ALL & uprot) != uprot))
5350Sstevel@tonic-gate 		return (EACCES);
5360Sstevel@tonic-gate 
5370Sstevel@tonic-gate 	if ((flags & MAP_FIXED) != 0) {
5380Sstevel@tonic-gate 		caddr_t userlimit;
5390Sstevel@tonic-gate 
5400Sstevel@tonic-gate 		/*
5410Sstevel@tonic-gate 		 * Use the user address.  First verify that
5420Sstevel@tonic-gate 		 * the address to be used is page aligned.
5430Sstevel@tonic-gate 		 * Then make some simple bounds checks.
5440Sstevel@tonic-gate 		 */
5450Sstevel@tonic-gate 		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
5460Sstevel@tonic-gate 			return (EINVAL);
5470Sstevel@tonic-gate 
5480Sstevel@tonic-gate 		userlimit = flags & _MAP_LOW32 ?
5490Sstevel@tonic-gate 		    (caddr_t)USERLIMIT32 : as->a_userlimit;
5500Sstevel@tonic-gate 		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
5510Sstevel@tonic-gate 		case RANGE_OKAY:
5520Sstevel@tonic-gate 			break;
5530Sstevel@tonic-gate 		case RANGE_BADPROT:
5540Sstevel@tonic-gate 			return (ENOTSUP);
5550Sstevel@tonic-gate 		case RANGE_BADADDR:
5560Sstevel@tonic-gate 		default:
5570Sstevel@tonic-gate 			return (ENOMEM);
5580Sstevel@tonic-gate 		}
5596036Smec 	}
5606036Smec 	/*
5616036Smec 	 * No need to worry about vac alignment for anonymous
5626036Smec 	 * pages since this is a "clone" object that doesn't
5636036Smec 	 * yet exist.
5646036Smec 	 */
5656036Smec 	error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
5666036Smec 	if (error != 0) {
5676036Smec 		return (error);
5680Sstevel@tonic-gate 	}
5690Sstevel@tonic-gate 
5700Sstevel@tonic-gate 	/*
5710Sstevel@tonic-gate 	 * Use the seg_vn segment driver; passing in the NULL amp
5720Sstevel@tonic-gate 	 * gives the desired "cloning" effect.
5730Sstevel@tonic-gate 	 */
5742991Ssusans 	vn_a.vp = NULL;
5752991Ssusans 	vn_a.offset = 0;
5762991Ssusans 	vn_a.type = flags & MAP_TYPE;
5772991Ssusans 	vn_a.prot = uprot;
5782991Ssusans 	vn_a.maxprot = PROT_ALL;
5792991Ssusans 	vn_a.flags = flags & ~MAP_TYPE;
5802991Ssusans 	vn_a.cred = CRED();
5812991Ssusans 	vn_a.amp = NULL;
5822991Ssusans 	vn_a.szc = 0;
5832991Ssusans 	vn_a.lgrp_mem_policy_flags = 0;
5840Sstevel@tonic-gate 
5852991Ssusans 	return (as_map(as, *addrp, len, segvn_create, &vn_a));
5860Sstevel@tonic-gate }
5870Sstevel@tonic-gate 
5880Sstevel@tonic-gate static int
smmap_common(caddr_t * addrp,size_t len,int prot,int flags,struct file * fp,offset_t pos)5890Sstevel@tonic-gate smmap_common(caddr_t *addrp, size_t len,
5900Sstevel@tonic-gate     int prot, int flags, struct file *fp, offset_t pos)
5910Sstevel@tonic-gate {
5920Sstevel@tonic-gate 	struct vnode *vp;
5930Sstevel@tonic-gate 	struct as *as = curproc->p_as;
5940Sstevel@tonic-gate 	uint_t uprot, maxprot, type;
5950Sstevel@tonic-gate 	int error;
5965331Samw 	int in_crit = 0;
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
5990Sstevel@tonic-gate 	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
6000Sstevel@tonic-gate 	    MAP_TEXT | MAP_INITDATA)) != 0) {
6010Sstevel@tonic-gate 		/* | MAP_RENAME */	/* not implemented, let user know */
6020Sstevel@tonic-gate 		return (EINVAL);
6030Sstevel@tonic-gate 	}
6040Sstevel@tonic-gate 
6050Sstevel@tonic-gate 	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
6060Sstevel@tonic-gate 		return (EINVAL);
6070Sstevel@tonic-gate 	}
6080Sstevel@tonic-gate 
6090Sstevel@tonic-gate 	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
6100Sstevel@tonic-gate 		return (EINVAL);
6110Sstevel@tonic-gate 	}
6120Sstevel@tonic-gate 
6130Sstevel@tonic-gate #if defined(__sparc)
6140Sstevel@tonic-gate 	/*
6150Sstevel@tonic-gate 	 * See if this is an "old mmap call".  If so, remember this
6160Sstevel@tonic-gate 	 * fact and convert the flags value given to mmap to indicate
6170Sstevel@tonic-gate 	 * the specified address in the system call must be used.
6180Sstevel@tonic-gate 	 * _MAP_NEW is turned set by all new uses of mmap.
6190Sstevel@tonic-gate 	 */
6200Sstevel@tonic-gate 	if ((flags & _MAP_NEW) == 0)
6210Sstevel@tonic-gate 		flags |= MAP_FIXED;
6220Sstevel@tonic-gate #endif
6230Sstevel@tonic-gate 	flags &= ~_MAP_NEW;
6240Sstevel@tonic-gate 
6250Sstevel@tonic-gate 	type = flags & MAP_TYPE;
6260Sstevel@tonic-gate 	if (type != MAP_PRIVATE && type != MAP_SHARED)
6270Sstevel@tonic-gate 		return (EINVAL);
6280Sstevel@tonic-gate 
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate 	if (flags & MAP_ALIGN) {
6310Sstevel@tonic-gate 
6320Sstevel@tonic-gate 		if (flags & MAP_FIXED)
6330Sstevel@tonic-gate 			return (EINVAL);
6340Sstevel@tonic-gate 
6350Sstevel@tonic-gate 		/* alignment needs to be a power of 2 >= page size */
6360Sstevel@tonic-gate 		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
6376036Smec 		    !ISP2((uintptr_t)*addrp))
6380Sstevel@tonic-gate 			return (EINVAL);
6390Sstevel@tonic-gate 	}
6400Sstevel@tonic-gate 	/*
6410Sstevel@tonic-gate 	 * Check for bad lengths and file position.
6420Sstevel@tonic-gate 	 * We let the VOP_MAP routine check for negative lengths
6430Sstevel@tonic-gate 	 * since on some vnode types this might be appropriate.
6440Sstevel@tonic-gate 	 */
6450Sstevel@tonic-gate 	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
6460Sstevel@tonic-gate 		return (EINVAL);
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	maxprot = PROT_ALL;		/* start out allowing all accesses */
6490Sstevel@tonic-gate 	uprot = prot | PROT_USER;
6500Sstevel@tonic-gate 
6510Sstevel@tonic-gate 	if (fp == NULL) {
6520Sstevel@tonic-gate 		ASSERT(flags & MAP_ANON);
6539200SRoger.Faulkner@Sun.COM 		/* discard lwpchan mappings, like munmap() */
6549200SRoger.Faulkner@Sun.COM 		if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
6559200SRoger.Faulkner@Sun.COM 			lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
6560Sstevel@tonic-gate 		as_rangelock(as);
6570Sstevel@tonic-gate 		error = zmap(as, addrp, len, uprot, flags, pos);
6580Sstevel@tonic-gate 		as_rangeunlock(as);
659*9351SPrashanth.Sreenivasa@Sun.COM 		/*
660*9351SPrashanth.Sreenivasa@Sun.COM 		 * Tell machine specific code that lwp has mapped shared memory
661*9351SPrashanth.Sreenivasa@Sun.COM 		 */
662*9351SPrashanth.Sreenivasa@Sun.COM 		if (error == 0 && (flags & MAP_SHARED)) {
663*9351SPrashanth.Sreenivasa@Sun.COM 			/* EMPTY */
664*9351SPrashanth.Sreenivasa@Sun.COM 			LWP_MMODEL_SHARED_AS(*addrp, len);
665*9351SPrashanth.Sreenivasa@Sun.COM 		}
6660Sstevel@tonic-gate 		return (error);
6670Sstevel@tonic-gate 	} else if ((flags & MAP_ANON) != 0)
6680Sstevel@tonic-gate 		return (EINVAL);
6690Sstevel@tonic-gate 
6700Sstevel@tonic-gate 	vp = fp->f_vnode;
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	/* Can't execute code from "noexec" mounted filesystem. */
6730Sstevel@tonic-gate 	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
6740Sstevel@tonic-gate 		maxprot &= ~PROT_EXEC;
6750Sstevel@tonic-gate 
6760Sstevel@tonic-gate 	/*
6770Sstevel@tonic-gate 	 * These checks were added as part of large files.
6780Sstevel@tonic-gate 	 *
679146Speterte 	 * Return ENXIO if the initial position is negative; return EOVERFLOW
6800Sstevel@tonic-gate 	 * if (offset + len) would overflow the maximum allowed offset for the
6810Sstevel@tonic-gate 	 * type of file descriptor being used.
6820Sstevel@tonic-gate 	 */
6830Sstevel@tonic-gate 	if (vp->v_type == VREG) {
684146Speterte 		if (pos < 0)
685146Speterte 			return (ENXIO);
6860Sstevel@tonic-gate 		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
6870Sstevel@tonic-gate 			return (EOVERFLOW);
6880Sstevel@tonic-gate 	}
6890Sstevel@tonic-gate 
6900Sstevel@tonic-gate 	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
6910Sstevel@tonic-gate 		/* no write access allowed */
6920Sstevel@tonic-gate 		maxprot &= ~PROT_WRITE;
6930Sstevel@tonic-gate 	}
6940Sstevel@tonic-gate 
6950Sstevel@tonic-gate 	/*
6960Sstevel@tonic-gate 	 * XXX - Do we also adjust maxprot based on protections
6970Sstevel@tonic-gate 	 * of the vnode?  E.g. if no execute permission is given
6980Sstevel@tonic-gate 	 * on the vnode for the current user, maxprot probably
6990Sstevel@tonic-gate 	 * should disallow PROT_EXEC also?  This is different
7000Sstevel@tonic-gate 	 * from the write access as this would be a per vnode
7010Sstevel@tonic-gate 	 * test as opposed to a per fd test for writability.
7020Sstevel@tonic-gate 	 */
7030Sstevel@tonic-gate 
	/*
	 * Verify that the specified protections are not greater than
	 * the maximum allowable protections.  Also test to make sure
	 * that the file descriptor allows for read access, since
	 * "write only" mappings are hard to do: normally we do
	 * the read from the file before the page can be written.
	 */
7110Sstevel@tonic-gate 	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
7120Sstevel@tonic-gate 		return (EACCES);
7130Sstevel@tonic-gate 
7140Sstevel@tonic-gate 	/*
7150Sstevel@tonic-gate 	 * If the user specified an address, do some simple checks here
7160Sstevel@tonic-gate 	 */
7170Sstevel@tonic-gate 	if ((flags & MAP_FIXED) != 0) {
7180Sstevel@tonic-gate 		caddr_t userlimit;
7190Sstevel@tonic-gate 
7200Sstevel@tonic-gate 		/*
7210Sstevel@tonic-gate 		 * Use the user address.  First verify that
7220Sstevel@tonic-gate 		 * the address to be used is page aligned.
7230Sstevel@tonic-gate 		 * Then make some simple bounds checks.
7240Sstevel@tonic-gate 		 */
7250Sstevel@tonic-gate 		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
7260Sstevel@tonic-gate 			return (EINVAL);
7270Sstevel@tonic-gate 
7280Sstevel@tonic-gate 		userlimit = flags & _MAP_LOW32 ?
7290Sstevel@tonic-gate 		    (caddr_t)USERLIMIT32 : as->a_userlimit;
7300Sstevel@tonic-gate 		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
7310Sstevel@tonic-gate 		case RANGE_OKAY:
7320Sstevel@tonic-gate 			break;
7330Sstevel@tonic-gate 		case RANGE_BADPROT:
7340Sstevel@tonic-gate 			return (ENOTSUP);
7350Sstevel@tonic-gate 		case RANGE_BADADDR:
7360Sstevel@tonic-gate 		default:
7370Sstevel@tonic-gate 			return (ENOMEM);
7380Sstevel@tonic-gate 		}
7390Sstevel@tonic-gate 	}
7400Sstevel@tonic-gate 
7415331Samw 	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
7425331Samw 	    nbl_need_check(vp)) {
7435331Samw 		int svmand;
7445331Samw 		nbl_op_t nop;
7455331Samw 
7465331Samw 		nbl_start_crit(vp, RW_READER);
7475331Samw 		in_crit = 1;
7485331Samw 		error = nbl_svmand(vp, fp->f_cred, &svmand);
7495331Samw 		if (error != 0)
7505331Samw 			goto done;
7515331Samw 		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
7525331Samw 			if (prot & (PROT_READ | PROT_EXEC)) {
7535331Samw 				nop = NBL_READWRITE;
7545331Samw 			} else {
7555331Samw 				nop = NBL_WRITE;
7565331Samw 			}
7575331Samw 		} else {
7585331Samw 			nop = NBL_READ;
7595331Samw 		}
7605331Samw 		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
7615331Samw 			error = EACCES;
7625331Samw 			goto done;
7635331Samw 		}
7645331Samw 	}
7650Sstevel@tonic-gate 
7669200SRoger.Faulkner@Sun.COM 	/* discard lwpchan mappings, like munmap() */
7679200SRoger.Faulkner@Sun.COM 	if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
7689200SRoger.Faulkner@Sun.COM 		lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
7699200SRoger.Faulkner@Sun.COM 
7700Sstevel@tonic-gate 	/*
7710Sstevel@tonic-gate 	 * Ok, now let the vnode map routine do its thing to set things up.
7720Sstevel@tonic-gate 	 */
7730Sstevel@tonic-gate 	error = VOP_MAP(vp, pos, as,
7745331Samw 	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate 	if (error == 0) {
777*9351SPrashanth.Sreenivasa@Sun.COM 		/*
778*9351SPrashanth.Sreenivasa@Sun.COM 		 * Tell machine specific code that lwp has mapped shared memory
779*9351SPrashanth.Sreenivasa@Sun.COM 		 */
780*9351SPrashanth.Sreenivasa@Sun.COM 		if (flags & MAP_SHARED) {
781*9351SPrashanth.Sreenivasa@Sun.COM 			/* EMPTY */
782*9351SPrashanth.Sreenivasa@Sun.COM 			LWP_MMODEL_SHARED_AS(*addrp, len);
783*9351SPrashanth.Sreenivasa@Sun.COM 		}
7840Sstevel@tonic-gate 		if (vp->v_type == VREG &&
7850Sstevel@tonic-gate 		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
7860Sstevel@tonic-gate 			/*
7870Sstevel@tonic-gate 			 * Mark this as an executable vnode
7880Sstevel@tonic-gate 			 */
7890Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
7900Sstevel@tonic-gate 			vp->v_flag |= VVMEXEC;
7910Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
7920Sstevel@tonic-gate 		}
7930Sstevel@tonic-gate 	}
7940Sstevel@tonic-gate 
7955331Samw done:
7965331Samw 	if (in_crit)
7975331Samw 		nbl_end_crit(vp);
7980Sstevel@tonic-gate 	return (error);
7990Sstevel@tonic-gate }
8000Sstevel@tonic-gate 
8010Sstevel@tonic-gate #ifdef _LP64
8020Sstevel@tonic-gate /*
8030Sstevel@tonic-gate  * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
8040Sstevel@tonic-gate  *
8050Sstevel@tonic-gate  * The "large file" mmap routine mmap64(2) is also mapped to this routine
8060Sstevel@tonic-gate  * by the 64-bit version of libc.
8070Sstevel@tonic-gate  *
8080Sstevel@tonic-gate  * Eventually, this should be the only version, and have smmap_common()
8090Sstevel@tonic-gate  * folded back into it again.  Some day.
8100Sstevel@tonic-gate  */
8110Sstevel@tonic-gate caddr_t
smmap64(caddr_t addr,size_t len,int prot,int flags,int fd,off_t pos)8120Sstevel@tonic-gate smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
8130Sstevel@tonic-gate {
8140Sstevel@tonic-gate 	struct file *fp;
8150Sstevel@tonic-gate 	int error;
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
8180Sstevel@tonic-gate 		error = EINVAL;
8190Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
8200Sstevel@tonic-gate 		error = smmap_common(&addr, len, prot, flags,
8210Sstevel@tonic-gate 		    NULL, (offset_t)pos);
8220Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
8230Sstevel@tonic-gate 		error = smmap_common(&addr, len, prot, flags,
8240Sstevel@tonic-gate 		    fp, (offset_t)pos);
8250Sstevel@tonic-gate 		releasef(fd);
8260Sstevel@tonic-gate 	} else
8270Sstevel@tonic-gate 		error = EBADF;
8280Sstevel@tonic-gate 
8290Sstevel@tonic-gate 	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
8300Sstevel@tonic-gate }
8310Sstevel@tonic-gate #endif	/* _LP64 */
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
8340Sstevel@tonic-gate 
8350Sstevel@tonic-gate /*
8360Sstevel@tonic-gate  * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
8370Sstevel@tonic-gate  */
8380Sstevel@tonic-gate caddr_t
smmap32(caddr32_t addr,size32_t len,int prot,int flags,int fd,off32_t pos)8390Sstevel@tonic-gate smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
8400Sstevel@tonic-gate {
8410Sstevel@tonic-gate 	struct file *fp;
8420Sstevel@tonic-gate 	int error;
8430Sstevel@tonic-gate 	caddr_t a = (caddr_t)(uintptr_t)addr;
8440Sstevel@tonic-gate 
8450Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
8460Sstevel@tonic-gate 		error = EINVAL;
8470Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
8480Sstevel@tonic-gate 		error = smmap_common(&a, (size_t)len, prot,
8490Sstevel@tonic-gate 		    flags | _MAP_LOW32, NULL, (offset_t)pos);
8500Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
8510Sstevel@tonic-gate 		error = smmap_common(&a, (size_t)len, prot,
8520Sstevel@tonic-gate 		    flags | _MAP_LOW32, fp, (offset_t)pos);
8530Sstevel@tonic-gate 		releasef(fd);
8540Sstevel@tonic-gate 	} else
8550Sstevel@tonic-gate 		error = EBADF;
8560Sstevel@tonic-gate 
8570Sstevel@tonic-gate 	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
8580Sstevel@tonic-gate 
8590Sstevel@tonic-gate 	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
8600Sstevel@tonic-gate }
8610Sstevel@tonic-gate 
8620Sstevel@tonic-gate /*
8630Sstevel@tonic-gate  * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
8640Sstevel@tonic-gate  *
8650Sstevel@tonic-gate  * Now things really get ugly because we can't use the C-style
8660Sstevel@tonic-gate  * calling convention for more than 6 args, and 64-bit parameter
8670Sstevel@tonic-gate  * passing on 32-bit systems is less than clean.
8680Sstevel@tonic-gate  */
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate struct mmaplf32a {
8710Sstevel@tonic-gate 	caddr_t addr;
8720Sstevel@tonic-gate 	size_t len;
8730Sstevel@tonic-gate #ifdef _LP64
8740Sstevel@tonic-gate 	/*
8750Sstevel@tonic-gate 	 * 32-bit contents, 64-bit cells
8760Sstevel@tonic-gate 	 */
8770Sstevel@tonic-gate 	uint64_t prot;
8780Sstevel@tonic-gate 	uint64_t flags;
8790Sstevel@tonic-gate 	uint64_t fd;
8800Sstevel@tonic-gate 	uint64_t offhi;
8810Sstevel@tonic-gate 	uint64_t offlo;
8820Sstevel@tonic-gate #else
8830Sstevel@tonic-gate 	/*
8840Sstevel@tonic-gate 	 * 32-bit contents, 32-bit cells
8850Sstevel@tonic-gate 	 */
8860Sstevel@tonic-gate 	uint32_t prot;
8870Sstevel@tonic-gate 	uint32_t flags;
8880Sstevel@tonic-gate 	uint32_t fd;
8890Sstevel@tonic-gate 	uint32_t offhi;
8900Sstevel@tonic-gate 	uint32_t offlo;
8910Sstevel@tonic-gate #endif
8920Sstevel@tonic-gate };
8930Sstevel@tonic-gate 
8940Sstevel@tonic-gate int
smmaplf32(struct mmaplf32a * uap,rval_t * rvp)8950Sstevel@tonic-gate smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
8960Sstevel@tonic-gate {
8970Sstevel@tonic-gate 	struct file *fp;
8980Sstevel@tonic-gate 	int error;
8990Sstevel@tonic-gate 	caddr_t a = uap->addr;
9000Sstevel@tonic-gate 	int flags = (int)uap->flags;
9010Sstevel@tonic-gate 	int fd = (int)uap->fd;
9020Sstevel@tonic-gate #ifdef _BIG_ENDIAN
9030Sstevel@tonic-gate 	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
9040Sstevel@tonic-gate #else
9050Sstevel@tonic-gate 	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
9060Sstevel@tonic-gate #endif
9070Sstevel@tonic-gate 
9080Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
9090Sstevel@tonic-gate 		error = EINVAL;
9100Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
9110Sstevel@tonic-gate 		error = smmap_common(&a, uap->len, (int)uap->prot,
9120Sstevel@tonic-gate 		    flags | _MAP_LOW32, NULL, off);
9130Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
9140Sstevel@tonic-gate 		error = smmap_common(&a, uap->len, (int)uap->prot,
9150Sstevel@tonic-gate 		    flags | _MAP_LOW32, fp, off);
9160Sstevel@tonic-gate 		releasef(fd);
9170Sstevel@tonic-gate 	} else
9180Sstevel@tonic-gate 		error = EBADF;
9190Sstevel@tonic-gate 
9200Sstevel@tonic-gate 	if (error == 0)
9210Sstevel@tonic-gate 		rvp->r_val1 = (uintptr_t)a;
9220Sstevel@tonic-gate 	return (error);
9230Sstevel@tonic-gate }
9240Sstevel@tonic-gate 
9250Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL || _ILP32 */
9260Sstevel@tonic-gate 
9270Sstevel@tonic-gate int
munmap(caddr_t addr,size_t len)9280Sstevel@tonic-gate munmap(caddr_t addr, size_t len)
9290Sstevel@tonic-gate {
9300Sstevel@tonic-gate 	struct proc *p = curproc;
9310Sstevel@tonic-gate 	struct as *as = p->p_as;
9320Sstevel@tonic-gate 
9330Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
9340Sstevel@tonic-gate 		return (set_errno(EINVAL));
9350Sstevel@tonic-gate 
9360Sstevel@tonic-gate 	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
9370Sstevel@tonic-gate 		return (set_errno(EINVAL));
9380Sstevel@tonic-gate 
9390Sstevel@tonic-gate 	/*
9400Sstevel@tonic-gate 	 * Discard lwpchan mappings.
9410Sstevel@tonic-gate 	 */
9420Sstevel@tonic-gate 	if (p->p_lcp != NULL)
9430Sstevel@tonic-gate 		lwpchan_delete_mapping(p, addr, addr + len);
9440Sstevel@tonic-gate 	if (as_unmap(as, addr, len) != 0)
9450Sstevel@tonic-gate 		return (set_errno(EINVAL));
9460Sstevel@tonic-gate 
9470Sstevel@tonic-gate 	return (0);
9480Sstevel@tonic-gate }
9490Sstevel@tonic-gate 
9500Sstevel@tonic-gate int
mprotect(caddr_t addr,size_t len,int prot)9510Sstevel@tonic-gate mprotect(caddr_t addr, size_t len, int prot)
9520Sstevel@tonic-gate {
9530Sstevel@tonic-gate 	struct as *as = curproc->p_as;
9540Sstevel@tonic-gate 	uint_t uprot = prot | PROT_USER;
9550Sstevel@tonic-gate 	int error;
9560Sstevel@tonic-gate 
9570Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
9580Sstevel@tonic-gate 		return (set_errno(EINVAL));
9590Sstevel@tonic-gate 
9600Sstevel@tonic-gate 	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
9610Sstevel@tonic-gate 	case RANGE_OKAY:
9620Sstevel@tonic-gate 		break;
9630Sstevel@tonic-gate 	case RANGE_BADPROT:
9640Sstevel@tonic-gate 		return (set_errno(ENOTSUP));
9650Sstevel@tonic-gate 	case RANGE_BADADDR:
9660Sstevel@tonic-gate 	default:
9670Sstevel@tonic-gate 		return (set_errno(ENOMEM));
9680Sstevel@tonic-gate 	}
9690Sstevel@tonic-gate 
9700Sstevel@tonic-gate 	error = as_setprot(as, addr, len, uprot);
9710Sstevel@tonic-gate 	if (error)
9720Sstevel@tonic-gate 		return (set_errno(error));
9730Sstevel@tonic-gate 	return (0);
9740Sstevel@tonic-gate }
9750Sstevel@tonic-gate 
9760Sstevel@tonic-gate #define	MC_CACHE	128			/* internal result buffer */
9770Sstevel@tonic-gate #define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */
9780Sstevel@tonic-gate 
9790Sstevel@tonic-gate int
mincore(caddr_t addr,size_t len,char * vecp)9800Sstevel@tonic-gate mincore(caddr_t addr, size_t len, char *vecp)
9810Sstevel@tonic-gate {
9820Sstevel@tonic-gate 	struct as *as = curproc->p_as;
9830Sstevel@tonic-gate 	caddr_t ea;			/* end address of loop */
9840Sstevel@tonic-gate 	size_t rl;			/* inner result length */
9850Sstevel@tonic-gate 	char vec[MC_CACHE];		/* local vector cache */
9860Sstevel@tonic-gate 	int error;
9870Sstevel@tonic-gate 	model_t model;
9880Sstevel@tonic-gate 	long	llen;
9890Sstevel@tonic-gate 
9900Sstevel@tonic-gate 	model = get_udatamodel();
9910Sstevel@tonic-gate 	/*
9920Sstevel@tonic-gate 	 * Validate form of address parameters.
9930Sstevel@tonic-gate 	 */
9940Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
9950Sstevel@tonic-gate 		llen = (long)len;
9960Sstevel@tonic-gate 	} else {
9970Sstevel@tonic-gate 		llen = (int32_t)(size32_t)len;
9980Sstevel@tonic-gate 	}
9990Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
10000Sstevel@tonic-gate 		return (set_errno(EINVAL));
10010Sstevel@tonic-gate 
10020Sstevel@tonic-gate 	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
10030Sstevel@tonic-gate 		return (set_errno(ENOMEM));
10040Sstevel@tonic-gate 
10050Sstevel@tonic-gate 	/*
10060Sstevel@tonic-gate 	 * Loop over subranges of interval [addr : addr + len), recovering
10070Sstevel@tonic-gate 	 * results internally and then copying them out to caller.  Subrange
10080Sstevel@tonic-gate 	 * is based on the size of MC_CACHE, defined above.
10090Sstevel@tonic-gate 	 */
10100Sstevel@tonic-gate 	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
10110Sstevel@tonic-gate 		error = as_incore(as, addr,
10120Sstevel@tonic-gate 		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
10130Sstevel@tonic-gate 		if (rl != 0) {
10140Sstevel@tonic-gate 			rl = (rl + PAGESIZE - 1) / PAGESIZE;
10150Sstevel@tonic-gate 			if (copyout(vec, vecp, rl) != 0)
10160Sstevel@tonic-gate 				return (set_errno(EFAULT));
10170Sstevel@tonic-gate 			vecp += rl;
10180Sstevel@tonic-gate 		}
10190Sstevel@tonic-gate 		if (error != 0)
10200Sstevel@tonic-gate 			return (set_errno(ENOMEM));
10210Sstevel@tonic-gate 	}
10220Sstevel@tonic-gate 	return (0);
10230Sstevel@tonic-gate }
1024