xref: /onnv-gate/usr/src/uts/common/os/grow.c (revision 5331:3047ad28a67b)
10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
52414Saguzovsk  * Common Development and Distribution License (the "License").
62414Saguzovsk  * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate  *
80Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate  * See the License for the specific language governing permissions
110Sstevel@tonic-gate  * and limitations under the License.
120Sstevel@tonic-gate  *
130Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate  *
190Sstevel@tonic-gate  * CDDL HEADER END
200Sstevel@tonic-gate  */
210Sstevel@tonic-gate /*
223446Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
230Sstevel@tonic-gate  * Use is subject to license terms.
240Sstevel@tonic-gate  */
250Sstevel@tonic-gate 
260Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
270Sstevel@tonic-gate /*	  All Rights Reserved  	*/
280Sstevel@tonic-gate 
290Sstevel@tonic-gate 
300Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
310Sstevel@tonic-gate 
320Sstevel@tonic-gate #include <sys/types.h>
330Sstevel@tonic-gate #include <sys/inttypes.h>
340Sstevel@tonic-gate #include <sys/param.h>
350Sstevel@tonic-gate #include <sys/sysmacros.h>
360Sstevel@tonic-gate #include <sys/systm.h>
370Sstevel@tonic-gate #include <sys/signal.h>
380Sstevel@tonic-gate #include <sys/user.h>
390Sstevel@tonic-gate #include <sys/errno.h>
400Sstevel@tonic-gate #include <sys/var.h>
410Sstevel@tonic-gate #include <sys/proc.h>
420Sstevel@tonic-gate #include <sys/tuneable.h>
430Sstevel@tonic-gate #include <sys/debug.h>
440Sstevel@tonic-gate #include <sys/cmn_err.h>
450Sstevel@tonic-gate #include <sys/cred.h>
460Sstevel@tonic-gate #include <sys/vnode.h>
470Sstevel@tonic-gate #include <sys/vfs.h>
480Sstevel@tonic-gate #include <sys/vm.h>
490Sstevel@tonic-gate #include <sys/file.h>
500Sstevel@tonic-gate #include <sys/mman.h>
510Sstevel@tonic-gate #include <sys/vmparam.h>
520Sstevel@tonic-gate #include <sys/fcntl.h>
530Sstevel@tonic-gate #include <sys/lwpchan_impl.h>
54*5331Samw #include <sys/nbmlock.h>
550Sstevel@tonic-gate 
560Sstevel@tonic-gate #include <vm/hat.h>
570Sstevel@tonic-gate #include <vm/as.h>
580Sstevel@tonic-gate #include <vm/seg.h>
590Sstevel@tonic-gate #include <vm/seg_dev.h>
600Sstevel@tonic-gate #include <vm/seg_vn.h>
610Sstevel@tonic-gate 
/*
 * Switches consulted by brk() and grow() respectively: when nonzero and
 * the calling process has SAUTOLPG set in p_flag, the large-page aware
 * helper (brk_lpg() / grow_lpg()) is used instead of calling the
 * *_internal() routine directly with the current page size code.
 */
620Sstevel@tonic-gate int use_brk_lpg = 1;
630Sstevel@tonic-gate int use_stk_lpg = 1;
640Sstevel@tonic-gate 
/* Forward declarations of the file-local large-page helpers defined below. */
650Sstevel@tonic-gate static int brk_lpg(caddr_t nva);
660Sstevel@tonic-gate static int grow_lpg(caddr_t sp);
670Sstevel@tonic-gate 
680Sstevel@tonic-gate int
690Sstevel@tonic-gate brk(caddr_t nva)
700Sstevel@tonic-gate {
710Sstevel@tonic-gate 	int error;
720Sstevel@tonic-gate 	proc_t *p = curproc;
730Sstevel@tonic-gate 
740Sstevel@tonic-gate 	/*
750Sstevel@tonic-gate 	 * Serialize brk operations on an address space.
760Sstevel@tonic-gate 	 * This also serves as the lock protecting p_brksize
770Sstevel@tonic-gate 	 * and p_brkpageszc.
780Sstevel@tonic-gate 	 */
790Sstevel@tonic-gate 	as_rangelock(p->p_as);
800Sstevel@tonic-gate 	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
810Sstevel@tonic-gate 		error = brk_lpg(nva);
820Sstevel@tonic-gate 	} else {
830Sstevel@tonic-gate 		error = brk_internal(nva, p->p_brkpageszc);
840Sstevel@tonic-gate 	}
850Sstevel@tonic-gate 	as_rangeunlock(p->p_as);
860Sstevel@tonic-gate 	return ((error != 0 ? set_errno(error) : 0));
870Sstevel@tonic-gate }
880Sstevel@tonic-gate 
890Sstevel@tonic-gate /*
900Sstevel@tonic-gate  * Algorithm: call arch-specific map_pgsz to get best page size to use,
910Sstevel@tonic-gate  * then call brk_internal().
920Sstevel@tonic-gate  * Returns 0 on success.
930Sstevel@tonic-gate  */
940Sstevel@tonic-gate static int
950Sstevel@tonic-gate brk_lpg(caddr_t nva)
960Sstevel@tonic-gate {
970Sstevel@tonic-gate 	struct proc *p = curproc;
980Sstevel@tonic-gate 	size_t pgsz, len;
992991Ssusans 	caddr_t addr, brkend;
1000Sstevel@tonic-gate 	caddr_t bssbase = p->p_bssbase;
1010Sstevel@tonic-gate 	caddr_t brkbase = p->p_brkbase;
1020Sstevel@tonic-gate 	int oszc, szc;
1030Sstevel@tonic-gate 	int err;
1040Sstevel@tonic-gate 
1050Sstevel@tonic-gate 	oszc = p->p_brkpageszc;
1060Sstevel@tonic-gate 
1070Sstevel@tonic-gate 	/*
1080Sstevel@tonic-gate 	 * If p_brkbase has not yet been set, the first call
1090Sstevel@tonic-gate 	 * to brk_internal() will initialize it.
1100Sstevel@tonic-gate 	 */
1110Sstevel@tonic-gate 	if (brkbase == 0) {
1120Sstevel@tonic-gate 		return (brk_internal(nva, oszc));
1130Sstevel@tonic-gate 	}
1140Sstevel@tonic-gate 
1150Sstevel@tonic-gate 	len = nva - bssbase;
1160Sstevel@tonic-gate 
1172991Ssusans 	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
1180Sstevel@tonic-gate 	szc = page_szc(pgsz);
1190Sstevel@tonic-gate 
1200Sstevel@tonic-gate 	/*
1210Sstevel@tonic-gate 	 * Covers two cases:
1220Sstevel@tonic-gate 	 * 1. page_szc() returns -1 for invalid page size, so we want to
1230Sstevel@tonic-gate 	 * ignore it in that case.
1240Sstevel@tonic-gate 	 * 2. By design we never decrease page size, as it is more stable.
1250Sstevel@tonic-gate 	 */
1260Sstevel@tonic-gate 	if (szc <= oszc) {
1270Sstevel@tonic-gate 		err = brk_internal(nva, oszc);
1280Sstevel@tonic-gate 		/* If failed, back off to base page size. */
1290Sstevel@tonic-gate 		if (err != 0 && oszc != 0) {
1300Sstevel@tonic-gate 			err = brk_internal(nva, 0);
1310Sstevel@tonic-gate 		}
1320Sstevel@tonic-gate 		return (err);
1330Sstevel@tonic-gate 	}
1340Sstevel@tonic-gate 
1350Sstevel@tonic-gate 	err = brk_internal(nva, szc);
1360Sstevel@tonic-gate 	/* If using szc failed, map with base page size and return. */
1370Sstevel@tonic-gate 	if (err != 0) {
1380Sstevel@tonic-gate 		if (szc != 0) {
1390Sstevel@tonic-gate 			err = brk_internal(nva, 0);
1400Sstevel@tonic-gate 		}
1410Sstevel@tonic-gate 		return (err);
1420Sstevel@tonic-gate 	}
1430Sstevel@tonic-gate 
1442991Ssusans 	/*
1452991Ssusans 	 * Round up brk base to a large page boundary and remap
1462991Ssusans 	 * anything in the segment already faulted in beyond that
1472991Ssusans 	 * point.
1482991Ssusans 	 */
1492991Ssusans 	addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
1502991Ssusans 	brkend = brkbase + p->p_brksize;
1512991Ssusans 	len = brkend - addr;
1522991Ssusans 	/* Check that len is not negative. Update page size code for heap. */
1532991Ssusans 	if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
1540Sstevel@tonic-gate 		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
1552991Ssusans 		p->p_brkpageszc = szc;
1560Sstevel@tonic-gate 	}
1570Sstevel@tonic-gate 
1580Sstevel@tonic-gate 	ASSERT(err == 0);
1590Sstevel@tonic-gate 	return (err);		/* should always be 0 */
1600Sstevel@tonic-gate }
1610Sstevel@tonic-gate 
1620Sstevel@tonic-gate /*
1630Sstevel@tonic-gate  * Returns 0 on success.
1640Sstevel@tonic-gate  */
1650Sstevel@tonic-gate int
1660Sstevel@tonic-gate brk_internal(caddr_t nva, uint_t brkszc)
1670Sstevel@tonic-gate {
1680Sstevel@tonic-gate 	caddr_t ova;			/* current break address */
1690Sstevel@tonic-gate 	size_t size;
1700Sstevel@tonic-gate 	int	error;
1710Sstevel@tonic-gate 	struct proc *p = curproc;
1720Sstevel@tonic-gate 	struct as *as = p->p_as;
1730Sstevel@tonic-gate 	size_t pgsz;
1740Sstevel@tonic-gate 	uint_t szc;
1750Sstevel@tonic-gate 	rctl_qty_t as_rctl;
1760Sstevel@tonic-gate 
1770Sstevel@tonic-gate 	/*
1780Sstevel@tonic-gate 	 * extend heap to brkszc alignment but use current p->p_brkpageszc
1790Sstevel@tonic-gate 	 * for the newly created segment. This allows the new extension
1800Sstevel@tonic-gate 	 * segment to be concatenated successfully with the existing brk
1810Sstevel@tonic-gate 	 * segment.
1820Sstevel@tonic-gate 	 */
1830Sstevel@tonic-gate 	if ((szc = brkszc) != 0) {
1840Sstevel@tonic-gate 		pgsz = page_get_pagesize(szc);
1850Sstevel@tonic-gate 		ASSERT(pgsz > PAGESIZE);
1860Sstevel@tonic-gate 	} else {
1870Sstevel@tonic-gate 		pgsz = PAGESIZE;
1880Sstevel@tonic-gate 	}
1890Sstevel@tonic-gate 
1900Sstevel@tonic-gate 	mutex_enter(&p->p_lock);
1910Sstevel@tonic-gate 	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
1920Sstevel@tonic-gate 	    p->p_rctls, p);
1930Sstevel@tonic-gate 	mutex_exit(&p->p_lock);
1940Sstevel@tonic-gate 
1950Sstevel@tonic-gate 	/*
1960Sstevel@tonic-gate 	 * If p_brkbase has not yet been set, the first call
1970Sstevel@tonic-gate 	 * to brk() will initialize it.
1980Sstevel@tonic-gate 	 */
1990Sstevel@tonic-gate 	if (p->p_brkbase == 0)
2000Sstevel@tonic-gate 		p->p_brkbase = nva;
2010Sstevel@tonic-gate 
2020Sstevel@tonic-gate 	/*
2030Sstevel@tonic-gate 	 * Before multiple page size support existed p_brksize was the value
2040Sstevel@tonic-gate 	 * not rounded to the pagesize (i.e. it stored the exact user request
2050Sstevel@tonic-gate 	 * for heap size). If pgsz is greater than PAGESIZE calculate the
2060Sstevel@tonic-gate 	 * heap size as the real new heap size by rounding it up to pgsz.
2070Sstevel@tonic-gate 	 * This is useful since we may want to know where the heap ends
2080Sstevel@tonic-gate 	 * without knowing heap pagesize (e.g. some old code) and also if
2090Sstevel@tonic-gate 	 * heap pagesize changes we can update p_brkpageszc but delay adding
2100Sstevel@tonic-gate 	 * new mapping yet still know from p_brksize where the heap really
2110Sstevel@tonic-gate 	 * ends. The user requested heap end is stored in libc variable.
2120Sstevel@tonic-gate 	 */
2130Sstevel@tonic-gate 	if (pgsz > PAGESIZE) {
2140Sstevel@tonic-gate 		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
2150Sstevel@tonic-gate 		size = tnva - p->p_brkbase;
2160Sstevel@tonic-gate 		if (tnva < p->p_brkbase || (size > p->p_brksize &&
2170Sstevel@tonic-gate 		    size > (size_t)as_rctl)) {
2180Sstevel@tonic-gate 			szc = 0;
2190Sstevel@tonic-gate 			pgsz = PAGESIZE;
2200Sstevel@tonic-gate 			size = nva - p->p_brkbase;
2210Sstevel@tonic-gate 		}
2220Sstevel@tonic-gate 	} else {
2230Sstevel@tonic-gate 		size = nva - p->p_brkbase;
2240Sstevel@tonic-gate 	}
2250Sstevel@tonic-gate 
2260Sstevel@tonic-gate 	/*
2270Sstevel@tonic-gate 	 * use PAGESIZE to roundup ova because we want to know the real value
2280Sstevel@tonic-gate 	 * of the current heap end in case p_brkpageszc changes since the last
2290Sstevel@tonic-gate 	 * p_brksize was computed.
2300Sstevel@tonic-gate 	 */
2310Sstevel@tonic-gate 	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
2320Sstevel@tonic-gate 	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
2330Sstevel@tonic-gate 		PAGESIZE);
2340Sstevel@tonic-gate 
2350Sstevel@tonic-gate 	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
2360Sstevel@tonic-gate 	    size > as_rctl)) {
2370Sstevel@tonic-gate 		mutex_enter(&p->p_lock);
2380Sstevel@tonic-gate 		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
2390Sstevel@tonic-gate 		    RCA_SAFE);
2400Sstevel@tonic-gate 		mutex_exit(&p->p_lock);
2410Sstevel@tonic-gate 		return (ENOMEM);
2420Sstevel@tonic-gate 	}
2430Sstevel@tonic-gate 
2440Sstevel@tonic-gate 	if (nva > ova) {
2450Sstevel@tonic-gate 		struct segvn_crargs crargs =
2460Sstevel@tonic-gate 		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
2470Sstevel@tonic-gate 
2480Sstevel@tonic-gate 		if (!(p->p_datprot & PROT_EXEC)) {
2490Sstevel@tonic-gate 			crargs.prot &= ~PROT_EXEC;
2500Sstevel@tonic-gate 		}
2510Sstevel@tonic-gate 
2520Sstevel@tonic-gate 		/*
2530Sstevel@tonic-gate 		 * Add new zfod mapping to extend UNIX data segment
2542991Ssusans 		 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
2552991Ssusans 		 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
2562991Ssusans 		 * page sizes if ova is not aligned to szc's pgsz.
2570Sstevel@tonic-gate 		 */
2582991Ssusans 		if (szc > 0) {
2592991Ssusans 			caddr_t rbss;
2602991Ssusans 
2612991Ssusans 			rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
2622991Ssusans 			    pgsz);
2632991Ssusans 			if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
2642991Ssusans 				crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
2652991Ssusans 				    AS_MAP_NO_LPOOB;
2662991Ssusans 			} else if (ova == rbss) {
2672991Ssusans 				crargs.szc = szc;
2682991Ssusans 			} else {
2692991Ssusans 				crargs.szc = AS_MAP_HEAP;
2702991Ssusans 			}
2712991Ssusans 		} else {
2722991Ssusans 			crargs.szc = AS_MAP_NO_LPOOB;
2732991Ssusans 		}
2740Sstevel@tonic-gate 		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
2750Sstevel@tonic-gate 		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
2760Sstevel@tonic-gate 		    &crargs);
2770Sstevel@tonic-gate 		if (error) {
2780Sstevel@tonic-gate 			return (error);
2790Sstevel@tonic-gate 		}
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 	} else if (nva < ova) {
2820Sstevel@tonic-gate 		/*
2830Sstevel@tonic-gate 		 * Release mapping to shrink UNIX data segment.
2840Sstevel@tonic-gate 		 */
2850Sstevel@tonic-gate 		(void) as_unmap(as, nva, (size_t)(ova - nva));
2860Sstevel@tonic-gate 	}
2870Sstevel@tonic-gate 	p->p_brksize = size;
2880Sstevel@tonic-gate 	return (0);
2890Sstevel@tonic-gate }
2900Sstevel@tonic-gate 
2910Sstevel@tonic-gate /*
2920Sstevel@tonic-gate  * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
2930Sstevel@tonic-gate  * This routine assumes that the stack grows downward.
2940Sstevel@tonic-gate  */
2950Sstevel@tonic-gate int
2960Sstevel@tonic-gate grow(caddr_t sp)
2970Sstevel@tonic-gate {
2980Sstevel@tonic-gate 	struct proc *p = curproc;
2992991Ssusans 	struct as *as = p->p_as;
3002991Ssusans 	size_t oldsize = p->p_stksize;
3012991Ssusans 	size_t newsize;
3020Sstevel@tonic-gate 	int err;
3030Sstevel@tonic-gate 
3040Sstevel@tonic-gate 	/*
3050Sstevel@tonic-gate 	 * Serialize grow operations on an address space.
3060Sstevel@tonic-gate 	 * This also serves as the lock protecting p_stksize
3070Sstevel@tonic-gate 	 * and p_stkpageszc.
3080Sstevel@tonic-gate 	 */
3092991Ssusans 	as_rangelock(as);
3100Sstevel@tonic-gate 	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
3110Sstevel@tonic-gate 		err = grow_lpg(sp);
3120Sstevel@tonic-gate 	} else {
3130Sstevel@tonic-gate 		err = grow_internal(sp, p->p_stkpageszc);
3140Sstevel@tonic-gate 	}
3152991Ssusans 	as_rangeunlock(as);
3162991Ssusans 
3172991Ssusans 	if (err == 0 && (newsize = p->p_stksize) > oldsize) {
3182991Ssusans 		ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
3192991Ssusans 		ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
3202991Ssusans 		/*
3212991Ssusans 		 * Set up translations so the process doesn't have to fault in
3222991Ssusans 		 * the stack pages we just gave it.
3232991Ssusans 		 */
3242991Ssusans 		(void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
3252991Ssusans 		    newsize - oldsize, F_INVAL, S_WRITE);
3262991Ssusans 	}
3270Sstevel@tonic-gate 	return ((err == 0 ? 1 : 0));
3280Sstevel@tonic-gate }
3290Sstevel@tonic-gate 
3300Sstevel@tonic-gate /*
3310Sstevel@tonic-gate  * Algorithm: call arch-specific map_pgsz to get best page size to use,
3320Sstevel@tonic-gate  * then call grow_internal().
3330Sstevel@tonic-gate  * Returns 0 on success.
3340Sstevel@tonic-gate  */
3350Sstevel@tonic-gate static int
3360Sstevel@tonic-gate grow_lpg(caddr_t sp)
3370Sstevel@tonic-gate {
3380Sstevel@tonic-gate 	struct proc *p = curproc;
3390Sstevel@tonic-gate 	size_t pgsz;
3400Sstevel@tonic-gate 	size_t len, newsize;
3412991Ssusans 	caddr_t addr, saddr;
3422991Ssusans 	caddr_t growend;
3430Sstevel@tonic-gate 	int oszc, szc;
3440Sstevel@tonic-gate 	int err;
3450Sstevel@tonic-gate 
3460Sstevel@tonic-gate 	newsize = p->p_usrstack - sp;
3470Sstevel@tonic-gate 
3480Sstevel@tonic-gate 	oszc = p->p_stkpageszc;
3492991Ssusans 	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
3500Sstevel@tonic-gate 	szc = page_szc(pgsz);
3510Sstevel@tonic-gate 
3520Sstevel@tonic-gate 	/*
3530Sstevel@tonic-gate 	 * Covers two cases:
3540Sstevel@tonic-gate 	 * 1. page_szc() returns -1 for invalid page size, so we want to
3550Sstevel@tonic-gate 	 * ignore it in that case.
3560Sstevel@tonic-gate 	 * 2. By design we never decrease page size, as it is more stable.
3570Sstevel@tonic-gate 	 * This shouldn't happen as the stack never shrinks.
3580Sstevel@tonic-gate 	 */
3590Sstevel@tonic-gate 	if (szc <= oszc) {
3600Sstevel@tonic-gate 		err = grow_internal(sp, oszc);
3610Sstevel@tonic-gate 		/* failed, fall back to base page size */
3620Sstevel@tonic-gate 		if (err != 0 && oszc != 0) {
3630Sstevel@tonic-gate 			err = grow_internal(sp, 0);
3640Sstevel@tonic-gate 		}
3650Sstevel@tonic-gate 		return (err);
3660Sstevel@tonic-gate 	}
3670Sstevel@tonic-gate 
3680Sstevel@tonic-gate 	/*
3690Sstevel@tonic-gate 	 * We've grown sufficiently to switch to a new page size.
3702991Ssusans 	 * So we are going to remap the whole segment with the new page size.
3710Sstevel@tonic-gate 	 */
3720Sstevel@tonic-gate 	err = grow_internal(sp, szc);
3730Sstevel@tonic-gate 	/* The grow with szc failed, so fall back to base page size. */
3740Sstevel@tonic-gate 	if (err != 0) {
3750Sstevel@tonic-gate 		if (szc != 0) {
3760Sstevel@tonic-gate 			err = grow_internal(sp, 0);
3770Sstevel@tonic-gate 		}
3780Sstevel@tonic-gate 		return (err);
3790Sstevel@tonic-gate 	}
3800Sstevel@tonic-gate 
3812991Ssusans 	/*
3822991Ssusans 	 * Round up stack pointer to a large page boundary and remap
3832991Ssusans 	 * any pgsz pages in the segment already faulted in beyond that
3842991Ssusans 	 * point.
3852991Ssusans 	 */
3862991Ssusans 	saddr = p->p_usrstack - p->p_stksize;
3872991Ssusans 	addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
3882991Ssusans 	growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
3892991Ssusans 	len = growend - addr;
3902991Ssusans 	/* Check that len is not negative. Update page size code for stack. */
3912991Ssusans 	if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
3920Sstevel@tonic-gate 		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
3932991Ssusans 		p->p_stkpageszc = szc;
3940Sstevel@tonic-gate 	}
3950Sstevel@tonic-gate 
3960Sstevel@tonic-gate 	ASSERT(err == 0);
3970Sstevel@tonic-gate 	return (err);		/* should always be 0 */
3980Sstevel@tonic-gate }
3990Sstevel@tonic-gate 
4000Sstevel@tonic-gate /*
4010Sstevel@tonic-gate  * This routine assumes that the stack grows downward.
4020Sstevel@tonic-gate  * Returns 0 on success, errno on failure.
4030Sstevel@tonic-gate  */
4040Sstevel@tonic-gate int
4050Sstevel@tonic-gate grow_internal(caddr_t sp, uint_t growszc)
4060Sstevel@tonic-gate {
4070Sstevel@tonic-gate 	struct proc *p = curproc;
4082991Ssusans 	size_t newsize;
4090Sstevel@tonic-gate 	size_t oldsize;
4100Sstevel@tonic-gate 	int    error;
4110Sstevel@tonic-gate 	size_t pgsz;
4120Sstevel@tonic-gate 	uint_t szc;
4130Sstevel@tonic-gate 	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
4140Sstevel@tonic-gate 
4150Sstevel@tonic-gate 	ASSERT(sp < p->p_usrstack);
4162991Ssusans 	sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);
4170Sstevel@tonic-gate 
4180Sstevel@tonic-gate 	/*
4190Sstevel@tonic-gate 	 * grow to growszc alignment but use current p->p_stkpageszc for
4200Sstevel@tonic-gate 	 * the segvn_crargs szc passed to segvn_create. For memcntl to
4210Sstevel@tonic-gate 	 * increase the szc, this allows the new extension segment to be
4220Sstevel@tonic-gate 	 * concatenated successfully with the existing stack segment.
4230Sstevel@tonic-gate 	 */
4240Sstevel@tonic-gate 	if ((szc = growszc) != 0) {
4250Sstevel@tonic-gate 		pgsz = page_get_pagesize(szc);
4260Sstevel@tonic-gate 		ASSERT(pgsz > PAGESIZE);
4272991Ssusans 		newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
4280Sstevel@tonic-gate 		if (newsize > (size_t)p->p_stk_ctl) {
4290Sstevel@tonic-gate 			szc = 0;
4300Sstevel@tonic-gate 			pgsz = PAGESIZE;
4310Sstevel@tonic-gate 			newsize = p->p_usrstack - sp;
4320Sstevel@tonic-gate 		}
4330Sstevel@tonic-gate 	} else {
4340Sstevel@tonic-gate 		pgsz = PAGESIZE;
4352991Ssusans 		newsize = p->p_usrstack - sp;
4360Sstevel@tonic-gate 	}
4370Sstevel@tonic-gate 
4380Sstevel@tonic-gate 	if (newsize > (size_t)p->p_stk_ctl) {
4390Sstevel@tonic-gate 		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
4400Sstevel@tonic-gate 		    RCA_UNSAFE_ALL);
4410Sstevel@tonic-gate 
4420Sstevel@tonic-gate 		return (ENOMEM);
4430Sstevel@tonic-gate 	}
4440Sstevel@tonic-gate 
4450Sstevel@tonic-gate 	oldsize = p->p_stksize;
4460Sstevel@tonic-gate 	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);
4470Sstevel@tonic-gate 
4480Sstevel@tonic-gate 	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
4490Sstevel@tonic-gate 		return (0);
4500Sstevel@tonic-gate 	}
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	if (!(p->p_stkprot & PROT_EXEC)) {
4530Sstevel@tonic-gate 		crargs.prot &= ~PROT_EXEC;
4540Sstevel@tonic-gate 	}
4550Sstevel@tonic-gate 	/*
4562991Ssusans 	 * extend stack with the proposed new growszc, which is different
4572991Ssusans 	 * than p_stkpageszc only on a memcntl to increase the stack pagesize.
4582991Ssusans 	 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
4592991Ssusans 	 * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
4602991Ssusans 	 * if not aligned to szc's pgsz.
4610Sstevel@tonic-gate 	 */
4622991Ssusans 	if (szc > 0) {
4632991Ssusans 		caddr_t oldsp = p->p_usrstack - oldsize;
4642991Ssusans 		caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
4652991Ssusans 		    pgsz);
4662991Ssusans 
4672991Ssusans 		if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
4682991Ssusans 			crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
4692991Ssusans 			    AS_MAP_NO_LPOOB;
4702991Ssusans 		} else if (oldsp == austk) {
4712991Ssusans 			crargs.szc = szc;
4722991Ssusans 		} else {
4732991Ssusans 			crargs.szc = AS_MAP_STACK;
4742991Ssusans 		}
4752991Ssusans 	} else {
4762991Ssusans 		crargs.szc = AS_MAP_NO_LPOOB;
4772991Ssusans 	}
4780Sstevel@tonic-gate 	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;
4790Sstevel@tonic-gate 
4802991Ssusans 	if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
4810Sstevel@tonic-gate 	    segvn_create, &crargs)) != 0) {
4820Sstevel@tonic-gate 		if (error == EAGAIN) {
4830Sstevel@tonic-gate 			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
4843446Smrj 			    "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
4850Sstevel@tonic-gate 		}
4860Sstevel@tonic-gate 		return (error);
4870Sstevel@tonic-gate 	}
4880Sstevel@tonic-gate 	p->p_stksize = newsize;
4890Sstevel@tonic-gate 	return (0);
4900Sstevel@tonic-gate }
4910Sstevel@tonic-gate 
4920Sstevel@tonic-gate /*
4930Sstevel@tonic-gate  * Used for MAP_ANON - fast way to get anonymous pages
4940Sstevel@tonic-gate  */
4950Sstevel@tonic-gate static int
4960Sstevel@tonic-gate zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
4970Sstevel@tonic-gate     offset_t pos)
4980Sstevel@tonic-gate {
4992991Ssusans 	struct segvn_crargs vn_a;
5000Sstevel@tonic-gate 
5010Sstevel@tonic-gate 	if (((PROT_ALL & uprot) != uprot))
5020Sstevel@tonic-gate 		return (EACCES);
5030Sstevel@tonic-gate 
5040Sstevel@tonic-gate 	if ((flags & MAP_FIXED) != 0) {
5050Sstevel@tonic-gate 		caddr_t userlimit;
5060Sstevel@tonic-gate 
5070Sstevel@tonic-gate 		/*
5080Sstevel@tonic-gate 		 * Use the user address.  First verify that
5090Sstevel@tonic-gate 		 * the address to be used is page aligned.
5100Sstevel@tonic-gate 		 * Then make some simple bounds checks.
5110Sstevel@tonic-gate 		 */
5120Sstevel@tonic-gate 		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
5130Sstevel@tonic-gate 			return (EINVAL);
5140Sstevel@tonic-gate 
5150Sstevel@tonic-gate 		userlimit = flags & _MAP_LOW32 ?
5160Sstevel@tonic-gate 		    (caddr_t)USERLIMIT32 : as->a_userlimit;
5170Sstevel@tonic-gate 		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
5180Sstevel@tonic-gate 		case RANGE_OKAY:
5190Sstevel@tonic-gate 			break;
5200Sstevel@tonic-gate 		case RANGE_BADPROT:
5210Sstevel@tonic-gate 			return (ENOTSUP);
5220Sstevel@tonic-gate 		case RANGE_BADADDR:
5230Sstevel@tonic-gate 		default:
5240Sstevel@tonic-gate 			return (ENOMEM);
5250Sstevel@tonic-gate 		}
5260Sstevel@tonic-gate 		(void) as_unmap(as, *addrp, len);
5270Sstevel@tonic-gate 	} else {
5280Sstevel@tonic-gate 		/*
5290Sstevel@tonic-gate 		 * No need to worry about vac alignment for anonymous
5300Sstevel@tonic-gate 		 * pages since this is a "clone" object that doesn't
5310Sstevel@tonic-gate 		 * yet exist.
5320Sstevel@tonic-gate 		 */
5330Sstevel@tonic-gate 		map_addr(addrp, len, pos, 0, flags);
5340Sstevel@tonic-gate 		if (*addrp == NULL)
5350Sstevel@tonic-gate 			return (ENOMEM);
5360Sstevel@tonic-gate 	}
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate 	/*
5390Sstevel@tonic-gate 	 * Use the seg_vn segment driver; passing in the NULL amp
5400Sstevel@tonic-gate 	 * gives the desired "cloning" effect.
5410Sstevel@tonic-gate 	 */
5422991Ssusans 	vn_a.vp = NULL;
5432991Ssusans 	vn_a.offset = 0;
5442991Ssusans 	vn_a.type = flags & MAP_TYPE;
5452991Ssusans 	vn_a.prot = uprot;
5462991Ssusans 	vn_a.maxprot = PROT_ALL;
5472991Ssusans 	vn_a.flags = flags & ~MAP_TYPE;
5482991Ssusans 	vn_a.cred = CRED();
5492991Ssusans 	vn_a.amp = NULL;
5502991Ssusans 	vn_a.szc = 0;
5512991Ssusans 	vn_a.lgrp_mem_policy_flags = 0;
5520Sstevel@tonic-gate 
5532991Ssusans 	return (as_map(as, *addrp, len, segvn_create, &vn_a));
5540Sstevel@tonic-gate }
5550Sstevel@tonic-gate 
5560Sstevel@tonic-gate static int
5570Sstevel@tonic-gate smmap_common(caddr_t *addrp, size_t len,
5580Sstevel@tonic-gate     int prot, int flags, struct file *fp, offset_t pos)
5590Sstevel@tonic-gate {
5600Sstevel@tonic-gate 	struct vnode *vp;
5610Sstevel@tonic-gate 	struct as *as = curproc->p_as;
5620Sstevel@tonic-gate 	uint_t uprot, maxprot, type;
5630Sstevel@tonic-gate 	int error;
564*5331Samw 	int in_crit = 0;
5650Sstevel@tonic-gate 
5660Sstevel@tonic-gate 	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
5670Sstevel@tonic-gate 	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
5680Sstevel@tonic-gate 	    MAP_TEXT | MAP_INITDATA)) != 0) {
5690Sstevel@tonic-gate 		/* | MAP_RENAME */	/* not implemented, let user know */
5700Sstevel@tonic-gate 		return (EINVAL);
5710Sstevel@tonic-gate 	}
5720Sstevel@tonic-gate 
5730Sstevel@tonic-gate 	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
5740Sstevel@tonic-gate 		return (EINVAL);
5750Sstevel@tonic-gate 	}
5760Sstevel@tonic-gate 
5770Sstevel@tonic-gate 	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
5780Sstevel@tonic-gate 		return (EINVAL);
5790Sstevel@tonic-gate 	}
5800Sstevel@tonic-gate 
5810Sstevel@tonic-gate #if defined(__sparc)
5820Sstevel@tonic-gate 	/*
5830Sstevel@tonic-gate 	 * See if this is an "old mmap call".  If so, remember this
5840Sstevel@tonic-gate 	 * fact and convert the flags value given to mmap to indicate
5850Sstevel@tonic-gate 	 * the specified address in the system call must be used.
5860Sstevel@tonic-gate 	 * _MAP_NEW is set by all new uses of mmap.
5870Sstevel@tonic-gate 	 */
5880Sstevel@tonic-gate 	if ((flags & _MAP_NEW) == 0)
5890Sstevel@tonic-gate 		flags |= MAP_FIXED;
5900Sstevel@tonic-gate #endif
5910Sstevel@tonic-gate 	flags &= ~_MAP_NEW;
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate 	type = flags & MAP_TYPE;
5940Sstevel@tonic-gate 	if (type != MAP_PRIVATE && type != MAP_SHARED)
5950Sstevel@tonic-gate 		return (EINVAL);
5960Sstevel@tonic-gate 
5970Sstevel@tonic-gate 
5980Sstevel@tonic-gate 	if (flags & MAP_ALIGN) {
5990Sstevel@tonic-gate 
6000Sstevel@tonic-gate 		if (flags & MAP_FIXED)
6010Sstevel@tonic-gate 			return (EINVAL);
6020Sstevel@tonic-gate 
6030Sstevel@tonic-gate 		/* alignment needs to be a power of 2 >= page size */
6040Sstevel@tonic-gate 		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
6050Sstevel@tonic-gate 			!ISP2((uintptr_t)*addrp))
6060Sstevel@tonic-gate 			return (EINVAL);
6070Sstevel@tonic-gate 	}
6080Sstevel@tonic-gate 	/*
6090Sstevel@tonic-gate 	 * Check for bad lengths and file position.
6100Sstevel@tonic-gate 	 * We let the VOP_MAP routine check for negative lengths
6110Sstevel@tonic-gate 	 * since on some vnode types this might be appropriate.
6120Sstevel@tonic-gate 	 */
6130Sstevel@tonic-gate 	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
6140Sstevel@tonic-gate 		return (EINVAL);
6150Sstevel@tonic-gate 
6160Sstevel@tonic-gate 	maxprot = PROT_ALL;		/* start out allowing all accesses */
6170Sstevel@tonic-gate 	uprot = prot | PROT_USER;
6180Sstevel@tonic-gate 
6190Sstevel@tonic-gate 	if (fp == NULL) {
6200Sstevel@tonic-gate 		ASSERT(flags & MAP_ANON);
6210Sstevel@tonic-gate 		as_rangelock(as);
6220Sstevel@tonic-gate 		error = zmap(as, addrp, len, uprot, flags, pos);
6230Sstevel@tonic-gate 		as_rangeunlock(as);
6240Sstevel@tonic-gate 		return (error);
6250Sstevel@tonic-gate 	} else if ((flags & MAP_ANON) != 0)
6260Sstevel@tonic-gate 		return (EINVAL);
6270Sstevel@tonic-gate 
6280Sstevel@tonic-gate 	vp = fp->f_vnode;
6290Sstevel@tonic-gate 
6300Sstevel@tonic-gate 	/* Can't execute code from "noexec" mounted filesystem. */
6310Sstevel@tonic-gate 	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
6320Sstevel@tonic-gate 		maxprot &= ~PROT_EXEC;
6330Sstevel@tonic-gate 
6340Sstevel@tonic-gate 	/*
6350Sstevel@tonic-gate 	 * These checks were added as part of large files.
6360Sstevel@tonic-gate 	 *
637146Speterte 	 * Return ENXIO if the initial position is negative; return EOVERFLOW
6380Sstevel@tonic-gate 	 * if (offset + len) would overflow the maximum allowed offset for the
6390Sstevel@tonic-gate 	 * type of file descriptor being used.
6400Sstevel@tonic-gate 	 */
6410Sstevel@tonic-gate 	if (vp->v_type == VREG) {
642146Speterte 		if (pos < 0)
643146Speterte 			return (ENXIO);
6440Sstevel@tonic-gate 		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
6450Sstevel@tonic-gate 			return (EOVERFLOW);
6460Sstevel@tonic-gate 	}
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
6490Sstevel@tonic-gate 		/* no write access allowed */
6500Sstevel@tonic-gate 		maxprot &= ~PROT_WRITE;
6510Sstevel@tonic-gate 	}
6520Sstevel@tonic-gate 
6530Sstevel@tonic-gate 	/*
6540Sstevel@tonic-gate 	 * XXX - Do we also adjust maxprot based on protections
6550Sstevel@tonic-gate 	 * of the vnode?  E.g. if no execute permission is given
6560Sstevel@tonic-gate 	 * on the vnode for the current user, maxprot probably
6570Sstevel@tonic-gate 	 * should disallow PROT_EXEC also?  This is different
6580Sstevel@tonic-gate 	 * from the write access as this would be a per vnode
6590Sstevel@tonic-gate 	 * test as opposed to a per fd test for writability.
6600Sstevel@tonic-gate 	 */
6610Sstevel@tonic-gate 
6620Sstevel@tonic-gate 	/*
6630Sstevel@tonic-gate 	 * Verify that the specified protections are not greater than
6640Sstevel@tonic-gate 	 * the maximum allowable protections.  Also test to make sure
6650Sstevel@tonic-gate 	 * that the file descriptor does allow for read access since
6660Sstevel@tonic-gate 	 * "write only" mappings are hard to do since normally we do
6670Sstevel@tonic-gate 	 * the read from the file before the page can be written.
6680Sstevel@tonic-gate 	 */
6690Sstevel@tonic-gate 	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
6700Sstevel@tonic-gate 		return (EACCES);
6710Sstevel@tonic-gate 
6720Sstevel@tonic-gate 	/*
6730Sstevel@tonic-gate 	 * If the user specified an address, do some simple checks here
6740Sstevel@tonic-gate 	 */
6750Sstevel@tonic-gate 	if ((flags & MAP_FIXED) != 0) {
6760Sstevel@tonic-gate 		caddr_t userlimit;
6770Sstevel@tonic-gate 
6780Sstevel@tonic-gate 		/*
6790Sstevel@tonic-gate 		 * Use the user address.  First verify that
6800Sstevel@tonic-gate 		 * the address to be used is page aligned.
6810Sstevel@tonic-gate 		 * Then make some simple bounds checks.
6820Sstevel@tonic-gate 		 */
6830Sstevel@tonic-gate 		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
6840Sstevel@tonic-gate 			return (EINVAL);
6850Sstevel@tonic-gate 
6860Sstevel@tonic-gate 		userlimit = flags & _MAP_LOW32 ?
6870Sstevel@tonic-gate 		    (caddr_t)USERLIMIT32 : as->a_userlimit;
6880Sstevel@tonic-gate 		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
6890Sstevel@tonic-gate 		case RANGE_OKAY:
6900Sstevel@tonic-gate 			break;
6910Sstevel@tonic-gate 		case RANGE_BADPROT:
6920Sstevel@tonic-gate 			return (ENOTSUP);
6930Sstevel@tonic-gate 		case RANGE_BADADDR:
6940Sstevel@tonic-gate 		default:
6950Sstevel@tonic-gate 			return (ENOMEM);
6960Sstevel@tonic-gate 		}
6970Sstevel@tonic-gate 	}
6980Sstevel@tonic-gate 
699*5331Samw 	if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
700*5331Samw 	    nbl_need_check(vp)) {
701*5331Samw 		int svmand;
702*5331Samw 		nbl_op_t nop;
703*5331Samw 
704*5331Samw 		nbl_start_crit(vp, RW_READER);
705*5331Samw 		in_crit = 1;
706*5331Samw 		error = nbl_svmand(vp, fp->f_cred, &svmand);
707*5331Samw 		if (error != 0)
708*5331Samw 			goto done;
709*5331Samw 		if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
710*5331Samw 			if (prot & (PROT_READ | PROT_EXEC)) {
711*5331Samw 				nop = NBL_READWRITE;
712*5331Samw 			} else {
713*5331Samw 				nop = NBL_WRITE;
714*5331Samw 			}
715*5331Samw 		} else {
716*5331Samw 			nop = NBL_READ;
717*5331Samw 		}
718*5331Samw 		if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
719*5331Samw 			error = EACCES;
720*5331Samw 			goto done;
721*5331Samw 		}
722*5331Samw 	}
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate 	/*
7250Sstevel@tonic-gate 	 * Ok, now let the vnode map routine do its thing to set things up.
7260Sstevel@tonic-gate 	 */
7270Sstevel@tonic-gate 	error = VOP_MAP(vp, pos, as,
728*5331Samw 	    addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);
7290Sstevel@tonic-gate 
7300Sstevel@tonic-gate 	if (error == 0) {
7310Sstevel@tonic-gate 		if (vp->v_type == VREG &&
7320Sstevel@tonic-gate 		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
7330Sstevel@tonic-gate 			/*
7340Sstevel@tonic-gate 			 * Mark this as an executable vnode
7350Sstevel@tonic-gate 			 */
7360Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
7370Sstevel@tonic-gate 			vp->v_flag |= VVMEXEC;
7380Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
7390Sstevel@tonic-gate 		}
7400Sstevel@tonic-gate 	}
7410Sstevel@tonic-gate 
742*5331Samw done:
743*5331Samw 	if (in_crit)
744*5331Samw 		nbl_end_crit(vp);
7450Sstevel@tonic-gate 	return (error);
7460Sstevel@tonic-gate }
7470Sstevel@tonic-gate 
7480Sstevel@tonic-gate #ifdef _LP64
7490Sstevel@tonic-gate /*
7500Sstevel@tonic-gate  * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
7510Sstevel@tonic-gate  *
7520Sstevel@tonic-gate  * The "large file" mmap routine mmap64(2) is also mapped to this routine
7530Sstevel@tonic-gate  * by the 64-bit version of libc.
7540Sstevel@tonic-gate  *
7550Sstevel@tonic-gate  * Eventually, this should be the only version, and have smmap_common()
7560Sstevel@tonic-gate  * folded back into it again.  Some day.
7570Sstevel@tonic-gate  */
7580Sstevel@tonic-gate caddr_t
7590Sstevel@tonic-gate smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
7600Sstevel@tonic-gate {
7610Sstevel@tonic-gate 	struct file *fp;
7620Sstevel@tonic-gate 	int error;
7630Sstevel@tonic-gate 
7640Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
7650Sstevel@tonic-gate 		error = EINVAL;
7660Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
7670Sstevel@tonic-gate 		error = smmap_common(&addr, len, prot, flags,
7680Sstevel@tonic-gate 		    NULL, (offset_t)pos);
7690Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
7700Sstevel@tonic-gate 		error = smmap_common(&addr, len, prot, flags,
7710Sstevel@tonic-gate 		    fp, (offset_t)pos);
7720Sstevel@tonic-gate 		releasef(fd);
7730Sstevel@tonic-gate 	} else
7740Sstevel@tonic-gate 		error = EBADF;
7750Sstevel@tonic-gate 
7760Sstevel@tonic-gate 	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
7770Sstevel@tonic-gate }
7780Sstevel@tonic-gate #endif	/* _LP64 */
7790Sstevel@tonic-gate 
7800Sstevel@tonic-gate #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
7810Sstevel@tonic-gate 
7820Sstevel@tonic-gate /*
7830Sstevel@tonic-gate  * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
7840Sstevel@tonic-gate  */
7850Sstevel@tonic-gate caddr_t
7860Sstevel@tonic-gate smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
7870Sstevel@tonic-gate {
7880Sstevel@tonic-gate 	struct file *fp;
7890Sstevel@tonic-gate 	int error;
7900Sstevel@tonic-gate 	caddr_t a = (caddr_t)(uintptr_t)addr;
7910Sstevel@tonic-gate 
7920Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
7930Sstevel@tonic-gate 		error = EINVAL;
7940Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
7950Sstevel@tonic-gate 		error = smmap_common(&a, (size_t)len, prot,
7960Sstevel@tonic-gate 		    flags | _MAP_LOW32, NULL, (offset_t)pos);
7970Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
7980Sstevel@tonic-gate 		error = smmap_common(&a, (size_t)len, prot,
7990Sstevel@tonic-gate 		    flags | _MAP_LOW32, fp, (offset_t)pos);
8000Sstevel@tonic-gate 		releasef(fd);
8010Sstevel@tonic-gate 	} else
8020Sstevel@tonic-gate 		error = EBADF;
8030Sstevel@tonic-gate 
8040Sstevel@tonic-gate 	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
8050Sstevel@tonic-gate 
8060Sstevel@tonic-gate 	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
8070Sstevel@tonic-gate }
8080Sstevel@tonic-gate 
8090Sstevel@tonic-gate /*
8100Sstevel@tonic-gate  * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
8110Sstevel@tonic-gate  *
8120Sstevel@tonic-gate  * Now things really get ugly because we can't use the C-style
8130Sstevel@tonic-gate  * calling convention for more than 6 args, and 64-bit parameter
8140Sstevel@tonic-gate  * passing on 32-bit systems is less than clean.
8150Sstevel@tonic-gate  */
8160Sstevel@tonic-gate 
8170Sstevel@tonic-gate struct mmaplf32a {
8180Sstevel@tonic-gate 	caddr_t addr;
8190Sstevel@tonic-gate 	size_t len;
8200Sstevel@tonic-gate #ifdef _LP64
8210Sstevel@tonic-gate 	/*
8220Sstevel@tonic-gate 	 * 32-bit contents, 64-bit cells
8230Sstevel@tonic-gate 	 */
8240Sstevel@tonic-gate 	uint64_t prot;
8250Sstevel@tonic-gate 	uint64_t flags;
8260Sstevel@tonic-gate 	uint64_t fd;
8270Sstevel@tonic-gate 	uint64_t offhi;
8280Sstevel@tonic-gate 	uint64_t offlo;
8290Sstevel@tonic-gate #else
8300Sstevel@tonic-gate 	/*
8310Sstevel@tonic-gate 	 * 32-bit contents, 32-bit cells
8320Sstevel@tonic-gate 	 */
8330Sstevel@tonic-gate 	uint32_t prot;
8340Sstevel@tonic-gate 	uint32_t flags;
8350Sstevel@tonic-gate 	uint32_t fd;
8360Sstevel@tonic-gate 	uint32_t offhi;
8370Sstevel@tonic-gate 	uint32_t offlo;
8380Sstevel@tonic-gate #endif
8390Sstevel@tonic-gate };
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate int
8420Sstevel@tonic-gate smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
8430Sstevel@tonic-gate {
8440Sstevel@tonic-gate 	struct file *fp;
8450Sstevel@tonic-gate 	int error;
8460Sstevel@tonic-gate 	caddr_t a = uap->addr;
8470Sstevel@tonic-gate 	int flags = (int)uap->flags;
8480Sstevel@tonic-gate 	int fd = (int)uap->fd;
8490Sstevel@tonic-gate #ifdef _BIG_ENDIAN
8500Sstevel@tonic-gate 	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
8510Sstevel@tonic-gate #else
8520Sstevel@tonic-gate 	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
8530Sstevel@tonic-gate #endif
8540Sstevel@tonic-gate 
8550Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
8560Sstevel@tonic-gate 		error = EINVAL;
8570Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
8580Sstevel@tonic-gate 		error = smmap_common(&a, uap->len, (int)uap->prot,
8590Sstevel@tonic-gate 		    flags | _MAP_LOW32, NULL, off);
8600Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
8610Sstevel@tonic-gate 		error = smmap_common(&a, uap->len, (int)uap->prot,
8620Sstevel@tonic-gate 		    flags | _MAP_LOW32, fp, off);
8630Sstevel@tonic-gate 		releasef(fd);
8640Sstevel@tonic-gate 	} else
8650Sstevel@tonic-gate 		error = EBADF;
8660Sstevel@tonic-gate 
8670Sstevel@tonic-gate 	if (error == 0)
8680Sstevel@tonic-gate 		rvp->r_val1 = (uintptr_t)a;
8690Sstevel@tonic-gate 	return (error);
8700Sstevel@tonic-gate }
8710Sstevel@tonic-gate 
8720Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL || _ILP32 */
8730Sstevel@tonic-gate 
8740Sstevel@tonic-gate int
8750Sstevel@tonic-gate munmap(caddr_t addr, size_t len)
8760Sstevel@tonic-gate {
8770Sstevel@tonic-gate 	struct proc *p = curproc;
8780Sstevel@tonic-gate 	struct as *as = p->p_as;
8790Sstevel@tonic-gate 
8800Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
8810Sstevel@tonic-gate 		return (set_errno(EINVAL));
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate 	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
8840Sstevel@tonic-gate 		return (set_errno(EINVAL));
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate 	/*
8870Sstevel@tonic-gate 	 * Discard lwpchan mappings.
8880Sstevel@tonic-gate 	 */
8890Sstevel@tonic-gate 	if (p->p_lcp != NULL)
8900Sstevel@tonic-gate 		lwpchan_delete_mapping(p, addr, addr + len);
8910Sstevel@tonic-gate 	if (as_unmap(as, addr, len) != 0)
8920Sstevel@tonic-gate 		return (set_errno(EINVAL));
8930Sstevel@tonic-gate 
8940Sstevel@tonic-gate 	return (0);
8950Sstevel@tonic-gate }
8960Sstevel@tonic-gate 
8970Sstevel@tonic-gate int
8980Sstevel@tonic-gate mprotect(caddr_t addr, size_t len, int prot)
8990Sstevel@tonic-gate {
9000Sstevel@tonic-gate 	struct as *as = curproc->p_as;
9010Sstevel@tonic-gate 	uint_t uprot = prot | PROT_USER;
9020Sstevel@tonic-gate 	int error;
9030Sstevel@tonic-gate 
9040Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
9050Sstevel@tonic-gate 		return (set_errno(EINVAL));
9060Sstevel@tonic-gate 
9070Sstevel@tonic-gate 	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
9080Sstevel@tonic-gate 	case RANGE_OKAY:
9090Sstevel@tonic-gate 		break;
9100Sstevel@tonic-gate 	case RANGE_BADPROT:
9110Sstevel@tonic-gate 		return (set_errno(ENOTSUP));
9120Sstevel@tonic-gate 	case RANGE_BADADDR:
9130Sstevel@tonic-gate 	default:
9140Sstevel@tonic-gate 		return (set_errno(ENOMEM));
9150Sstevel@tonic-gate 	}
9160Sstevel@tonic-gate 
9170Sstevel@tonic-gate 	error = as_setprot(as, addr, len, uprot);
9180Sstevel@tonic-gate 	if (error)
9190Sstevel@tonic-gate 		return (set_errno(error));
9200Sstevel@tonic-gate 	return (0);
9210Sstevel@tonic-gate }
9220Sstevel@tonic-gate 
9230Sstevel@tonic-gate #define	MC_CACHE	128			/* internal result buffer */
9240Sstevel@tonic-gate #define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */
9250Sstevel@tonic-gate 
9260Sstevel@tonic-gate int
9270Sstevel@tonic-gate mincore(caddr_t addr, size_t len, char *vecp)
9280Sstevel@tonic-gate {
9290Sstevel@tonic-gate 	struct as *as = curproc->p_as;
9300Sstevel@tonic-gate 	caddr_t ea;			/* end address of loop */
9310Sstevel@tonic-gate 	size_t rl;			/* inner result length */
9320Sstevel@tonic-gate 	char vec[MC_CACHE];		/* local vector cache */
9330Sstevel@tonic-gate 	int error;
9340Sstevel@tonic-gate 	model_t model;
9350Sstevel@tonic-gate 	long	llen;
9360Sstevel@tonic-gate 
9370Sstevel@tonic-gate 	model = get_udatamodel();
9380Sstevel@tonic-gate 	/*
9390Sstevel@tonic-gate 	 * Validate form of address parameters.
9400Sstevel@tonic-gate 	 */
9410Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
9420Sstevel@tonic-gate 		llen = (long)len;
9430Sstevel@tonic-gate 	} else {
9440Sstevel@tonic-gate 		llen = (int32_t)(size32_t)len;
9450Sstevel@tonic-gate 	}
9460Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
9470Sstevel@tonic-gate 		return (set_errno(EINVAL));
9480Sstevel@tonic-gate 
9490Sstevel@tonic-gate 	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
9500Sstevel@tonic-gate 		return (set_errno(ENOMEM));
9510Sstevel@tonic-gate 
9520Sstevel@tonic-gate 	/*
9530Sstevel@tonic-gate 	 * Loop over subranges of interval [addr : addr + len), recovering
9540Sstevel@tonic-gate 	 * results internally and then copying them out to caller.  Subrange
9550Sstevel@tonic-gate 	 * is based on the size of MC_CACHE, defined above.
9560Sstevel@tonic-gate 	 */
9570Sstevel@tonic-gate 	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
9580Sstevel@tonic-gate 		error = as_incore(as, addr,
9590Sstevel@tonic-gate 		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
9600Sstevel@tonic-gate 		if (rl != 0) {
9610Sstevel@tonic-gate 			rl = (rl + PAGESIZE - 1) / PAGESIZE;
9620Sstevel@tonic-gate 			if (copyout(vec, vecp, rl) != 0)
9630Sstevel@tonic-gate 				return (set_errno(EFAULT));
9640Sstevel@tonic-gate 			vecp += rl;
9650Sstevel@tonic-gate 		}
9660Sstevel@tonic-gate 		if (error != 0)
9670Sstevel@tonic-gate 			return (set_errno(ENOMEM));
9680Sstevel@tonic-gate 	}
9690Sstevel@tonic-gate 	return (0);
9700Sstevel@tonic-gate }
971