xref: /onnv-gate/usr/src/uts/common/os/grow.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28*0Sstevel@tonic-gate /*	  All Rights Reserved  	*/
29*0Sstevel@tonic-gate 
30*0Sstevel@tonic-gate 
31*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
32*0Sstevel@tonic-gate 
33*0Sstevel@tonic-gate #include <sys/types.h>
34*0Sstevel@tonic-gate #include <sys/inttypes.h>
35*0Sstevel@tonic-gate #include <sys/param.h>
36*0Sstevel@tonic-gate #include <sys/sysmacros.h>
37*0Sstevel@tonic-gate #include <sys/systm.h>
38*0Sstevel@tonic-gate #include <sys/signal.h>
39*0Sstevel@tonic-gate #include <sys/user.h>
40*0Sstevel@tonic-gate #include <sys/errno.h>
41*0Sstevel@tonic-gate #include <sys/var.h>
42*0Sstevel@tonic-gate #include <sys/proc.h>
43*0Sstevel@tonic-gate #include <sys/tuneable.h>
44*0Sstevel@tonic-gate #include <sys/debug.h>
45*0Sstevel@tonic-gate #include <sys/cmn_err.h>
46*0Sstevel@tonic-gate #include <sys/cred.h>
47*0Sstevel@tonic-gate #include <sys/vnode.h>
48*0Sstevel@tonic-gate #include <sys/vfs.h>
49*0Sstevel@tonic-gate #include <sys/vm.h>
50*0Sstevel@tonic-gate #include <sys/file.h>
51*0Sstevel@tonic-gate #include <sys/mman.h>
52*0Sstevel@tonic-gate #include <sys/vmparam.h>
53*0Sstevel@tonic-gate #include <sys/fcntl.h>
54*0Sstevel@tonic-gate #include <sys/lwpchan_impl.h>
55*0Sstevel@tonic-gate 
56*0Sstevel@tonic-gate #include <vm/hat.h>
57*0Sstevel@tonic-gate #include <vm/as.h>
58*0Sstevel@tonic-gate #include <vm/seg.h>
59*0Sstevel@tonic-gate #include <vm/seg_dev.h>
60*0Sstevel@tonic-gate #include <vm/seg_vn.h>
61*0Sstevel@tonic-gate 
62*0Sstevel@tonic-gate int use_brk_lpg = 1;
63*0Sstevel@tonic-gate int use_stk_lpg = 1;
64*0Sstevel@tonic-gate int use_zmap_lpg = 1;
65*0Sstevel@tonic-gate 
66*0Sstevel@tonic-gate static int brk_lpg(caddr_t nva);
67*0Sstevel@tonic-gate static int grow_lpg(caddr_t sp);
68*0Sstevel@tonic-gate 
69*0Sstevel@tonic-gate int
70*0Sstevel@tonic-gate brk(caddr_t nva)
71*0Sstevel@tonic-gate {
72*0Sstevel@tonic-gate 	int error;
73*0Sstevel@tonic-gate 	proc_t *p = curproc;
74*0Sstevel@tonic-gate 
75*0Sstevel@tonic-gate 	/*
76*0Sstevel@tonic-gate 	 * Serialize brk operations on an address space.
77*0Sstevel@tonic-gate 	 * This also serves as the lock protecting p_brksize
78*0Sstevel@tonic-gate 	 * and p_brkpageszc.
79*0Sstevel@tonic-gate 	 */
80*0Sstevel@tonic-gate 	as_rangelock(p->p_as);
81*0Sstevel@tonic-gate 	if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
82*0Sstevel@tonic-gate 		error = brk_lpg(nva);
83*0Sstevel@tonic-gate 	} else {
84*0Sstevel@tonic-gate 		error = brk_internal(nva, p->p_brkpageszc);
85*0Sstevel@tonic-gate 	}
86*0Sstevel@tonic-gate 	as_rangeunlock(p->p_as);
87*0Sstevel@tonic-gate 	return ((error != 0 ? set_errno(error) : 0));
88*0Sstevel@tonic-gate }
89*0Sstevel@tonic-gate 
/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 *
 * Caller must hold the address-space range lock (taken in brk()),
 * which protects p_brksize and p_brkpageszc.
 */
static int
brk_lpg(caddr_t nva)
{
	struct proc *p = curproc;
	size_t pgsz, len;
	caddr_t addr;
	caddr_t bssbase = p->p_bssbase;
	caddr_t brkbase = p->p_brkbase;
	int oszc, szc;		/* current and newly selected page size codes */
	int err;
	int remap = 0;		/* set by map_pgsz(): remap existing pages */

	oszc = p->p_brkpageszc;

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk_internal() will initialize it.
	 */
	if (brkbase == 0) {
		return (brk_internal(nva, oszc));
	}

	len = nva - bssbase;

	/* Ask platform code for the preferred heap page size for this span. */
	pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, &remap);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 */
	if (szc <= oszc) {
		err = brk_internal(nva, oszc);
		/* If failed, back off to base page size. */
		if (err != 0 && oszc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	if (remap == 0) {
		/*
		 * Map from the current brk end up to the new page size
		 * alignment using the current page size.
		 */
		addr = brkbase + p->p_brksize;
		addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (addr < nva) {
			err = brk_internal(addr, oszc);
			/*
			 * In failure case, try again if oszc is not base page
			 * size, then return err.
			 */
			if (err != 0) {
				if (oszc != 0) {
					err = brk_internal(nva, 0);
				}
				return (err);
			}
		}
	}

	/* Extend the remainder of the heap with the new (larger) page size. */
	err = brk_internal(nva, szc);
	/* If using szc failed, map with base page size and return. */
	if (err != 0) {
		if (szc != 0) {
			err = brk_internal(nva, 0);
		}
		return (err);
	}

	if (remap != 0) {
		/*
		 * Round up brk base to a large page boundary and remap
		 * anything in the segment already faulted in beyond that
		 * point.
		 */
		addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
		len = (brkbase + p->p_brksize) - addr;
		/* advisory, so ignore errors */
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
	}

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}
183*0Sstevel@tonic-gate 
/*
 * Resize the heap so that it ends at nva, adding or removing zfod
 * mappings as needed.  brkszc is the page size code to align the new
 * heap end to (0 means base PAGESIZE).  Caller holds the as range
 * lock, which protects p_brksize and p_brkpageszc.
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
	caddr_t ova;			/* current break address */
	size_t size;
	int	error;
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t pgsz;
	uint_t szc;
	rctl_qty_t as_rctl;		/* enforced RLIMIT_DATA value */

	/*
	 * extend heap to brkszc alignment but use current p->p_brkpageszc
	 * for the newly created segment. This allows the new extension
	 * segment to be concatenated successfully with the existing brk
	 * segment.
	 */
	if ((szc = brkszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
	} else {
		pgsz = PAGESIZE;
	}

	mutex_enter(&p->p_lock);
	as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
	    p->p_rctls, p);
	mutex_exit(&p->p_lock);

	/*
	 * If p_brkbase has not yet been set, the first call
	 * to brk() will initialize it.
	 */
	if (p->p_brkbase == 0)
		p->p_brkbase = nva;

	/*
	 * Before multiple page size support existed p_brksize was the value
	 * not rounded to the pagesize (i.e. it stored the exact user request
	 * for heap size). If pgsz is greater than PAGESIZE calculate the
	 * heap size as the real new heap size by rounding it up to pgsz.
	 * This is useful since we may want to know where the heap ends
	 * without knowing heap pagesize (e.g. some old code) and also if
	 * heap pagesize changes we can update p_brkpageszc but delay adding
	 * new mapping yet still know from p_brksize where the heap really
	 * ends. The user requested heap end is stored in libc variable.
	 */
	if (pgsz > PAGESIZE) {
		caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
		size = tnva - p->p_brkbase;
		if (tnva < p->p_brkbase || (size > p->p_brksize &&
		    size > (size_t)as_rctl)) {
			/*
			 * Rounding up to the large page size wrapped around
			 * or pushed the heap over the data limit; fall back
			 * to base page size for this request.
			 */
			szc = 0;
			pgsz = PAGESIZE;
			size = nva - p->p_brkbase;
		}
	} else {
		size = nva - p->p_brkbase;
	}

	/*
	 * use PAGESIZE to roundup ova because we want to know the real value
	 * of the current heap end in case p_brkpageszc changes since the last
	 * p_brksize was computed.
	 */
	nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
	ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
		PAGESIZE);

	if ((nva < p->p_brkbase) || (size > p->p_brksize &&
	    size > as_rctl)) {
		/* Break below heap base, or growth exceeds RLIMIT_DATA. */
		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
		    RCA_SAFE);
		mutex_exit(&p->p_lock);
		return (ENOMEM);
	}

	if (nva > ova) {
		struct segvn_crargs crargs =
		    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

		/* Honor per-process data protection: drop exec if not set. */
		if (!(p->p_datprot & PROT_EXEC)) {
			crargs.prot &= ~PROT_EXEC;
		}

		/*
		 * Add new zfod mapping to extend UNIX data segment
		 */
		crargs.szc = szc;
		crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
		error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
		    &crargs);
		if (error) {
			return (error);
		}

	} else if (nva < ova) {
		/*
		 * Release mapping to shrink UNIX data segment.
		 */
		(void) as_unmap(as, nva, (size_t)(ova - nva));
	}
	/* Commit the new heap size and page size code. */
	p->p_brksize = size;
	p->p_brkpageszc = szc;
	return (0);
}
295*0Sstevel@tonic-gate 
296*0Sstevel@tonic-gate /*
297*0Sstevel@tonic-gate  * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
298*0Sstevel@tonic-gate  * This routine assumes that the stack grows downward.
299*0Sstevel@tonic-gate  */
300*0Sstevel@tonic-gate int
301*0Sstevel@tonic-gate grow(caddr_t sp)
302*0Sstevel@tonic-gate {
303*0Sstevel@tonic-gate 	struct proc *p = curproc;
304*0Sstevel@tonic-gate 	int err;
305*0Sstevel@tonic-gate 
306*0Sstevel@tonic-gate 	/*
307*0Sstevel@tonic-gate 	 * Serialize grow operations on an address space.
308*0Sstevel@tonic-gate 	 * This also serves as the lock protecting p_stksize
309*0Sstevel@tonic-gate 	 * and p_stkpageszc.
310*0Sstevel@tonic-gate 	 */
311*0Sstevel@tonic-gate 	as_rangelock(p->p_as);
312*0Sstevel@tonic-gate 	if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
313*0Sstevel@tonic-gate 		err = grow_lpg(sp);
314*0Sstevel@tonic-gate 	} else {
315*0Sstevel@tonic-gate 		err = grow_internal(sp, p->p_stkpageszc);
316*0Sstevel@tonic-gate 	}
317*0Sstevel@tonic-gate 	as_rangeunlock(p->p_as);
318*0Sstevel@tonic-gate 	return ((err == 0 ? 1 : 0));
319*0Sstevel@tonic-gate }
320*0Sstevel@tonic-gate 
/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 *
 * Caller must hold the address-space range lock (taken in grow()),
 * which protects p_stksize and p_stkpageszc.
 */
static int
grow_lpg(caddr_t sp)
{
	struct proc *p = curproc;
	size_t pgsz;
	size_t len, newsize;
	caddr_t addr, oldsp;
	int oszc, szc;		/* current and newly selected page size codes */
	int err;
	int remap = 0;		/* set by map_pgsz(): remap existing pages */

	newsize = p->p_usrstack - sp;

	oszc = p->p_stkpageszc;
	/* Ask platform code for the preferred stack page size. */
	pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, &remap);
	szc = page_szc(pgsz);

	/*
	 * Covers two cases:
	 * 1. page_szc() returns -1 for invalid page size, so we want to
	 * ignore it in that case.
	 * 2. By design we never decrease page size, as it is more stable.
	 * This shouldn't happen as the stack never shrinks.
	 */
	if (szc <= oszc) {
		err = grow_internal(sp, oszc);
		/* failed, fall back to base page size */
		if (err != 0 && oszc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	/*
	 * We've grown sufficiently to switch to a new page size.
	 * If we're not going to remap the whole segment with the new
	 * page size, split the grow into two operations: map to the new
	 * page size alignment boundary with the existing page size, then
	 * map the rest with the new page size.
	 */
	err = 0;
	if (remap == 0) {
		oldsp = p->p_usrstack - p->p_stksize;
		/* Stack grows down: align the current low end down to pgsz. */
		addr = (caddr_t)P2ALIGN((uintptr_t)oldsp, pgsz);
		if (addr > sp) {
			err = grow_internal(addr, oszc);
			/*
			 * In this case, grow with oszc failed, so grow all the
			 * way to sp with base page size.
			 */
			if (err != 0) {
				if (oszc != 0) {
					err = grow_internal(sp, 0);
				}
				return (err);
			}
		}
	}

	err = grow_internal(sp, szc);
	/* The grow with szc failed, so fall back to base page size. */
	if (err != 0) {
		if (szc != 0) {
			err = grow_internal(sp, 0);
		}
		return (err);
	}

	if (remap) {
		/*
		 * Round up stack pointer to a large page boundary and remap
		 * any pgsz pages in the segment already faulted in beyond that
		 * point.
		 */
		addr = p->p_usrstack - p->p_stksize;
		addr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		len = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz) - addr;
		/* advisory, so ignore errors */
		(void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
	}

	/* Update page size code for stack. */
	p->p_stkpageszc = szc;

	ASSERT(err == 0);
	return (err);		/* should always be 0 */
}
413*0Sstevel@tonic-gate 
/*
 * Extend the stack down to sp, aligned to the page size implied by
 * growszc (0 means base PAGESIZE), and prefault the new pages.
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
	struct proc *p = curproc;
	struct as *as = p->p_as;
	size_t newsize = p->p_usrstack - sp;	/* requested stack size */
	size_t oldsize;
	int    error;
	size_t pgsz;
	uint_t szc;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

	ASSERT(sp < p->p_usrstack);

	/*
	 * grow to growszc alignment but use current p->p_stkpageszc for
	 * the segvn_crargs szc passed to segvn_create. For memcntl to
	 * increase the szc, this allows the new extension segment to be
	 * concatenated successfully with the existing stack segment.
	 */
	if ((szc = growszc) != 0) {
		pgsz = page_get_pagesize(szc);
		ASSERT(pgsz > PAGESIZE);
		newsize = P2ROUNDUP(newsize, pgsz);
		if (newsize > (size_t)p->p_stk_ctl) {
			/*
			 * Large-page rounding alone would exceed the stack
			 * limit; retry the request with base page size.
			 */
			szc = 0;
			pgsz = PAGESIZE;
			newsize = p->p_usrstack - sp;
		}
	} else {
		pgsz = PAGESIZE;
	}

	if (newsize > (size_t)p->p_stk_ctl) {
		/* Over RLIMIT_STACK even at base page size: signal and fail. */
		(void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	oldsize = p->p_stksize;
	newsize = P2ROUNDUP(newsize, pgsz);
	ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

	if (newsize <= oldsize) {	/* prevent the stack from shrinking */
		return (0);
	}

	/* Honor per-process stack protection: drop exec if not set. */
	if (!(p->p_stkprot & PROT_EXEC)) {
		crargs.prot &= ~PROT_EXEC;
	}
	/*
	 * extend stack with the p_stkpageszc. growszc is different than
	 * p_stkpageszc only on a memcntl to increase the stack pagesize.
	 */
	crargs.szc = p->p_stkpageszc;
	crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

	if ((error = as_map(as, p->p_usrstack - newsize, newsize - oldsize,
	    segvn_create, &crargs)) != 0) {
		if (error == EAGAIN) {
			cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
			    "for pid %d (%s)", p->p_pid, u.u_comm);
		}
		return (error);
	}
	p->p_stksize = newsize;


	/*
	 * Set up translations so the process doesn't have to fault in
	 * the stack pages we just gave it.
	 */
	(void) as_fault(as->a_hat, as,
	    p->p_usrstack - newsize, newsize - oldsize, F_INVAL, S_WRITE);

	return (0);
}
496*0Sstevel@tonic-gate 
/*
 * Used for MAP_ANON - fast way to get anonymous pages
 *
 * Maps len bytes of zfod anonymous memory at *addrp (MAP_FIXED) or at an
 * address chosen by map_addr() (written back through addrp).  When large
 * pages are enabled the range may be carved into up to five chunks mapped
 * at different page sizes (see the diagram below).
 * Returns 0 on success, errno on failure.
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
	struct segvn_crargs a, b;
	struct proc *p = curproc;
	int err;
	size_t pgsz;
	size_t l0, l1, l2, l3, l4; /* 0th through 4th chunks */
	caddr_t ruaddr, ruaddr0; /* rounded up addresses */
	extern size_t auto_lpg_va_default;

	/* Requested protections must be a subset of PROT_ALL. */
	if (((PROT_ALL & uprot) != uprot))
		return (EACCES);

	if ((flags & MAP_FIXED) != 0) {
		caddr_t userlimit;

		/*
		 * Use the user address.  First verify that
		 * the address to be used is page aligned.
		 * Then make some simple bounds checks.
		 */
		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
			return (EINVAL);

		userlimit = flags & _MAP_LOW32 ?
		    (caddr_t)USERLIMIT32 : as->a_userlimit;
		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
		case RANGE_OKAY:
			break;
		case RANGE_BADPROT:
			return (ENOTSUP);
		case RANGE_BADADDR:
		default:
			return (ENOMEM);
		}
		/* MAP_FIXED replaces any existing mappings in the range. */
		(void) as_unmap(as, *addrp, len);
	} else {
		/*
		 * No need to worry about vac alignment for anonymous
		 * pages since this is a "clone" object that doesn't
		 * yet exist.
		 */
		map_addr(addrp, len, pos, 0, flags);
		if (*addrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Use the seg_vn segment driver; passing in the NULL amp
	 * gives the desired "cloning" effect.
	 */
	a.vp = NULL;
	a.offset = 0;
	a.type = flags & MAP_TYPE;
	a.prot = uprot;
	a.maxprot = PROT_ALL;
	a.flags = flags & ~MAP_TYPE;
	a.cred = CRED();
	a.amp = NULL;
	a.szc = 0;
	a.lgrp_mem_policy_flags = 0;

	/*
	 * Call arch-specific map_pgsz routine to pick best page size to map
	 * this segment, and break the mapping up into parts if required.
	 *
	 * The parts work like this:
	 *
	 * addr		---------
	 *		|	| l0
	 *		---------
	 *		|	| l1
	 *		---------
	 *		|	| l2
	 *		---------
	 *		|	| l3
	 *		---------
	 *		|	| l4
	 *		---------
	 * addr+len
	 *
	 * Starting from the middle, l2 is the number of bytes mapped by the
	 * selected large page.  l1 and l3 are mapped by auto_lpg_va_default
	 * page size pages, and l0 and l4 are mapped by base page size pages.
	 * If auto_lpg_va_default is the base page size, then l0 == l4 == 0.
	 * If the requested address or length are aligned to the selected large
	 * page size, l1 or l3 may also be 0.
	 */
	if (use_zmap_lpg) {

		pgsz = map_pgsz(MAPPGSZ_VA, p, *addrp, len, NULL);
		if (pgsz <= PAGESIZE || len < pgsz) {
			/* No large page benefit; map the whole range as-is. */
			return (as_map(as, *addrp, len, segvn_create, &a));
		}

		/* Compute the chunk boundaries described in the diagram. */
		ruaddr = (caddr_t)P2ROUNDUP((uintptr_t)*addrp, pgsz);
		if (auto_lpg_va_default != MMU_PAGESIZE) {
			ruaddr0 = (caddr_t)P2ROUNDUP((uintptr_t)*addrp,
			    auto_lpg_va_default);
			l0 = ruaddr0 - *addrp;
		} else {
			l0 = 0;
			ruaddr0 = *addrp;
		}
		l1 = ruaddr - ruaddr0;
		l3 = P2PHASE(len - l0 - l1, pgsz);
		if (auto_lpg_va_default == MMU_PAGESIZE) {
			l4 = 0;
		} else {
			l4 = P2PHASE(l3, auto_lpg_va_default);
			l3 -= l4;
		}
		l2 = len - l0 - l1 - l3 - l4;

		/* l0: leading base-page-size chunk. */
		if (l0) {
			b = a;
			err = as_map(as, *addrp, l0, segvn_create, &b);
			if (err) {
				return (err);
			}
		}

		/* l1: chunk mapped at auto_lpg_va_default size. */
		if (l1) {
			b = a;
			b.szc = page_szc(auto_lpg_va_default);
			err = as_map(as, ruaddr0, l1, segvn_create, &b);
			if (err) {
				goto error1;
			}
		}

		/* l2: the large-page middle chunk. */
		if (l2) {
			b = a;
			b.szc = page_szc(pgsz);
			err = as_map(as, ruaddr, l2, segvn_create, &b);
			if (err) {
				goto error2;
			}
		}

		/* l3: trailing auto_lpg_va_default-size chunk. */
		if (l3) {
			b = a;
			b.szc = page_szc(auto_lpg_va_default);
			err = as_map(as, ruaddr + l2, l3, segvn_create, &b);
			if (err) {
				goto error3;
			}
		}
		/* l4: trailing base-page-size chunk. */
		if (l4) {
			err = as_map(as, ruaddr + l2 + l3, l4, segvn_create,
			    &a);
			if (err) {
				/*
				 * Unwind: the labels below tear down every
				 * chunk mapped so far, in reverse order.
				 */
error3:
				if (l3) {
					(void) as_unmap(as, ruaddr + l2, l3);
				}
error2:
				if (l2) {
					(void) as_unmap(as, ruaddr, l2);
				}
error1:
				if (l1) {
					(void) as_unmap(as, ruaddr0, l1);
				}
				if (l0) {
					(void) as_unmap(as, *addrp, l0);
				}
				return (err);
			}
		}

		return (0);
	}

	/* Large pages disabled: single base-page-size mapping. */
	return (as_map(as, *addrp, len, segvn_create, &a));
}
678*0Sstevel@tonic-gate 
679*0Sstevel@tonic-gate static int
680*0Sstevel@tonic-gate smmap_common(caddr_t *addrp, size_t len,
681*0Sstevel@tonic-gate     int prot, int flags, struct file *fp, offset_t pos)
682*0Sstevel@tonic-gate {
683*0Sstevel@tonic-gate 	struct vnode *vp;
684*0Sstevel@tonic-gate 	struct as *as = curproc->p_as;
685*0Sstevel@tonic-gate 	uint_t uprot, maxprot, type;
686*0Sstevel@tonic-gate 	int error;
687*0Sstevel@tonic-gate 
688*0Sstevel@tonic-gate 	if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
689*0Sstevel@tonic-gate 	    _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
690*0Sstevel@tonic-gate 	    MAP_TEXT | MAP_INITDATA)) != 0) {
691*0Sstevel@tonic-gate 		/* | MAP_RENAME */	/* not implemented, let user know */
692*0Sstevel@tonic-gate 		return (EINVAL);
693*0Sstevel@tonic-gate 	}
694*0Sstevel@tonic-gate 
695*0Sstevel@tonic-gate 	if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
696*0Sstevel@tonic-gate 		return (EINVAL);
697*0Sstevel@tonic-gate 	}
698*0Sstevel@tonic-gate 
699*0Sstevel@tonic-gate 	if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
700*0Sstevel@tonic-gate 		return (EINVAL);
701*0Sstevel@tonic-gate 	}
702*0Sstevel@tonic-gate 
703*0Sstevel@tonic-gate #if defined(__sparc)
704*0Sstevel@tonic-gate 	/*
705*0Sstevel@tonic-gate 	 * See if this is an "old mmap call".  If so, remember this
706*0Sstevel@tonic-gate 	 * fact and convert the flags value given to mmap to indicate
707*0Sstevel@tonic-gate 	 * the specified address in the system call must be used.
708*0Sstevel@tonic-gate 	 * _MAP_NEW is turned set by all new uses of mmap.
709*0Sstevel@tonic-gate 	 */
710*0Sstevel@tonic-gate 	if ((flags & _MAP_NEW) == 0)
711*0Sstevel@tonic-gate 		flags |= MAP_FIXED;
712*0Sstevel@tonic-gate #endif
713*0Sstevel@tonic-gate 	flags &= ~_MAP_NEW;
714*0Sstevel@tonic-gate 
715*0Sstevel@tonic-gate 	type = flags & MAP_TYPE;
716*0Sstevel@tonic-gate 	if (type != MAP_PRIVATE && type != MAP_SHARED)
717*0Sstevel@tonic-gate 		return (EINVAL);
718*0Sstevel@tonic-gate 
719*0Sstevel@tonic-gate 
720*0Sstevel@tonic-gate 	if (flags & MAP_ALIGN) {
721*0Sstevel@tonic-gate 
722*0Sstevel@tonic-gate 		if (flags & MAP_FIXED)
723*0Sstevel@tonic-gate 			return (EINVAL);
724*0Sstevel@tonic-gate 
725*0Sstevel@tonic-gate 		/* alignment needs to be a power of 2 >= page size */
726*0Sstevel@tonic-gate 		if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
727*0Sstevel@tonic-gate 			!ISP2((uintptr_t)*addrp))
728*0Sstevel@tonic-gate 			return (EINVAL);
729*0Sstevel@tonic-gate 	}
730*0Sstevel@tonic-gate 	/*
731*0Sstevel@tonic-gate 	 * Check for bad lengths and file position.
732*0Sstevel@tonic-gate 	 * We let the VOP_MAP routine check for negative lengths
733*0Sstevel@tonic-gate 	 * since on some vnode types this might be appropriate.
734*0Sstevel@tonic-gate 	 */
735*0Sstevel@tonic-gate 	if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
736*0Sstevel@tonic-gate 		return (EINVAL);
737*0Sstevel@tonic-gate 
738*0Sstevel@tonic-gate 	maxprot = PROT_ALL;		/* start out allowing all accesses */
739*0Sstevel@tonic-gate 	uprot = prot | PROT_USER;
740*0Sstevel@tonic-gate 
741*0Sstevel@tonic-gate 	if (fp == NULL) {
742*0Sstevel@tonic-gate 		ASSERT(flags & MAP_ANON);
743*0Sstevel@tonic-gate 		as_rangelock(as);
744*0Sstevel@tonic-gate 		error = zmap(as, addrp, len, uprot, flags, pos);
745*0Sstevel@tonic-gate 		as_rangeunlock(as);
746*0Sstevel@tonic-gate 		return (error);
747*0Sstevel@tonic-gate 	} else if ((flags & MAP_ANON) != 0)
748*0Sstevel@tonic-gate 		return (EINVAL);
749*0Sstevel@tonic-gate 
750*0Sstevel@tonic-gate 	vp = fp->f_vnode;
751*0Sstevel@tonic-gate 
752*0Sstevel@tonic-gate 	/* Can't execute code from "noexec" mounted filesystem. */
753*0Sstevel@tonic-gate 	if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
754*0Sstevel@tonic-gate 		maxprot &= ~PROT_EXEC;
755*0Sstevel@tonic-gate 
756*0Sstevel@tonic-gate 	/*
757*0Sstevel@tonic-gate 	 * These checks were added as part of large files.
758*0Sstevel@tonic-gate 	 *
759*0Sstevel@tonic-gate 	 * Return EINVAL if the initial position is negative; return EOVERFLOW
760*0Sstevel@tonic-gate 	 * if (offset + len) would overflow the maximum allowed offset for the
761*0Sstevel@tonic-gate 	 * type of file descriptor being used.
762*0Sstevel@tonic-gate 	 */
763*0Sstevel@tonic-gate 	if (vp->v_type == VREG) {
764*0Sstevel@tonic-gate 		if (pos < (offset_t)0)
765*0Sstevel@tonic-gate 			return (EINVAL);
766*0Sstevel@tonic-gate 		if ((offset_t)len > (OFFSET_MAX(fp) - pos))
767*0Sstevel@tonic-gate 			return (EOVERFLOW);
768*0Sstevel@tonic-gate 	}
769*0Sstevel@tonic-gate 
770*0Sstevel@tonic-gate 	if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
771*0Sstevel@tonic-gate 		/* no write access allowed */
772*0Sstevel@tonic-gate 		maxprot &= ~PROT_WRITE;
773*0Sstevel@tonic-gate 	}
774*0Sstevel@tonic-gate 
775*0Sstevel@tonic-gate 	/*
776*0Sstevel@tonic-gate 	 * XXX - Do we also adjust maxprot based on protections
777*0Sstevel@tonic-gate 	 * of the vnode?  E.g. if no execute permission is given
778*0Sstevel@tonic-gate 	 * on the vnode for the current user, maxprot probably
779*0Sstevel@tonic-gate 	 * should disallow PROT_EXEC also?  This is different
780*0Sstevel@tonic-gate 	 * from the write access as this would be a per vnode
781*0Sstevel@tonic-gate 	 * test as opposed to a per fd test for writability.
782*0Sstevel@tonic-gate 	 */
783*0Sstevel@tonic-gate 
784*0Sstevel@tonic-gate 	/*
785*0Sstevel@tonic-gate 	 * Verify that the specified protections are not greater than
786*0Sstevel@tonic-gate 	 * the maximum allowable protections.  Also test to make sure
787*0Sstevel@tonic-gate 	 * that the file descriptor does allows for read access since
788*0Sstevel@tonic-gate 	 * "write only" mappings are hard to do since normally we do
789*0Sstevel@tonic-gate 	 * the read from the file before the page can be written.
790*0Sstevel@tonic-gate 	 */
791*0Sstevel@tonic-gate 	if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
792*0Sstevel@tonic-gate 		return (EACCES);
793*0Sstevel@tonic-gate 
794*0Sstevel@tonic-gate 	/*
795*0Sstevel@tonic-gate 	 * If the user specified an address, do some simple checks here
796*0Sstevel@tonic-gate 	 */
797*0Sstevel@tonic-gate 	if ((flags & MAP_FIXED) != 0) {
798*0Sstevel@tonic-gate 		caddr_t userlimit;
799*0Sstevel@tonic-gate 
800*0Sstevel@tonic-gate 		/*
801*0Sstevel@tonic-gate 		 * Use the user address.  First verify that
802*0Sstevel@tonic-gate 		 * the address to be used is page aligned.
803*0Sstevel@tonic-gate 		 * Then make some simple bounds checks.
804*0Sstevel@tonic-gate 		 */
805*0Sstevel@tonic-gate 		if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
806*0Sstevel@tonic-gate 			return (EINVAL);
807*0Sstevel@tonic-gate 
808*0Sstevel@tonic-gate 		userlimit = flags & _MAP_LOW32 ?
809*0Sstevel@tonic-gate 		    (caddr_t)USERLIMIT32 : as->a_userlimit;
810*0Sstevel@tonic-gate 		switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
811*0Sstevel@tonic-gate 		case RANGE_OKAY:
812*0Sstevel@tonic-gate 			break;
813*0Sstevel@tonic-gate 		case RANGE_BADPROT:
814*0Sstevel@tonic-gate 			return (ENOTSUP);
815*0Sstevel@tonic-gate 		case RANGE_BADADDR:
816*0Sstevel@tonic-gate 		default:
817*0Sstevel@tonic-gate 			return (ENOMEM);
818*0Sstevel@tonic-gate 		}
819*0Sstevel@tonic-gate 	}
820*0Sstevel@tonic-gate 
821*0Sstevel@tonic-gate 
822*0Sstevel@tonic-gate 	/*
823*0Sstevel@tonic-gate 	 * Ok, now let the vnode map routine do its thing to set things up.
824*0Sstevel@tonic-gate 	 */
825*0Sstevel@tonic-gate 	error = VOP_MAP(vp, pos, as,
826*0Sstevel@tonic-gate 	    addrp, len, uprot, maxprot, flags, fp->f_cred);
827*0Sstevel@tonic-gate 
828*0Sstevel@tonic-gate 	if (error == 0) {
829*0Sstevel@tonic-gate 		if (vp->v_type == VREG &&
830*0Sstevel@tonic-gate 		    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
831*0Sstevel@tonic-gate 			/*
832*0Sstevel@tonic-gate 			 * Mark this as an executable vnode
833*0Sstevel@tonic-gate 			 */
834*0Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
835*0Sstevel@tonic-gate 			vp->v_flag |= VVMEXEC;
836*0Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
837*0Sstevel@tonic-gate 		}
838*0Sstevel@tonic-gate 	}
839*0Sstevel@tonic-gate 
840*0Sstevel@tonic-gate 	return (error);
841*0Sstevel@tonic-gate }
842*0Sstevel@tonic-gate 
843*0Sstevel@tonic-gate #ifdef _LP64
844*0Sstevel@tonic-gate /*
845*0Sstevel@tonic-gate  * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
846*0Sstevel@tonic-gate  *
847*0Sstevel@tonic-gate  * The "large file" mmap routine mmap64(2) is also mapped to this routine
848*0Sstevel@tonic-gate  * by the 64-bit version of libc.
849*0Sstevel@tonic-gate  *
850*0Sstevel@tonic-gate  * Eventually, this should be the only version, and have smmap_common()
851*0Sstevel@tonic-gate  * folded back into it again.  Some day.
852*0Sstevel@tonic-gate  */
853*0Sstevel@tonic-gate caddr_t
854*0Sstevel@tonic-gate smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
855*0Sstevel@tonic-gate {
856*0Sstevel@tonic-gate 	struct file *fp;
857*0Sstevel@tonic-gate 	int error;
858*0Sstevel@tonic-gate 
859*0Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
860*0Sstevel@tonic-gate 		error = EINVAL;
861*0Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
862*0Sstevel@tonic-gate 		error = smmap_common(&addr, len, prot, flags,
863*0Sstevel@tonic-gate 		    NULL, (offset_t)pos);
864*0Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
865*0Sstevel@tonic-gate 		error = smmap_common(&addr, len, prot, flags,
866*0Sstevel@tonic-gate 		    fp, (offset_t)pos);
867*0Sstevel@tonic-gate 		releasef(fd);
868*0Sstevel@tonic-gate 	} else
869*0Sstevel@tonic-gate 		error = EBADF;
870*0Sstevel@tonic-gate 
871*0Sstevel@tonic-gate 	return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
872*0Sstevel@tonic-gate }
873*0Sstevel@tonic-gate #endif	/* _LP64 */
874*0Sstevel@tonic-gate 
875*0Sstevel@tonic-gate #if defined(_SYSCALL32_IMPL) || defined(_ILP32)
876*0Sstevel@tonic-gate 
877*0Sstevel@tonic-gate /*
878*0Sstevel@tonic-gate  * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
879*0Sstevel@tonic-gate  */
880*0Sstevel@tonic-gate caddr_t
881*0Sstevel@tonic-gate smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
882*0Sstevel@tonic-gate {
883*0Sstevel@tonic-gate 	struct file *fp;
884*0Sstevel@tonic-gate 	int error;
885*0Sstevel@tonic-gate 	caddr_t a = (caddr_t)(uintptr_t)addr;
886*0Sstevel@tonic-gate 
887*0Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
888*0Sstevel@tonic-gate 		error = EINVAL;
889*0Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
890*0Sstevel@tonic-gate 		error = smmap_common(&a, (size_t)len, prot,
891*0Sstevel@tonic-gate 		    flags | _MAP_LOW32, NULL, (offset_t)pos);
892*0Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
893*0Sstevel@tonic-gate 		error = smmap_common(&a, (size_t)len, prot,
894*0Sstevel@tonic-gate 		    flags | _MAP_LOW32, fp, (offset_t)pos);
895*0Sstevel@tonic-gate 		releasef(fd);
896*0Sstevel@tonic-gate 	} else
897*0Sstevel@tonic-gate 		error = EBADF;
898*0Sstevel@tonic-gate 
899*0Sstevel@tonic-gate 	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);
900*0Sstevel@tonic-gate 
901*0Sstevel@tonic-gate 	return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
902*0Sstevel@tonic-gate }
903*0Sstevel@tonic-gate 
904*0Sstevel@tonic-gate /*
905*0Sstevel@tonic-gate  * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
906*0Sstevel@tonic-gate  *
907*0Sstevel@tonic-gate  * Now things really get ugly because we can't use the C-style
908*0Sstevel@tonic-gate  * calling convention for more than 6 args, and 64-bit parameter
909*0Sstevel@tonic-gate  * passing on 32-bit systems is less than clean.
910*0Sstevel@tonic-gate  */
911*0Sstevel@tonic-gate 
912*0Sstevel@tonic-gate struct mmaplf32a {
913*0Sstevel@tonic-gate 	caddr_t addr;
914*0Sstevel@tonic-gate 	size_t len;
915*0Sstevel@tonic-gate #ifdef _LP64
916*0Sstevel@tonic-gate 	/*
917*0Sstevel@tonic-gate 	 * 32-bit contents, 64-bit cells
918*0Sstevel@tonic-gate 	 */
919*0Sstevel@tonic-gate 	uint64_t prot;
920*0Sstevel@tonic-gate 	uint64_t flags;
921*0Sstevel@tonic-gate 	uint64_t fd;
922*0Sstevel@tonic-gate 	uint64_t offhi;
923*0Sstevel@tonic-gate 	uint64_t offlo;
924*0Sstevel@tonic-gate #else
925*0Sstevel@tonic-gate 	/*
926*0Sstevel@tonic-gate 	 * 32-bit contents, 32-bit cells
927*0Sstevel@tonic-gate 	 */
928*0Sstevel@tonic-gate 	uint32_t prot;
929*0Sstevel@tonic-gate 	uint32_t flags;
930*0Sstevel@tonic-gate 	uint32_t fd;
931*0Sstevel@tonic-gate 	uint32_t offhi;
932*0Sstevel@tonic-gate 	uint32_t offlo;
933*0Sstevel@tonic-gate #endif
934*0Sstevel@tonic-gate };
935*0Sstevel@tonic-gate 
936*0Sstevel@tonic-gate int
937*0Sstevel@tonic-gate smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
938*0Sstevel@tonic-gate {
939*0Sstevel@tonic-gate 	struct file *fp;
940*0Sstevel@tonic-gate 	int error;
941*0Sstevel@tonic-gate 	caddr_t a = uap->addr;
942*0Sstevel@tonic-gate 	int flags = (int)uap->flags;
943*0Sstevel@tonic-gate 	int fd = (int)uap->fd;
944*0Sstevel@tonic-gate #ifdef _BIG_ENDIAN
945*0Sstevel@tonic-gate 	offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
946*0Sstevel@tonic-gate #else
947*0Sstevel@tonic-gate 	offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
948*0Sstevel@tonic-gate #endif
949*0Sstevel@tonic-gate 
950*0Sstevel@tonic-gate 	if (flags & _MAP_LOW32)
951*0Sstevel@tonic-gate 		error = EINVAL;
952*0Sstevel@tonic-gate 	else if (fd == -1 && (flags & MAP_ANON) != 0)
953*0Sstevel@tonic-gate 		error = smmap_common(&a, uap->len, (int)uap->prot,
954*0Sstevel@tonic-gate 		    flags | _MAP_LOW32, NULL, off);
955*0Sstevel@tonic-gate 	else if ((fp = getf(fd)) != NULL) {
956*0Sstevel@tonic-gate 		error = smmap_common(&a, uap->len, (int)uap->prot,
957*0Sstevel@tonic-gate 		    flags | _MAP_LOW32, fp, off);
958*0Sstevel@tonic-gate 		releasef(fd);
959*0Sstevel@tonic-gate 	} else
960*0Sstevel@tonic-gate 		error = EBADF;
961*0Sstevel@tonic-gate 
962*0Sstevel@tonic-gate 	if (error == 0)
963*0Sstevel@tonic-gate 		rvp->r_val1 = (uintptr_t)a;
964*0Sstevel@tonic-gate 	return (error);
965*0Sstevel@tonic-gate }
966*0Sstevel@tonic-gate 
967*0Sstevel@tonic-gate #endif	/* _SYSCALL32_IMPL || _ILP32 */
968*0Sstevel@tonic-gate 
969*0Sstevel@tonic-gate int
970*0Sstevel@tonic-gate munmap(caddr_t addr, size_t len)
971*0Sstevel@tonic-gate {
972*0Sstevel@tonic-gate 	struct proc *p = curproc;
973*0Sstevel@tonic-gate 	struct as *as = p->p_as;
974*0Sstevel@tonic-gate 
975*0Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
976*0Sstevel@tonic-gate 		return (set_errno(EINVAL));
977*0Sstevel@tonic-gate 
978*0Sstevel@tonic-gate 	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
979*0Sstevel@tonic-gate 		return (set_errno(EINVAL));
980*0Sstevel@tonic-gate 
981*0Sstevel@tonic-gate 	/*
982*0Sstevel@tonic-gate 	 * Discard lwpchan mappings.
983*0Sstevel@tonic-gate 	 */
984*0Sstevel@tonic-gate 	if (p->p_lcp != NULL)
985*0Sstevel@tonic-gate 		lwpchan_delete_mapping(p, addr, addr + len);
986*0Sstevel@tonic-gate 	if (as_unmap(as, addr, len) != 0)
987*0Sstevel@tonic-gate 		return (set_errno(EINVAL));
988*0Sstevel@tonic-gate 
989*0Sstevel@tonic-gate 	return (0);
990*0Sstevel@tonic-gate }
991*0Sstevel@tonic-gate 
992*0Sstevel@tonic-gate int
993*0Sstevel@tonic-gate mprotect(caddr_t addr, size_t len, int prot)
994*0Sstevel@tonic-gate {
995*0Sstevel@tonic-gate 	struct as *as = curproc->p_as;
996*0Sstevel@tonic-gate 	uint_t uprot = prot | PROT_USER;
997*0Sstevel@tonic-gate 	int error;
998*0Sstevel@tonic-gate 
999*0Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
1000*0Sstevel@tonic-gate 		return (set_errno(EINVAL));
1001*0Sstevel@tonic-gate 
1002*0Sstevel@tonic-gate 	switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
1003*0Sstevel@tonic-gate 	case RANGE_OKAY:
1004*0Sstevel@tonic-gate 		break;
1005*0Sstevel@tonic-gate 	case RANGE_BADPROT:
1006*0Sstevel@tonic-gate 		return (set_errno(ENOTSUP));
1007*0Sstevel@tonic-gate 	case RANGE_BADADDR:
1008*0Sstevel@tonic-gate 	default:
1009*0Sstevel@tonic-gate 		return (set_errno(ENOMEM));
1010*0Sstevel@tonic-gate 	}
1011*0Sstevel@tonic-gate 
1012*0Sstevel@tonic-gate 	error = as_setprot(as, addr, len, uprot);
1013*0Sstevel@tonic-gate 	if (error)
1014*0Sstevel@tonic-gate 		return (set_errno(error));
1015*0Sstevel@tonic-gate 	return (0);
1016*0Sstevel@tonic-gate }
1017*0Sstevel@tonic-gate 
1018*0Sstevel@tonic-gate #define	MC_CACHE	128			/* internal result buffer */
1019*0Sstevel@tonic-gate #define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */
1020*0Sstevel@tonic-gate 
1021*0Sstevel@tonic-gate int
1022*0Sstevel@tonic-gate mincore(caddr_t addr, size_t len, char *vecp)
1023*0Sstevel@tonic-gate {
1024*0Sstevel@tonic-gate 	struct as *as = curproc->p_as;
1025*0Sstevel@tonic-gate 	caddr_t ea;			/* end address of loop */
1026*0Sstevel@tonic-gate 	size_t rl;			/* inner result length */
1027*0Sstevel@tonic-gate 	char vec[MC_CACHE];		/* local vector cache */
1028*0Sstevel@tonic-gate 	int error;
1029*0Sstevel@tonic-gate 	model_t model;
1030*0Sstevel@tonic-gate 	long	llen;
1031*0Sstevel@tonic-gate 
1032*0Sstevel@tonic-gate 	model = get_udatamodel();
1033*0Sstevel@tonic-gate 	/*
1034*0Sstevel@tonic-gate 	 * Validate form of address parameters.
1035*0Sstevel@tonic-gate 	 */
1036*0Sstevel@tonic-gate 	if (model == DATAMODEL_NATIVE) {
1037*0Sstevel@tonic-gate 		llen = (long)len;
1038*0Sstevel@tonic-gate 	} else {
1039*0Sstevel@tonic-gate 		llen = (int32_t)(size32_t)len;
1040*0Sstevel@tonic-gate 	}
1041*0Sstevel@tonic-gate 	if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
1042*0Sstevel@tonic-gate 		return (set_errno(EINVAL));
1043*0Sstevel@tonic-gate 
1044*0Sstevel@tonic-gate 	if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
1045*0Sstevel@tonic-gate 		return (set_errno(ENOMEM));
1046*0Sstevel@tonic-gate 
1047*0Sstevel@tonic-gate 	/*
1048*0Sstevel@tonic-gate 	 * Loop over subranges of interval [addr : addr + len), recovering
1049*0Sstevel@tonic-gate 	 * results internally and then copying them out to caller.  Subrange
1050*0Sstevel@tonic-gate 	 * is based on the size of MC_CACHE, defined above.
1051*0Sstevel@tonic-gate 	 */
1052*0Sstevel@tonic-gate 	for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
1053*0Sstevel@tonic-gate 		error = as_incore(as, addr,
1054*0Sstevel@tonic-gate 		    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
1055*0Sstevel@tonic-gate 		if (rl != 0) {
1056*0Sstevel@tonic-gate 			rl = (rl + PAGESIZE - 1) / PAGESIZE;
1057*0Sstevel@tonic-gate 			if (copyout(vec, vecp, rl) != 0)
1058*0Sstevel@tonic-gate 				return (set_errno(EFAULT));
1059*0Sstevel@tonic-gate 			vecp += rl;
1060*0Sstevel@tonic-gate 		}
1061*0Sstevel@tonic-gate 		if (error != 0)
1062*0Sstevel@tonic-gate 			return (set_errno(ENOMEM));
1063*0Sstevel@tonic-gate 	}
1064*0Sstevel@tonic-gate 	return (0);
1065*0Sstevel@tonic-gate }
1066