xref: /onnv-gate/usr/src/uts/common/os/shm.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
28*0Sstevel@tonic-gate /*	  All Rights Reserved	*/
29*0Sstevel@tonic-gate 
30*0Sstevel@tonic-gate /*
31*0Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
32*0Sstevel@tonic-gate  * The Regents of the University of California
33*0Sstevel@tonic-gate  * All Rights Reserved
34*0Sstevel@tonic-gate  *
35*0Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
36*0Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
37*0Sstevel@tonic-gate  * contributors.
38*0Sstevel@tonic-gate  */
39*0Sstevel@tonic-gate 
40*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
41*0Sstevel@tonic-gate 
42*0Sstevel@tonic-gate /*
43*0Sstevel@tonic-gate  * Inter-Process Communication Shared Memory Facility.
44*0Sstevel@tonic-gate  *
45*0Sstevel@tonic-gate  * See os/ipc.c for a description of common IPC functionality.
46*0Sstevel@tonic-gate  *
47*0Sstevel@tonic-gate  * Resource controls
48*0Sstevel@tonic-gate  * -----------------
49*0Sstevel@tonic-gate  *
50*0Sstevel@tonic-gate  * Control:      project.max-shm-ids (rc_project_shmmni)
51*0Sstevel@tonic-gate  * Description:  Maximum number of shared memory ids allowed a project.
52*0Sstevel@tonic-gate  *
53*0Sstevel@tonic-gate  *   When shmget() is used to allocate a shared memory segment, one id
54*0Sstevel@tonic-gate  *   is allocated.  If the id allocation doesn't succeed, shmget()
55*0Sstevel@tonic-gate  *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
56*0Sstevel@tonic-gate  *   IPC_RMID) the id is deallocated.
57*0Sstevel@tonic-gate  *
58*0Sstevel@tonic-gate  * Control:      project.max-shm-memory (rc_project_shmmax)
59*0Sstevel@tonic-gate  * Description:  Total amount of shared memory allowed a project.
60*0Sstevel@tonic-gate  *
61*0Sstevel@tonic-gate  *   When shmget() is used to allocate a shared memory segment, the
62*0Sstevel@tonic-gate  *   segment's size is allocated against this limit.  If the space
63*0Sstevel@tonic-gate  *   allocation doesn't succeed, shmget() fails and errno is set to
64*0Sstevel@tonic-gate  *   EINVAL.  The size will be deallocated once the last process has
65*0Sstevel@tonic-gate  *   detached the segment and the segment has been successfully
66*0Sstevel@tonic-gate  *   shmctl(, IPC_RMID)ed.
67*0Sstevel@tonic-gate  */
68*0Sstevel@tonic-gate 
69*0Sstevel@tonic-gate #include <sys/types.h>
70*0Sstevel@tonic-gate #include <sys/param.h>
71*0Sstevel@tonic-gate #include <sys/cred.h>
72*0Sstevel@tonic-gate #include <sys/errno.h>
73*0Sstevel@tonic-gate #include <sys/time.h>
74*0Sstevel@tonic-gate #include <sys/kmem.h>
75*0Sstevel@tonic-gate #include <sys/user.h>
76*0Sstevel@tonic-gate #include <sys/proc.h>
77*0Sstevel@tonic-gate #include <sys/systm.h>
78*0Sstevel@tonic-gate #include <sys/prsystm.h>
79*0Sstevel@tonic-gate #include <sys/sysmacros.h>
80*0Sstevel@tonic-gate #include <sys/tuneable.h>
81*0Sstevel@tonic-gate #include <sys/vm.h>
82*0Sstevel@tonic-gate #include <sys/mman.h>
83*0Sstevel@tonic-gate #include <sys/swap.h>
84*0Sstevel@tonic-gate #include <sys/cmn_err.h>
85*0Sstevel@tonic-gate #include <sys/debug.h>
86*0Sstevel@tonic-gate #include <sys/lwpchan_impl.h>
87*0Sstevel@tonic-gate #include <sys/avl.h>
88*0Sstevel@tonic-gate #include <sys/modctl.h>
89*0Sstevel@tonic-gate #include <sys/syscall.h>
90*0Sstevel@tonic-gate #include <sys/task.h>
91*0Sstevel@tonic-gate #include <sys/project.h>
92*0Sstevel@tonic-gate #include <sys/policy.h>
93*0Sstevel@tonic-gate #include <sys/zone.h>
94*0Sstevel@tonic-gate 
95*0Sstevel@tonic-gate #include <sys/ipc.h>
96*0Sstevel@tonic-gate #include <sys/ipc_impl.h>
97*0Sstevel@tonic-gate #include <sys/shm.h>
98*0Sstevel@tonic-gate #include <sys/shm_impl.h>
99*0Sstevel@tonic-gate 
100*0Sstevel@tonic-gate #include <vm/hat.h>
101*0Sstevel@tonic-gate #include <vm/seg.h>
102*0Sstevel@tonic-gate #include <vm/as.h>
103*0Sstevel@tonic-gate #include <vm/seg_vn.h>
104*0Sstevel@tonic-gate #include <vm/anon.h>
105*0Sstevel@tonic-gate #include <vm/page.h>
106*0Sstevel@tonic-gate #include <vm/vpage.h>
107*0Sstevel@tonic-gate #include <vm/seg_spt.h>
108*0Sstevel@tonic-gate 
109*0Sstevel@tonic-gate #include <c2/audit.h>
110*0Sstevel@tonic-gate 
/* Forward declarations of file-local routines. */
static int shmem_lock(struct anon_map *amp);
static void shmem_unlock(struct anon_map *amp, uint_t lck);
static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
	kshmid_t *id);
static void shm_rm_amp(struct anon_map *amp, uint_t lckflag);
static void shm_dtor(kipc_perm_t *);
static void shm_rmid(kipc_perm_t *);
static void shm_remove_zone(zoneid_t, void *);
119*0Sstevel@tonic-gate 
/*
 * Semantics for share_page_table and ism_off:
 *
 * These are hooks in /etc/system - only for internal testing purpose.
 *
 * Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM) flag
 * in a call to shmat(2). In other words, with share_page_table set, you always
 * get ISM, even if say, DISM is specified. It should really be called "ism_on".
 *
 * Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed to
 * shmat(2).
 *
 * If both share_page_table and ism_off are set, share_page_table prevails.
 *
 * Although these tunables should probably be removed, they do have some
 * external exposure; as long as they exist, they should at least work sensibly.
 */

int share_page_table;		/* /etc/system hook: force ISM (see above) */
int ism_off;			/* /etc/system hook: disable ISM (see above) */

/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret shminfo_shmmax and shminfo_shmmni (see
 * os/project.c), the preferred mechanism for administrating the IPC
 * Shared Memory facility is through the resource controls described at
 * the top of this file.
 */
size_t	shminfo_shmmax = 0x800000;	/* (obsolete) */
int	shminfo_shmmni = 100;		/* (obsolete) */
size_t	shminfo_shmmin = 1;		/* (obsolete) */
int	shminfo_shmseg = 6;		/* (obsolete) */

/* Resource control handles, defined in os/project.c. */
extern rctl_hndl_t rc_project_shmmax;
extern rctl_hndl_t rc_project_shmmni;
static ipc_service_t *shm_svc;		/* common IPC service handle (_init) */
static zone_key_t shm_zone_key;		/* key for shm_remove_zone callback */
157*0Sstevel@tonic-gate 
/*
 * Module linkage information for the kernel.
 */
static uintptr_t shmsys(int, uintptr_t, uintptr_t, uintptr_t);

/* Native system call entry: shmsys(opcode, a0, a1, a2). */
static struct sysent ipcshm_sysent = {
	4,				/* number of arguments */
#ifdef	_SYSCALL32_IMPL
	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else	/* _SYSCALL32_IMPL */
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif	/* _SYSCALL32_IMPL */
	(int (*)())shmsys
};

#ifdef	_SYSCALL32_IMPL
/* 32-bit compatibility entry for 64-bit kernels. */
static struct sysent ipcshm_sysent32 = {
	4,				/* number of arguments */
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())shmsys
};
#endif	/* _SYSCALL32_IMPL */

static struct modlsys modlsys = {
	&mod_syscallops, "System V shared memory", &ipcshm_sysent
};

#ifdef	_SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V shared memory", &ipcshm_sysent32
};
#endif	/* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef	_SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
199*0Sstevel@tonic-gate 
200*0Sstevel@tonic-gate 
201*0Sstevel@tonic-gate int
202*0Sstevel@tonic-gate _init(void)
203*0Sstevel@tonic-gate {
204*0Sstevel@tonic-gate 	int result;
205*0Sstevel@tonic-gate 
206*0Sstevel@tonic-gate 	shm_svc = ipcs_create("shmids", rc_project_shmmni, sizeof (kshmid_t),
207*0Sstevel@tonic-gate 	    shm_dtor, shm_rmid, AT_IPC_SHM,
208*0Sstevel@tonic-gate 	    offsetof(kproject_data_t, kpd_shmmni));
209*0Sstevel@tonic-gate 	zone_key_create(&shm_zone_key, NULL, shm_remove_zone, NULL);
210*0Sstevel@tonic-gate 
211*0Sstevel@tonic-gate 	if ((result = mod_install(&modlinkage)) == 0)
212*0Sstevel@tonic-gate 		return (0);
213*0Sstevel@tonic-gate 
214*0Sstevel@tonic-gate 	(void) zone_key_delete(shm_zone_key);
215*0Sstevel@tonic-gate 	ipcs_destroy(shm_svc);
216*0Sstevel@tonic-gate 
217*0Sstevel@tonic-gate 	return (result);
218*0Sstevel@tonic-gate }
219*0Sstevel@tonic-gate 
220*0Sstevel@tonic-gate int
221*0Sstevel@tonic-gate _fini(void)
222*0Sstevel@tonic-gate {
223*0Sstevel@tonic-gate 	return (EBUSY);
224*0Sstevel@tonic-gate }
225*0Sstevel@tonic-gate 
226*0Sstevel@tonic-gate int
227*0Sstevel@tonic-gate _info(struct modinfo *modinfop)
228*0Sstevel@tonic-gate {
229*0Sstevel@tonic-gate 	return (mod_info(&modlinkage, modinfop));
230*0Sstevel@tonic-gate }
231*0Sstevel@tonic-gate 
/*
 * Shmat (attach shared segment) system call.
 *
 * Maps the segment identified by shmid into the caller's address space,
 * either at the user-supplied address uaddr or at a system-chosen one
 * when uaddr is 0, and returns the attach address through rvp.
 * Returns 0 on success or an errno value.  Handles both the [D]ISM
 * (shared page table) path and the plain segvn mapping path.
 */
static int
shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
{
	kshmid_t *sp;	/* shared memory header ptr */
	size_t	size;
	int	error = 0;
	proc_t *pp = curproc;
	struct as *as = pp->p_as;
	struct segvn_crargs	crargs;	/* segvn create arguments */
	kmutex_t	*lock;
	struct seg 	*segspt = NULL;
	caddr_t		addr = uaddr;
	int		flags = (uflags & SHMAT_VALID_FLAGS_MASK);
	int		useISM;
	uchar_t		prot = PROT_ALL;
	int result;

	/* Look up the id; on success the id's mutex is held until errret. */
	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
		return (EINVAL);
	/* Read access is always required ... */
	if (error = ipcperm_access(&sp->shm_perm, SHM_R, CRED()))
		goto errret;
	/* ... and write access too, unless attaching SHM_RDONLY. */
	if ((flags & SHM_RDONLY) == 0 &&
	    (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (spt_invalid(flags)) {
		error = EINVAL;
		goto errret;
	}
	/* Apply the ism_off / share_page_table tunables (see top of file). */
	if (ism_off)
		flags = flags & ~SHM_SHARE_MMU;
	if (share_page_table) {
		flags = flags & ~SHM_PAGEABLE;
		flags = flags | SHM_SHARE_MMU;
	}
	useISM = (spt_locked(flags) || spt_pageable(flags));
	/* [D]ISM attaches additionally require write permission. */
	if (useISM && (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (useISM && isspt(sp)) {
		uint_t newsptflags = flags | spt_flags(sp->shm_sptseg);
		/*
		 * If trying to change an existing {D}ISM segment from ISM
		 * to DISM or vice versa, return error. Note that this
		 * validation of flags needs to be done after the effect of
		 * tunables such as ism_off and share_page_table, for
		 * semantics that are consistent with the tunables' settings.
		 */
		if (spt_invalid(newsptflags)) {
			error = EINVAL;
			goto errret;
		}
	}
	/* Snapshot the anon map size under its lock. */
	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	size = sp->shm_amp->size;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	/* somewhere to record spt info for final detach */
	if (sp->shm_sptinfo == NULL)
		sp->shm_sptinfo = kmem_zalloc(sizeof (sptinfo_t), KM_SLEEP);

	as_rangelock(as);

	if (useISM) {
		/*
		 * Handle ISM
		 */
		uint_t	n, share_szc;
		size_t	share_size;
		struct	shm_data ssd;
		uintptr_t align_hint;

		n = page_num_pagesizes();
		if (n < 2) { /* large pages aren't supported */
			as_rangeunlock(as);
			error = EINVAL;
			goto errret;
		}

		/*
		 * Pick a share pagesize to use, if (!isspt(sp)).
		 * Otherwise use the already chosen page size.
		 *
		 * For the initial shmat (!isspt(sp)), where sptcreate is
		 * called, map_pgsz is called to recommend a [D]ISM pagesize,
		 * important for systems which offer more than one potential
		 * [D]ISM pagesize.
		 * If the shmat is just to attach to an already created
		 * [D]ISM segment, then use the previously selected page size.
		 */
		if (!isspt(sp)) {
			share_size = map_pgsz(MAPPGSZ_ISM,
			    pp, addr, size, NULL);
			if (share_size == 0) {
				as_rangeunlock(as);
				error = EINVAL;
				goto errret;
			}
			share_szc = page_szc(share_size);
		} else {
			share_szc = sp->shm_sptseg->s_szc;
			share_size = page_get_pagesize(share_szc);
		}
		/* Round the mapping size up to the share pagesize. */
		size = P2ROUNDUP(size, share_size);

		align_hint = share_size;
#if defined(__i386) || defined(__amd64)
		/*
		 * For 64 bit amd64, we want to share an entire page table
		 * if possible. We know (ugh) that there are 512 entries in
		 * in a page table. The number for 32 bit non-PAE should be
		 * 1024, but I'm not going to special case that. Note using 512
		 * won't cause a failure below. It retries with align_hint set
		 * to share_size
		 */
		while (size >= 512 * (uint64_t)align_hint)
			align_hint *= 512;
#endif /* __i386 || __amd64 */

#if defined(__sparcv9)
		if (addr == 0 && curproc->p_model == DATAMODEL_LP64) {
			/*
			 * If no address has been passed in, and this is a
			 * 64-bit process, we'll try to find an address
			 * in the predict-ISM zone.
			 */
			caddr_t predbase = (caddr_t)PREDISM_1T_BASE;
			size_t len = PREDISM_BOUND - PREDISM_1T_BASE;

			as_purge(as);
			if (as_gap(as, size + share_size, &predbase, &len,
			    AH_LO, (caddr_t)NULL) != -1) {
				/*
				 * We found an address which looks like a
				 * candidate.  We want to round it up, and
				 * then check that it's a valid user range.
				 * This assures that we won't fail below.
				 */
				addr = (caddr_t)P2ROUNDUP((uintptr_t)predbase,
				    share_size);

				if (valid_usr_range(addr, size, prot,
				    as, as->a_userlimit) != RANGE_OKAY) {
					addr = 0;
				}
			}
		}
#endif /* __sparcv9 */

		if (addr == 0) {
			/*
			 * System-chosen address: first try an align_hint-
			 * aligned address, then retry once with plain
			 * share_size alignment before giving up.
			 */
			for (;;) {
				addr = (caddr_t)align_hint;
				map_addr(&addr, size, 0ll, 1, MAP_ALIGN);
				if (addr != NULL || align_hint == share_size)
					break;
				align_hint = share_size;
			}
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
			ASSERT(((uintptr_t)addr & (align_hint - 1)) == 0);
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use spt aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & (share_size - 1)) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/*
				 * We try to accomodate processors which
				 * may not support execute permissions on
				 * all ISM segments by trying the check
				 * again but without PROT_EXEC.
				 */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len, AH_LO,
			    (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		if (!isspt(sp)) {
			/* First [D]ISM attach: create the shared spt segment. */
			error = sptcreate(size, &segspt, sp->shm_amp, prot,
			    flags, share_szc);
			if (error) {
				as_rangeunlock(as);
				goto errret;
			}
			sp->shm_sptinfo->sptas = segspt->s_as;
			sp->shm_sptseg = segspt;
			sp->shm_sptprot = prot;
			sp->shm_lkcnt = 0;
		} else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
			/*
			 * Ensure we're attaching to an ISM segment with
			 * fewer or equal permissions than what we're
			 * allowed.  Fail if the segment has more
			 * permissions than what we're allowed.
			 */
			error = EACCES;
			as_rangeunlock(as);
			goto errret;
		}

		ssd.shm_sptseg = sp->shm_sptseg;
		ssd.shm_sptas = sp->shm_sptinfo->sptas;
		ssd.shm_amp = sp->shm_amp;
		error = as_map(as, addr, size, segspt_shmattach, &ssd);
		if (error == 0)
			sp->shm_ismattch++; /* keep count of ISM attaches */
	} else {

		/*
		 * Normal case.
		 */
		if (flags & SHM_RDONLY)
			prot &= ~PROT_WRITE;

		if (addr == 0) {
			/* Let the system pick the attach address */
			map_addr(&addr, size, 0ll, 1, 0);
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/* SHM_RND: round the address down to a SHMLBA boundary. */
			if (flags & SHM_RND)
				addr = (caddr_t)((uintptr_t)addr &
				    ~(SHMLBA - 1));
			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & PAGEOFFSET) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/* Retry without PROT_EXEC (see ISM path above). */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len,
			    AH_LO, (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		/* Initialize the create arguments and map the segment */
		crargs = *(struct segvn_crargs *)zfod_argsp;
		crargs.offset = 0;
		crargs.type = MAP_SHARED;
		crargs.amp = sp->shm_amp;
		crargs.prot = prot;
		crargs.maxprot = crargs.prot;
		crargs.flags = 0;

		error = as_map(as, addr, size, segvn_create, &crargs);
	}

	as_rangeunlock(as);
	if (error)
		goto errret;

	/* record shmem range for the detach */
	sa_add(pp, addr, (size_t)size, useISM ? SHMSA_ISM : 0, sp);
	*rvp = (uintptr_t)addr;

	/* Update attach time/pid and take a hold on the id for this attach. */
	sp->shm_atime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	ipc_hold(shm_svc, (kipc_perm_t *)sp);
errret:
	mutex_exit(lock);
	return (error);
}
549*0Sstevel@tonic-gate 
/*
 * Destructor callback (registered via ipcs_create in _init): tear down a
 * shared memory id once its last reference is gone.  Destroys any [D]ISM
 * state, releases the anon map, and credits the segment's size back to
 * the owning project's shmmax accounting.
 */
static void
shm_dtor(kipc_perm_t *perm)
{
	kshmid_t *sp = (kshmid_t *)perm;
	uint_t cnt;

	/* Tear down shared-page-table state, if any was ever created. */
	if (sp->shm_sptinfo) {
		if (isspt(sp))
			sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
		kmem_free(sp->shm_sptinfo, sizeof (sptinfo_t));
	}

	/* Drop our anon map reference; it must have been the last one. */
	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	cnt = --sp->shm_amp->refcnt;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
	ASSERT(cnt == 0);
	shm_rm_amp(sp->shm_amp, sp->shm_lkcnt);

	/* Return the segment's size to the project's shmmax accounting. */
	if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
		ipcs_lock(shm_svc);
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -=
		    ptob(btopr(sp->shm_segsz));
		ipcs_unlock(shm_svc);
	}
}
575*0Sstevel@tonic-gate 
576*0Sstevel@tonic-gate /* ARGSUSED */
577*0Sstevel@tonic-gate static void
578*0Sstevel@tonic-gate shm_rmid(kipc_perm_t *perm)
579*0Sstevel@tonic-gate {
580*0Sstevel@tonic-gate 	/* nothing to do */
581*0Sstevel@tonic-gate }
582*0Sstevel@tonic-gate 
583*0Sstevel@tonic-gate /*
584*0Sstevel@tonic-gate  * Shmctl system call.
585*0Sstevel@tonic-gate  */
586*0Sstevel@tonic-gate /* ARGSUSED */
587*0Sstevel@tonic-gate static int
588*0Sstevel@tonic-gate shmctl(int shmid, int cmd, void *arg)
589*0Sstevel@tonic-gate {
590*0Sstevel@tonic-gate 	kshmid_t		*sp;	/* shared memory header ptr */
591*0Sstevel@tonic-gate 	STRUCT_DECL(shmid_ds, ds);	/* for SVR4 IPC_SET */
592*0Sstevel@tonic-gate 	int			error = 0;
593*0Sstevel@tonic-gate 	struct cred 		*cr = CRED();
594*0Sstevel@tonic-gate 	kmutex_t		*lock;
595*0Sstevel@tonic-gate 	model_t			mdl = get_udatamodel();
596*0Sstevel@tonic-gate 	struct shmid_ds64	ds64;
597*0Sstevel@tonic-gate 	shmatt_t		nattch;
598*0Sstevel@tonic-gate 
599*0Sstevel@tonic-gate 	STRUCT_INIT(ds, mdl);
600*0Sstevel@tonic-gate 
601*0Sstevel@tonic-gate 	/*
602*0Sstevel@tonic-gate 	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
603*0Sstevel@tonic-gate 	 */
604*0Sstevel@tonic-gate 	switch (cmd) {
605*0Sstevel@tonic-gate 	case IPC_SET:
606*0Sstevel@tonic-gate 		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
607*0Sstevel@tonic-gate 			return (EFAULT);
608*0Sstevel@tonic-gate 		break;
609*0Sstevel@tonic-gate 
610*0Sstevel@tonic-gate 	case IPC_SET64:
611*0Sstevel@tonic-gate 		if (copyin(arg, &ds64, sizeof (struct shmid_ds64)))
612*0Sstevel@tonic-gate 			return (EFAULT);
613*0Sstevel@tonic-gate 		break;
614*0Sstevel@tonic-gate 
615*0Sstevel@tonic-gate 	case IPC_RMID:
616*0Sstevel@tonic-gate 		return (ipc_rmid(shm_svc, shmid, cr));
617*0Sstevel@tonic-gate 	}
618*0Sstevel@tonic-gate 
619*0Sstevel@tonic-gate 	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
620*0Sstevel@tonic-gate 		return (EINVAL);
621*0Sstevel@tonic-gate 
622*0Sstevel@tonic-gate 	switch (cmd) {
623*0Sstevel@tonic-gate 	/* Set ownership and permissions. */
624*0Sstevel@tonic-gate 	case IPC_SET:
625*0Sstevel@tonic-gate 		if (error = ipcperm_set(shm_svc, cr, &sp->shm_perm,
626*0Sstevel@tonic-gate 		    &STRUCT_BUF(ds)->shm_perm, mdl))
627*0Sstevel@tonic-gate 				break;
628*0Sstevel@tonic-gate 		sp->shm_ctime = gethrestime_sec();
629*0Sstevel@tonic-gate 		break;
630*0Sstevel@tonic-gate 
631*0Sstevel@tonic-gate 	case IPC_STAT:
632*0Sstevel@tonic-gate 		if (error = ipcperm_access(&sp->shm_perm, SHM_R, cr))
633*0Sstevel@tonic-gate 			break;
634*0Sstevel@tonic-gate 
635*0Sstevel@tonic-gate 		nattch = sp->shm_perm.ipc_ref - 1;
636*0Sstevel@tonic-gate 
637*0Sstevel@tonic-gate 		ipcperm_stat(&STRUCT_BUF(ds)->shm_perm, &sp->shm_perm, mdl);
638*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_segsz, sp->shm_segsz);
639*0Sstevel@tonic-gate 		STRUCT_FSETP(ds, shm_amp, NULL);	/* kernel addr */
640*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_lkcnt, sp->shm_lkcnt);
641*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_lpid, sp->shm_lpid);
642*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_cpid, sp->shm_cpid);
643*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_nattch, nattch);
644*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_cnattch, sp->shm_ismattch);
645*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_atime, sp->shm_atime);
646*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_dtime, sp->shm_dtime);
647*0Sstevel@tonic-gate 		STRUCT_FSET(ds, shm_ctime, sp->shm_ctime);
648*0Sstevel@tonic-gate 
649*0Sstevel@tonic-gate 		mutex_exit(lock);
650*0Sstevel@tonic-gate 		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
651*0Sstevel@tonic-gate 			return (EFAULT);
652*0Sstevel@tonic-gate 
653*0Sstevel@tonic-gate 		return (0);
654*0Sstevel@tonic-gate 
655*0Sstevel@tonic-gate 	case IPC_SET64:
656*0Sstevel@tonic-gate 		if (error = ipcperm_set64(shm_svc, cr,
657*0Sstevel@tonic-gate 		    &sp->shm_perm, &ds64.shmx_perm))
658*0Sstevel@tonic-gate 			break;
659*0Sstevel@tonic-gate 		sp->shm_ctime = gethrestime_sec();
660*0Sstevel@tonic-gate 		break;
661*0Sstevel@tonic-gate 
662*0Sstevel@tonic-gate 	case IPC_STAT64:
663*0Sstevel@tonic-gate 		nattch = sp->shm_perm.ipc_ref - 1;
664*0Sstevel@tonic-gate 
665*0Sstevel@tonic-gate 		ipcperm_stat64(&ds64.shmx_perm, &sp->shm_perm);
666*0Sstevel@tonic-gate 		ds64.shmx_segsz = sp->shm_segsz;
667*0Sstevel@tonic-gate 		ds64.shmx_lkcnt = sp->shm_lkcnt;
668*0Sstevel@tonic-gate 		ds64.shmx_lpid = sp->shm_lpid;
669*0Sstevel@tonic-gate 		ds64.shmx_cpid = sp->shm_cpid;
670*0Sstevel@tonic-gate 		ds64.shmx_nattch = nattch;
671*0Sstevel@tonic-gate 		ds64.shmx_cnattch = sp->shm_ismattch;
672*0Sstevel@tonic-gate 		ds64.shmx_atime = sp->shm_atime;
673*0Sstevel@tonic-gate 		ds64.shmx_dtime = sp->shm_dtime;
674*0Sstevel@tonic-gate 		ds64.shmx_ctime = sp->shm_ctime;
675*0Sstevel@tonic-gate 
676*0Sstevel@tonic-gate 		mutex_exit(lock);
677*0Sstevel@tonic-gate 		if (copyout(&ds64, arg, sizeof (struct shmid_ds64)))
678*0Sstevel@tonic-gate 			return (EFAULT);
679*0Sstevel@tonic-gate 
680*0Sstevel@tonic-gate 		return (0);
681*0Sstevel@tonic-gate 
682*0Sstevel@tonic-gate 	/* Lock segment in memory */
683*0Sstevel@tonic-gate 	case SHM_LOCK:
684*0Sstevel@tonic-gate 		if ((error = secpolicy_lock_memory(cr)) != 0)
685*0Sstevel@tonic-gate 			break;
686*0Sstevel@tonic-gate 
687*0Sstevel@tonic-gate 		if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
688*0Sstevel@tonic-gate 			if (error = shmem_lock(sp->shm_amp)) {
689*0Sstevel@tonic-gate 			    ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
690*0Sstevel@tonic-gate 			    cmn_err(CE_NOTE,
691*0Sstevel@tonic-gate 				"shmctl - couldn't lock %ld pages into memory",
692*0Sstevel@tonic-gate 				sp->shm_amp->size);
693*0Sstevel@tonic-gate 			    ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
694*0Sstevel@tonic-gate 			    error = ENOMEM;
695*0Sstevel@tonic-gate 			    sp->shm_lkcnt--;
696*0Sstevel@tonic-gate 			    shmem_unlock(sp->shm_amp, 0);
697*0Sstevel@tonic-gate 			}
698*0Sstevel@tonic-gate 		}
699*0Sstevel@tonic-gate 		break;
700*0Sstevel@tonic-gate 
701*0Sstevel@tonic-gate 	/* Unlock segment */
702*0Sstevel@tonic-gate 	case SHM_UNLOCK:
703*0Sstevel@tonic-gate 		if ((error = secpolicy_lock_memory(cr)) != 0)
704*0Sstevel@tonic-gate 			break;
705*0Sstevel@tonic-gate 
706*0Sstevel@tonic-gate 		if (!isspt(sp)) {
707*0Sstevel@tonic-gate 			if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
708*0Sstevel@tonic-gate 				shmem_unlock(sp->shm_amp, 1);
709*0Sstevel@tonic-gate 			}
710*0Sstevel@tonic-gate 		}
711*0Sstevel@tonic-gate 		break;
712*0Sstevel@tonic-gate 
713*0Sstevel@tonic-gate 	default:
714*0Sstevel@tonic-gate 		error = EINVAL;
715*0Sstevel@tonic-gate 		break;
716*0Sstevel@tonic-gate 	}
717*0Sstevel@tonic-gate 	mutex_exit(lock);
718*0Sstevel@tonic-gate 	return (error);
719*0Sstevel@tonic-gate }
720*0Sstevel@tonic-gate 
/*
 * Detach the segment described by sap from process pp: tear down the
 * address-space mapping, update the id's detach-time bookkeeping, and
 * release the process's reference on the id.  The caller has already
 * removed sap from pp->p_segacct and must not hold pp->p_lock.
 */
static void
shm_detach(proc_t *pp, segacct_t *sap)
{
	kshmid_t	*sp = sap->sa_id;
	size_t		len = sap->sa_len;
	caddr_t		addr = sap->sa_addr;

	/*
	 * Discard lwpchan mappings.
	 */
	if (pp->p_lcp != NULL)
		lwpchan_delete_mapping(pp, addr, addr + len);
	(void) as_unmap(pp->p_as, addr, len);

	/*
	 * Perform some detach-time accounting.
	 */
	(void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
	if (sap->sa_flags & SHMSA_ISM)
		sp->shm_ismattch--;	/* one fewer ISM attach on this id */
	sp->shm_dtime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;	/* pid of last shm operation */
	ipc_rele(shm_svc, (kipc_perm_t *)sp);	/* Drops lock */

	kmem_free(sap, sizeof (segacct_t));
}
747*0Sstevel@tonic-gate 
/*
 * Shmdt (detach) system call: remove the attachment at addr from the
 * calling process.  Returns EINVAL if addr is not the base address of
 * a recorded attachment.
 */
static int
shmdt(caddr_t addr)
{
	proc_t *pp = curproc;
	segacct_t *sap, template;

	mutex_enter(&pp->p_lock);
	prbarrier(pp);			/* block /proc.  See shmgetid(). */

	/* zero sa_len marks this as a lookup template; see shm_sacompar() */
	template.sa_addr = addr;
	template.sa_len = 0;
	if ((pp->p_segacct == NULL) ||
	    ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)) {
		mutex_exit(&pp->p_lock);
		return (EINVAL);
	}
	avl_remove(pp->p_segacct, sap);
	mutex_exit(&pp->p_lock);

	/* unmap and do detach-time accounting; shm_detach() frees sap */
	shm_detach(pp, sap);

	return (0);
}
771*0Sstevel@tonic-gate 
/*
 * Remove all shared memory segments associated with a given zone.
 * Called by zone_shutdown when the zone is halted.  The callback
 * argument is unused (hence ARGSUSED1).
 */
/*ARGSUSED1*/
static void
shm_remove_zone(zoneid_t zoneid, void *arg)
{
	ipc_remove_zone(shm_svc, zoneid);
}
782*0Sstevel@tonic-gate 
/*
 * Shmget (create new shmem) system call.
 *
 * Looks up (or allocates) the segment for key; for a new segment,
 * reserves swap, allocates the anon_map, initializes the id, and
 * commits it to global visibility.  On success *rvp is set to the
 * IPC id.
 *
 * NOTE(review): pp->p_lock is released below but no mutex_enter on it
 * appears in this function; presumably ipc_get() returns holding
 * pp->p_lock (needed for rctl_test()) as well as the ipc lock —
 * confirm against ipc_get()'s contract.
 */
static int
shmget(key_t key, size_t size, int shmflg, uintptr_t *rvp)
{
	proc_t		*pp = curproc;
	kshmid_t	*sp;
	kmutex_t	*lock;
	int		error;

top:
	if (error = ipc_get(shm_svc, key, shmflg, (kipc_perm_t **)&sp, &lock))
		return (error);

	if (!IPC_FREE(&sp->shm_perm)) {
		/*
		 * A segment with the requested key exists.
		 */
		if (size > sp->shm_segsz) {
			mutex_exit(lock);
			return (EINVAL);
		}
	} else {
		/*
		 * A new segment should be created.
		 */
		size_t npages = btopr(size);
		size_t rsize = ptob(npages);

		/*
		 * Check rsize and the per-project limit on shared
		 * memory.  Checking rsize handles both the size == 0
		 * case and the size < ULONG_MAX & PAGEMASK case (i.e.
		 * rounding up wraps a size_t).
		 */
		if (rsize == 0 || (rctl_test(rc_project_shmmax,
		    pp->p_task->tk_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY)) {

			mutex_exit(&pp->p_lock);
			mutex_exit(lock);
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (EINVAL);
		}
		mutex_exit(&pp->p_lock);
		mutex_exit(lock);

		/* reserve swap for the whole (page-rounded) segment */
		if (anon_resv(rsize) == 0) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (ENOMEM);
		}

		sp->shm_amp = anonmap_alloc(rsize, rsize);

		/*
		 * Store the original user's requested size, in bytes,
		 * rather than the page-aligned size.  The former is
		 * used for IPC_STAT and shmget() lookups.  The latter
		 * is saved in the anon_map structure and is used for
		 * calls to the vm layer.
		 */
		sp->shm_segsz = size;
		sp->shm_atime = sp->shm_dtime = 0;
		sp->shm_ctime = gethrestime_sec();
		sp->shm_lpid = (pid_t)0;
		sp->shm_cpid = curproc->p_pid;
		sp->shm_ismattch = 0;
		sp->shm_sptinfo = NULL;

		/*
		 * Check limits one last time, push id into global
		 * visibility, and update resource usage counts.
		 */
		if (error = ipc_commit_begin(shm_svc, key, shmflg,
		    (kipc_perm_t *)sp)) {
			if (error == EAGAIN)
				goto top;	/* id was racily recycled */
			return (error);
		}

		/* recheck against the project the id was committed to */
		if (rctl_test(rc_project_shmmax,
		    sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (EINVAL);
		}
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize;

		lock = ipc_commit_end(shm_svc, &sp->shm_perm);
	}

#ifdef C2_AUDIT
	if (audit_active)
		audit_ipcget(AT_IPC_SHM, (void *)sp);
#endif

	*rvp = (uintptr_t)(sp->shm_perm.ipc_id);

	mutex_exit(lock);
	return (0);
}
885*0Sstevel@tonic-gate 
/*
 * shmids system call: copy out the list of active shared memory ids.
 * Delegates entirely to the common SysV IPC id-enumeration code.
 */
static int
shmids(int *buf, uint_t nids, uint_t *pnids)
{
	return (ipc_ids(shm_svc, buf, nids, pnids));
}
894*0Sstevel@tonic-gate 
895*0Sstevel@tonic-gate /*
896*0Sstevel@tonic-gate  * System entry point for shmat, shmctl, shmdt, and shmget system calls.
897*0Sstevel@tonic-gate  */
898*0Sstevel@tonic-gate static uintptr_t
899*0Sstevel@tonic-gate shmsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2)
900*0Sstevel@tonic-gate {
901*0Sstevel@tonic-gate 	int	error;
902*0Sstevel@tonic-gate 	uintptr_t r_val = 0;
903*0Sstevel@tonic-gate 
904*0Sstevel@tonic-gate 	switch (opcode) {
905*0Sstevel@tonic-gate 	case SHMAT:
906*0Sstevel@tonic-gate 		error = shmat((int)a0, (caddr_t)a1, (int)a2, &r_val);
907*0Sstevel@tonic-gate 		break;
908*0Sstevel@tonic-gate 	case SHMCTL:
909*0Sstevel@tonic-gate 		error = shmctl((int)a0, (int)a1, (void *)a2);
910*0Sstevel@tonic-gate 		break;
911*0Sstevel@tonic-gate 	case SHMDT:
912*0Sstevel@tonic-gate 		error = shmdt((caddr_t)a0);
913*0Sstevel@tonic-gate 		break;
914*0Sstevel@tonic-gate 	case SHMGET:
915*0Sstevel@tonic-gate 		error = shmget((key_t)a0, (size_t)a1, (int)a2, &r_val);
916*0Sstevel@tonic-gate 		break;
917*0Sstevel@tonic-gate 	case SHMIDS:
918*0Sstevel@tonic-gate 		error = shmids((int *)a0, (uint_t)a1, (uint_t *)a2);
919*0Sstevel@tonic-gate 		break;
920*0Sstevel@tonic-gate 	default:
921*0Sstevel@tonic-gate 		error = EINVAL;
922*0Sstevel@tonic-gate 		break;
923*0Sstevel@tonic-gate 	}
924*0Sstevel@tonic-gate 
925*0Sstevel@tonic-gate 	if (error)
926*0Sstevel@tonic-gate 		return ((uintptr_t)set_errno(error));
927*0Sstevel@tonic-gate 
928*0Sstevel@tonic-gate 	return (r_val);
929*0Sstevel@tonic-gate }
930*0Sstevel@tonic-gate 
931*0Sstevel@tonic-gate /*
932*0Sstevel@tonic-gate  * segacct_t comparator
933*0Sstevel@tonic-gate  * This works as expected, with one minor change: the first of two real
934*0Sstevel@tonic-gate  * segments with equal addresses is considered to be 'greater than' the
935*0Sstevel@tonic-gate  * second.  We only return equal when searching using a template, in
936*0Sstevel@tonic-gate  * which case we explicitly set the template segment's length to 0
937*0Sstevel@tonic-gate  * (which is invalid for a real segment).
938*0Sstevel@tonic-gate  */
939*0Sstevel@tonic-gate static int
940*0Sstevel@tonic-gate shm_sacompar(const void *x, const void *y)
941*0Sstevel@tonic-gate {
942*0Sstevel@tonic-gate 	segacct_t *sa1 = (segacct_t *)x;
943*0Sstevel@tonic-gate 	segacct_t *sa2 = (segacct_t *)y;
944*0Sstevel@tonic-gate 
945*0Sstevel@tonic-gate 	if (sa1->sa_addr < sa2->sa_addr)
946*0Sstevel@tonic-gate 		return (-1);
947*0Sstevel@tonic-gate 	if (sa1->sa_addr > sa2->sa_addr)
948*0Sstevel@tonic-gate 		return (1);
949*0Sstevel@tonic-gate 	if ((sa1->sa_len == 0) || (sa2->sa_len == 0))
950*0Sstevel@tonic-gate 		return (0);
951*0Sstevel@tonic-gate 	return (1);
952*0Sstevel@tonic-gate }
953*0Sstevel@tonic-gate 
/*
 * add this record to the segacct list.
 *
 * Allocations are done before taking p_lock because KM_SLEEP may
 * block; the speculatively-allocated tree is freed if another thread
 * created one first.
 */
static void
sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id)
{
	segacct_t *nsap;
	avl_tree_t *tree = NULL;
	avl_index_t where;

	nsap = kmem_alloc(sizeof (segacct_t), KM_SLEEP);
	nsap->sa_addr = addr;
	nsap->sa_len  = len;
	nsap->sa_flags = flags;
	nsap->sa_id = id;

	/* unlocked peek; rechecked below under p_lock */
	if (pp->p_segacct == NULL)
		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

	mutex_enter(&pp->p_lock);
	prbarrier(pp);			/* block /proc.  See shmgetid(). */

	if (pp->p_segacct == NULL) {
		avl_create(tree, shm_sacompar, sizeof (segacct_t),
		    offsetof(segacct_t, sa_tree));
		pp->p_segacct = tree;
	} else if (tree) {
		/* lost the race; discard our speculative allocation */
		kmem_free(tree, sizeof (avl_tree_t));
	}

	/*
	 * We can ignore the result of avl_find, as the comparator will
	 * never return equal for segments with non-zero length.  This
	 * is a necessary hack to get around the fact that we do, in
	 * fact, have duplicate keys.
	 */
	(void) avl_find(pp->p_segacct, nsap, &where);
	avl_insert(pp->p_segacct, nsap, where);

	mutex_exit(&pp->p_lock);
}
995*0Sstevel@tonic-gate 
/*
 * Duplicate parent's segacct records in child.  For each attachment we
 * add a matching record to the child, bump the ISM attach count where
 * applicable, and take a reference on the id on the child's behalf.
 */
void
shmfork(struct proc *ppp, struct proc *cpp)
{
	segacct_t *sap;
	kshmid_t *sp;
	kmutex_t *mp;

	ASSERT(ppp->p_segacct != NULL);

	/*
	 * We are the only lwp running in the parent so nobody can
	 * mess with our p_segacct list.  Thus it is safe to traverse
	 * the list without holding p_lock.  This is essential because
	 * we can't hold p_lock during a KM_SLEEP allocation.
	 */
	for (sap = (segacct_t *)avl_first(ppp->p_segacct); sap != NULL;
	    sap = (segacct_t *)AVL_NEXT(ppp->p_segacct, sap)) {
		sa_add(cpp, sap->sa_addr, sap->sa_len, sap->sa_flags,
		    sap->sa_id);
		sp = sap->sa_id;
		mp = ipc_lock(shm_svc, sp->shm_perm.ipc_id);
		if (sap->sa_flags & SHMSA_ISM)
			sp->shm_ismattch++;	/* child adds an ISM attach */
		ipc_hold(shm_svc, (kipc_perm_t *)sp);	/* child's reference */
		mutex_exit(mp);
	}
}
1026*0Sstevel@tonic-gate 
/*
 * Detach shared memory segments from exiting process.
 */
void
shmexit(struct proc *pp)
{
	segacct_t *sap;
	avl_tree_t *tree;
	void *cookie = NULL;

	ASSERT(pp->p_segacct != NULL);

	/*
	 * Unhook the whole tree from the process under p_lock (with
	 * /proc blocked via prbarrier) so no one else can see it, then
	 * dismantle it without holding any locks.
	 */
	mutex_enter(&pp->p_lock);
	prbarrier(pp);
	tree = pp->p_segacct;
	pp->p_segacct = NULL;
	mutex_exit(&pp->p_lock);

	while ((sap = avl_destroy_nodes(tree, &cookie)) != NULL)
		(void) shm_detach(pp, sap);

	avl_destroy(tree);
	kmem_free(tree, sizeof (avl_tree_t));
}
1051*0Sstevel@tonic-gate 
/*
 * At this time pages should be in memory, so just lock them.
 * Walks the first npages anon slots of amp and takes a p_lckcnt hold
 * on each backing page; panics if any page is absent.
 */
static void
lock_again(size_t npages, struct anon_map *amp)
{
	struct anon *ap;
	struct page *pp;
	struct vnode *vp;
	anoff_t off;
	ulong_t anon_idx;
	anon_sync_obj_t cookie;

	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);

	for (anon_idx = 0; npages != 0; anon_idx++, npages--) {

		/* translate the anon slot to its backing <vp, off> */
		anon_array_enter(amp, anon_idx, &cookie);
		ap = anon_get_ptr(amp->ahp, anon_idx);
		swap_xlate(ap, &vp, &off);
		anon_array_exit(&cookie);

		pp = page_lookup(vp, (u_offset_t)off, SE_SHARED);
		if (pp == NULL) {
			panic("lock_again: page not in the system");
			/*NOTREACHED*/
		}
		(void) page_pp_lock(pp, 0, 0);
		page_unlock(pp);
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
}
1084*0Sstevel@tonic-gate 
1085*0Sstevel@tonic-gate /* check if this segment is already locked. */
1086*0Sstevel@tonic-gate /*ARGSUSED*/
1087*0Sstevel@tonic-gate static int
1088*0Sstevel@tonic-gate check_locked(struct as *as, struct segvn_data *svd, size_t npages)
1089*0Sstevel@tonic-gate {
1090*0Sstevel@tonic-gate 	struct vpage *vpp = svd->vpage;
1091*0Sstevel@tonic-gate 	size_t i;
1092*0Sstevel@tonic-gate 	if (svd->vpage == NULL)
1093*0Sstevel@tonic-gate 		return (0);		/* unlocked */
1094*0Sstevel@tonic-gate 
1095*0Sstevel@tonic-gate 	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
1096*0Sstevel@tonic-gate 	for (i = 0; i < npages; i++, vpp++) {
1097*0Sstevel@tonic-gate 		if (VPP_ISPPLOCK(vpp) == 0) {
1098*0Sstevel@tonic-gate 			SEGVN_LOCK_EXIT(as, &svd->lock);
1099*0Sstevel@tonic-gate 			return (1);	/* partially locked */
1100*0Sstevel@tonic-gate 		}
1101*0Sstevel@tonic-gate 	}
1102*0Sstevel@tonic-gate 	SEGVN_LOCK_EXIT(as, &svd->lock);
1103*0Sstevel@tonic-gate 	return (2);			/* locked */
1104*0Sstevel@tonic-gate }
1105*0Sstevel@tonic-gate 
/*
 * Attach the shared memory segment to the process
 * address space and lock the pages.
 *
 * If the caller already has the amp mapped (full-size segvn mapping),
 * lock through that mapping; otherwise create a temporary private
 * mapping, lock the pages through it, and unmap it again.  Returns 0
 * on success or an errno value.
 */
static int
shmem_lock(struct anon_map *amp)
{
	size_t npages = btopr(amp->size);
	struct seg *seg;
	struct as *as;
	struct segvn_crargs crargs;
	struct segvn_data *svd;
	proc_t *p = curproc;
	caddr_t addr;
	uint_t error, ret;
	caddr_t seg_base;
	size_t  seg_sz;

	as = p->p_as;
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	/* check if shared memory is already attached */
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		svd = (struct segvn_data *)seg->s_data;
		if ((seg->s_ops == &segvn_ops) && (svd->amp == amp) &&
		    (amp->size == seg->s_size)) {
			switch (ret = check_locked(as, svd, npages)) {
			case 0:			/* unlocked */
			case 1:			/* partially locked */
				/* save before dropping the as lock */
				seg_base = seg->s_base;
				seg_sz = seg->s_size;

				AS_LOCK_EXIT(as, &as->a_lock);
				/* MC_LOCK faults pages in; then pp-lock them */
				if ((error = as_ctl(as, seg_base, seg_sz,
					MC_LOCK, 0, 0, NULL, 0)) == 0)
					lock_again(npages, amp);
				/* drop the transient MC_LOCK hold */
				(void) as_ctl(as, seg_base, seg_sz, MC_UNLOCK,
					0, 0, NULL, NULL);
				return (error);
			case 2:			/* locked */
				AS_LOCK_EXIT(as, &as->a_lock);
				lock_again(npages, amp);
				return (0);
			default:
				cmn_err(CE_WARN, "shmem_lock: deflt %d", ret);
				break;
			}
		}
	}
	AS_LOCK_EXIT(as, &as->a_lock);

	/* attach shm segment to our address space */
	as_rangelock(as);
	map_addr(&addr, amp->size, 0ll, 1, 0);
	if (addr == NULL) {
		as_rangeunlock(as);
		return (ENOMEM);
	}

	/* Initialize the create arguments and map the segment */
	crargs = *(struct segvn_crargs *)zfod_argsp;	/* structure copy */
	crargs.offset = (u_offset_t)0;
	crargs.type = MAP_SHARED;
	crargs.amp = amp;
	crargs.prot = PROT_ALL;
	crargs.maxprot = crargs.prot;
	crargs.flags = 0;

	error = as_map(as, addr, amp->size, segvn_create, &crargs);
	as_rangeunlock(as);
	if (!error) {
		/* lock pages through the temporary mapping, then drop it */
		if ((error = as_ctl(as, addr, amp->size, MC_LOCK, 0, 0,
			NULL, 0)) == 0) {
			lock_again(npages, amp);
		}
		(void) as_unmap(as, addr, amp->size);
	}
	return (error);
}
1185*0Sstevel@tonic-gate 
/*
 * Unlock shared memory: drop the p_lckcnt hold on each page of amp.
 * When lck is nonzero the caller asserts the segment was SHM_LOCKed,
 * so a missing anon slot or page is fatal; when lck is zero (cleanup
 * of a never-fully-locked segment) such holes are silently skipped.
 */
static void
shmem_unlock(struct anon_map *amp, uint_t lck)
{
	struct anon *ap;
	pgcnt_t npages = btopr(amp->size);
	struct vnode *vp;
	struct page *pp;
	anoff_t off;
	ulong_t anon_idx;

	for (anon_idx = 0; anon_idx < npages; anon_idx++) {

		if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
			if (lck) {
				panic("shmem_unlock: null app");
				/*NOTREACHED*/
			}
			continue;
		}
		swap_xlate(ap, &vp, &off);
		pp = page_lookup(vp, off, SE_SHARED);
		if (pp == NULL) {
			if (lck) {
				panic("shmem_unlock: page not in the system");
				/*NOTREACHED*/
			}
			continue;
		}
		/* only unlock pages that still carry a lock count */
		if (pp->p_lckcnt) {
			page_pp_unlock(pp, 0, 0);
		}
		page_unlock(pp);
	}
}
1224*0Sstevel@tonic-gate 
/*
 * We call this routine when we have removed all references to this
 * amp.  This means all shmdt()s and the IPC_RMID have been done.
 */
static void
shm_rm_amp(struct anon_map *amp, uint_t lckflag)
{
	/*
	 * If we are finally deleting the
	 * shared memory, and if no one did
	 * the SHM_UNLOCK, we must do it now.
	 */
	shmem_unlock(amp, lckflag);

	/*
	 * Free up the anon_map: tear down lgroup memory policy, free
	 * the anon slots, release the swap reservation taken in
	 * shmget(), and free the map itself.
	 */
	lgrp_shm_policy_fini(amp, NULL);
	anon_free(amp->ahp, 0, amp->size);
	anon_unresv(amp->swresv);
	anonmap_free(amp);
}
1247*0Sstevel@tonic-gate 
/*
 * Return the shared memory id for the process's virtual address.
 * Return SHMID_NONE if addr is not within a SysV shared memory segment.
 * Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
 *
 * shmgetid() is called from code in /proc with the process locked but
 * with pp->p_lock not held.  The address space lock is held, so we
 * cannot grab pp->p_lock here due to lock-ordering constraints.
 * Because of all this, modifications to the p_segacct list must only
 * be made after calling prbarrier() to ensure the process is not locked.
 * See shmdt() and sa_add(), above. shmgetid() may also be called on a
 * thread's own process without the process locked.
 */
int
shmgetid(proc_t *pp, caddr_t addr)
{
	segacct_t *sap, template;

	ASSERT(MUTEX_NOT_HELD(&pp->p_lock));
	ASSERT((pp->p_proc_flag & P_PR_LOCK) || pp == curproc);

	if (pp->p_segacct == NULL)
		return (SHMID_NONE);

	/* zero sa_len marks this as a lookup template; see shm_sacompar() */
	template.sa_addr = addr;
	template.sa_len = 0;
	if ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)
		return (SHMID_NONE);

	if (IPC_FREE(&sap->sa_id->shm_perm))
		return (SHMID_FREE);

	return (sap->sa_id->shm_perm.ipc_id);
}
1282