/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/prsystm.h>
#include <sys/kmem.h>
#include <sys/sobject.h>
#include <sys/fault.h>
#include <sys/procfs.h>
#include <sys/watchpoint.h>
#include <sys/time.h>
#include <sys/cmn_err.h>
#include <sys/machlock.h>
#include <sys/debug.h>
#include <sys/synch.h>
#include <sys/synch32.h>
#include <sys/mman.h>
#include <sys/class.h>
#include <sys/schedctl.h>
#include <sys/sleepq.h>
#include <sys/policy.h>
#include <sys/tnf_probe.h>
#include <sys/lwpchan_impl.h>
#include <sys/turnstile.h>
#include <sys/atomic.h>
#include <sys/lwp_timer_impl.h>
#include <sys/lwp_upimutex_impl.h>
#include <vm/as.h>
#include <sys/sdt.h>

static kthread_t *lwpsobj_owner(caddr_t);
static void lwp_unsleep(kthread_t *t);
static void lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip);
static void lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg);

extern int lwp_cond_signal(lwp_cond_t *cv);

/*
 * Maximum number of user priority-inheritance locks that can be held by a
 * thread.  Used to limit the kmem allocated for each thread.  This is a
 * per-thread limit that can be administered on a system-wide basis
 * (using /etc/system).
 *
 * Also, when a limit, say maxlwps, is added for the number of lwps within a
 * process, the per-thread limit automatically becomes a process-wide limit
 * on the maximum number of held upi locks within a process:
 *      maxheldupimx = maxnestupimx * maxlwps;
 */
static uint32_t maxnestupimx = 2000;
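
/*
 * For example, an administrator could raise the per-thread limit by
 * adding a line such as the following to /etc/system (the value 4000
 * here is only an illustration):
 *
 *	set maxnestupimx = 4000
 */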

/*
 * The sobj_ops vector exports a set of functions needed when a thread
 * is asleep on a synchronization object of this type.
 */
static sobj_ops_t lwp_sobj_ops = {
	SOBJ_USER, lwpsobj_owner, lwp_unsleep, lwp_change_pri
};

static kthread_t *lwpsobj_pi_owner(upimutex_t *up);

static sobj_ops_t lwp_sobj_pi_ops = {
	SOBJ_USER_PI, lwpsobj_pi_owner, turnstile_unsleep,
	turnstile_change_pri
};

static sleepq_head_t	lwpsleepq[NSLEEPQ];
upib_t			upimutextab[UPIMUTEX_TABSIZE];

#define	LWPCHAN_LOCK_SHIFT	10	/* 1024 locks for each pool */
#define	LWPCHAN_LOCK_SIZE	(1 << LWPCHAN_LOCK_SHIFT)

/*
 * We know that both lc_wchan and lc_wchan0 are addresses that most
 * likely are 8-byte aligned, so we shift off the low-order 3 bits.
 * 'pool' is either 0 or 1.
 */
#define	LWPCHAN_LOCK_HASH(X, pool) \
	(((((X) >> 3) ^ ((X) >> (LWPCHAN_LOCK_SHIFT + 3))) & \
	(LWPCHAN_LOCK_SIZE - 1)) + ((pool)? LWPCHAN_LOCK_SIZE : 0))

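/*
 * For example, with LWPCHAN_LOCK_SHIFT equal to 10 as above, pool 0
 * indices computed by LWPCHAN_LOCK_HASH() fall in [0, 1023] and pool 1
 * indices in [1024, 2047], so the two pools never contend for a lock
 * in the lwpchanlock[] array below.
 */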
static kmutex_t		lwpchanlock[2 * LWPCHAN_LOCK_SIZE];

/*
 * Is this a POSIX threads user-level lock requiring priority inheritance?
 */
#define	UPIMUTEX(type)	((type) & LOCK_PRIO_INHERIT)
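
/*
 * User-level code requests priority inheritance through the standard
 * POSIX attribute interface; a user-land sketch (not part of this file)
 * of creating such a lock:
 *
 *	pthread_mutexattr_t attr;
 *	(void) pthread_mutexattr_init(&attr);
 *	(void) pthread_mutexattr_setprotocol(&attr, PTHREAD_PRIO_INHERIT);
 *	(void) pthread_mutex_init(&m, &attr);
 */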

static sleepq_head_t *
lwpsqhash(lwpchan_t *lwpchan)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	return (&lwpsleepq[SQHASHINDEX(x)]);
}

/*
 * Lock an lwpchan.
 * Keep this in sync with lwpchan_unlock(), below.
 */
static void
lwpchan_lock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_enter(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Unlock an lwpchan.
 * Keep this in sync with lwpchan_lock(), above.
 */
static void
lwpchan_unlock(lwpchan_t *lwpchan, int pool)
{
	uint_t x = (uintptr_t)lwpchan->lc_wchan ^ (uintptr_t)lwpchan->lc_wchan0;
	mutex_exit(&lwpchanlock[LWPCHAN_LOCK_HASH(x, pool)]);
}

/*
 * Delete mappings from the lwpchan cache for pages that are being
 * unmapped by as_unmap().  Given a range of addresses, "start" to "end",
 * all mappings within the range are deleted from the lwpchan cache.
 */
void
lwpchan_delete_mapping(proc_t *p, caddr_t start, caddr_t end)
{
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t **prev;
	caddr_t addr;

	mutex_enter(&p->p_lcp_lock);
	lcp = p->p_lcp;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		if (hashbucket->lwpchan_chain == NULL)
			continue;
		mutex_enter(&hashbucket->lwpchan_lock);
		prev = &hashbucket->lwpchan_chain;
		/* check entire chain */
		while ((ent = *prev) != NULL) {
			addr = ent->lwpchan_addr;
			if (start <= addr && addr < end) {
				*prev = ent->lwpchan_next;
				if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
				    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
					lwp_mutex_cleanup(ent, LOCK_UNMAPPED);
				kmem_free(ent, sizeof (*ent));
				atomic_add_32(&lcp->lwpchan_entries, -1);
			} else {
				prev = &ent->lwpchan_next;
			}
		}
		mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Given an lwpchan cache pointer and a process virtual address,
 * return a pointer to the corresponding lwpchan hash bucket.
 */
static lwpchan_hashbucket_t *
lwpchan_bucket(lwpchan_data_t *lcp, uintptr_t addr)
{
	uint_t i;

	/*
	 * All user-level sync object addresses are 8-byte aligned.
	 * Ignore the lowest 3 bits of the address and use the
	 * higher-order 2*lwpchan_bits bits for the hash index.
	 */
	addr >>= 3;
	i = (addr ^ (addr >> lcp->lwpchan_bits)) & lcp->lwpchan_mask;
	return (lcp->lwpchan_cache + i);
}

/*
 * (Re)allocate the per-process lwpchan cache.
 */
static void
lwpchan_alloc_cache(proc_t *p, uint_t bits)
{
	lwpchan_data_t *lcp;
	lwpchan_data_t *old_lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_hashbucket_t *newbucket;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint_t count;

	ASSERT(bits >= LWPCHAN_INITIAL_BITS && bits <= LWPCHAN_MAX_BITS);

	lcp = kmem_alloc(sizeof (lwpchan_data_t), KM_SLEEP);
	lcp->lwpchan_bits = bits;
	lcp->lwpchan_size = 1 << lcp->lwpchan_bits;
	lcp->lwpchan_mask = lcp->lwpchan_size - 1;
	lcp->lwpchan_entries = 0;
	lcp->lwpchan_cache = kmem_zalloc(lcp->lwpchan_size *
		sizeof (lwpchan_hashbucket_t), KM_SLEEP);
	lcp->lwpchan_next_data = NULL;

	mutex_enter(&p->p_lcp_lock);
	if ((old_lcp = p->p_lcp) != NULL) {
		if (old_lcp->lwpchan_bits >= bits) {
			/* someone beat us to it */
			mutex_exit(&p->p_lcp_lock);
			kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
				sizeof (lwpchan_hashbucket_t));
			kmem_free(lcp, sizeof (lwpchan_data_t));
			return;
		}
		/*
		 * Acquire all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		endbucket = hashbucket + old_lcp->lwpchan_size;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_enter(&hashbucket->lwpchan_lock);
		/*
		 * Move all of the old hash table entries to the
		 * new hash table.  The new hash table has not yet
		 * been installed so we don't need any of its locks.
		 */
		count = 0;
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++) {
			ent = hashbucket->lwpchan_chain;
			while (ent != NULL) {
				next = ent->lwpchan_next;
				newbucket = lwpchan_bucket(lcp,
					(uintptr_t)ent->lwpchan_addr);
				ent->lwpchan_next = newbucket->lwpchan_chain;
				newbucket->lwpchan_chain = ent;
				ent = next;
				count++;
			}
			hashbucket->lwpchan_chain = NULL;
		}
		lcp->lwpchan_entries = count;
	}

	/*
	 * Retire the old hash table.  We can't actually kmem_free() it
	 * now because someone may still have a pointer to it.  Instead,
	 * we link it onto the new hash table's list of retired hash tables.
	 * The new hash table is double the size of the previous one, so
	 * the total size of all retired hash tables is less than the size
	 * of the new one.  exit() and exec() free the retired hash tables
	 * (see lwpchan_destroy_cache(), below).
	 */
	lcp->lwpchan_next_data = old_lcp;

	/*
	 * As soon as we store the new lcp, future locking operations will
	 * use it.  Therefore, we must ensure that all the state we've just
	 * established reaches global visibility before the new lcp does.
	 */
	membar_producer();
	p->p_lcp = lcp;

	if (old_lcp != NULL) {
		/*
		 * Release all of the old hash table locks.
		 */
		hashbucket = old_lcp->lwpchan_cache;
		for (; hashbucket < endbucket; hashbucket++)
			mutex_exit(&hashbucket->lwpchan_lock);
	}
	mutex_exit(&p->p_lcp_lock);
}

/*
 * Deallocate the lwpchan cache, and any dynamically allocated mappings.
 * Called when the process exits or execs.  All lwps except one have
 * exited so we need no locks here.
 */
void
lwpchan_destroy_cache(int exec)
{
	proc_t *p = curproc;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_hashbucket_t *endbucket;
	lwpchan_data_t *lcp;
	lwpchan_entry_t *ent;
	lwpchan_entry_t *next;
	uint16_t lockflg;

	lcp = p->p_lcp;
	p->p_lcp = NULL;

	lockflg = exec? LOCK_UNMAPPED : LOCK_OWNERDEAD;
	hashbucket = lcp->lwpchan_cache;
	endbucket = hashbucket + lcp->lwpchan_size;
	for (; hashbucket < endbucket; hashbucket++) {
		ent = hashbucket->lwpchan_chain;
		hashbucket->lwpchan_chain = NULL;
		while (ent != NULL) {
			next = ent->lwpchan_next;
			if (ent->lwpchan_pool == LWPCHAN_MPPOOL &&
			    (ent->lwpchan_type & USYNC_PROCESS_ROBUST))
				lwp_mutex_cleanup(ent, lockflg);
			kmem_free(ent, sizeof (*ent));
			ent = next;
		}
	}

	while (lcp != NULL) {
		lwpchan_data_t *next_lcp = lcp->lwpchan_next_data;
		kmem_free(lcp->lwpchan_cache, lcp->lwpchan_size *
			sizeof (lwpchan_hashbucket_t));
		kmem_free(lcp, sizeof (lwpchan_data_t));
		lcp = next_lcp;
	}
}

/*
 * Return zero when there is an entry in the lwpchan cache for the
 * given process virtual address and non-zero when there is not.
 * The returned non-zero value is the current length of the
 * hash chain plus one.  The caller holds the hash bucket lock.
 */
static uint_t
lwpchan_cache_mapping(caddr_t addr, int type, int pool, lwpchan_t *lwpchan,
	lwpchan_hashbucket_t *hashbucket)
{
	lwpchan_entry_t *ent;
	uint_t count = 1;

	for (ent = hashbucket->lwpchan_chain; ent; ent = ent->lwpchan_next) {
		if (ent->lwpchan_addr == addr) {
			if (ent->lwpchan_type != type ||
			    ent->lwpchan_pool != pool) {
				/*
				 * This shouldn't happen, but might if the
				 * process reuses its memory for different
				 * types of sync objects.  We test first
				 * to avoid grabbing the memory cache line.
				 */
				ent->lwpchan_type = (uint16_t)type;
				ent->lwpchan_pool = (uint16_t)pool;
			}
			*lwpchan = ent->lwpchan_lwpchan;
			return (0);
		}
		count++;
	}
	return (count);
}

/*
 * Return the cached lwpchan mapping if cached, otherwise insert
 * a virtual address to lwpchan mapping into the cache.
 */
static int
lwpchan_get_mapping(struct as *as, caddr_t addr,
	int type, lwpchan_t *lwpchan, int pool)
{
	proc_t *p = curproc;
	lwpchan_data_t *lcp;
	lwpchan_hashbucket_t *hashbucket;
	lwpchan_entry_t *ent;
	memid_t	memid;
	uint_t count;
	uint_t bits;

top:
	/* initialize the lwpchan cache, if necessary */
	if ((lcp = p->p_lcp) == NULL) {
		lwpchan_alloc_cache(p, LWPCHAN_INITIAL_BITS);
		goto top;
	}
	hashbucket = lwpchan_bucket(lcp, (uintptr_t)addr);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		goto top;
	}
	if (lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket) == 0) {
		/* it's in the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		return (1);
	}
	mutex_exit(&hashbucket->lwpchan_lock);
	if (as_getmemid(as, addr, &memid) != 0)
		return (0);
	lwpchan->lc_wchan0 = (caddr_t)(uintptr_t)memid.val[0];
	lwpchan->lc_wchan = (caddr_t)(uintptr_t)memid.val[1];
	ent = kmem_alloc(sizeof (lwpchan_entry_t), KM_SLEEP);
	mutex_enter(&hashbucket->lwpchan_lock);
	if (lcp != p->p_lcp) {
		/* someone resized the lwpchan cache; start over */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		goto top;
	}
	count = lwpchan_cache_mapping(addr, type, pool, lwpchan, hashbucket);
	if (count == 0) {
		/* someone else added this entry to the cache */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		return (1);
	}
	if (count > lcp->lwpchan_bits + 2 && /* larger table, longer chains */
	    (bits = lcp->lwpchan_bits) < LWPCHAN_MAX_BITS) {
		/* hash chain too long; reallocate the hash table */
		mutex_exit(&hashbucket->lwpchan_lock);
		kmem_free(ent, sizeof (*ent));
		lwpchan_alloc_cache(p, bits + 1);
		goto top;
	}
	ent->lwpchan_addr = addr;
	ent->lwpchan_type = (uint16_t)type;
	ent->lwpchan_pool = (uint16_t)pool;
	ent->lwpchan_lwpchan = *lwpchan;
	ent->lwpchan_next = hashbucket->lwpchan_chain;
	hashbucket->lwpchan_chain = ent;
	atomic_add_32(&lcp->lwpchan_entries, 1);
	mutex_exit(&hashbucket->lwpchan_lock);
	return (1);
}
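
/*
 * A note on the resize heuristic in lwpchan_get_mapping() above: since
 * lwpchan_cache_mapping() returns the chain length plus one, the table
 * is doubled only when a hash chain grows longer than lwpchan_bits + 1
 * entries (for example, 12 or more entries when lwpchan_bits is 10),
 * so a few collisions in one bucket do not force a reallocation.
 */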

/*
 * Return a unique pair of identifiers that corresponds to a
 * synchronization object's virtual address.  Process-shared
 * sync objects usually get vnode/offset from as_getmemid().
 */
static int
get_lwpchan(struct as *as, caddr_t addr, int type, lwpchan_t *lwpchan, int pool)
{
	/*
	 * If the lwp synch object is defined to be process-private,
	 * we just make the first field of the lwpchan be 'as' and
	 * the second field be the synch object's virtual address.
	 * (segvn_getmemid() does the same for MAP_PRIVATE mappings.)
	 * The lwpchan cache is used only for process-shared objects.
	 */
	if ((type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) == 0) {
		lwpchan->lc_wchan0 = (caddr_t)as;
		lwpchan->lc_wchan = addr;
		return (1);
	}
	/* check the lwpchan cache for mapping */
	return (lwpchan_get_mapping(as, addr, type, lwpchan, pool));
}

static void
lwp_block(lwpchan_t *lwpchan)
{
	kthread_t *t = curthread;
	klwp_t *lwp = ttolwp(t);
	sleepq_head_t *sqh;

	thread_lock(t);
	t->t_flag |= T_WAKEABLE;
	t->t_lwpchan = *lwpchan;
	t->t_sobj_ops = &lwp_sobj_ops;
	t->t_release = 0;
	sqh = lwpsqhash(lwpchan);
	disp_lock_enter_high(&sqh->sq_lock);
	CL_SLEEP(t);
	DTRACE_SCHED(sleep);
	THREAD_SLEEP(t, &sqh->sq_lock);
	sleepq_insert(&sqh->sq_queue, t);
	thread_unlock(t);
	lwp->lwp_asleep = 1;
	lwp->lwp_sysabort = 0;
	lwp->lwp_ru.nvcsw++;
	(void) new_mstate(curthread, LMS_SLEEP);
}

static kthread_t *
lwpsobj_pi_owner(upimutex_t *up)
{
	return (up->upi_owner);
}

static struct upimutex *
upi_get(upib_t *upibp, lwpchan_t *lcp)
{
	struct upimutex *upip;

	for (upip = upibp->upib_first; upip != NULL;
	    upip = upip->upi_nextchain) {
		if (upip->upi_lwpchan.lc_wchan0 == lcp->lc_wchan0 &&
		    upip->upi_lwpchan.lc_wchan == lcp->lc_wchan)
			break;
	}
	return (upip);
}

static void
upi_chain_add(upib_t *upibp, struct upimutex *upimutex)
{
	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	/*
	 * Insert upimutex at front of list. Maybe a bit unfair
	 * but assume that not many lwpchans hash to the same
	 * upimutextab bucket, i.e. the list of upimutexes from
	 * upib_first is not too long.
	 */
	upimutex->upi_nextchain = upibp->upib_first;
	upibp->upib_first = upimutex;
}

static void
upi_chain_del(upib_t *upibp, struct upimutex *upimutex)
{
	struct upimutex **prev;

	ASSERT(MUTEX_HELD(&upibp->upib_lock));

	prev = &upibp->upib_first;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextchain;
	}
	*prev = upimutex->upi_nextchain;
	upimutex->upi_nextchain = NULL;
}

/*
 * Add upimutex to chain of upimutexes held by curthread.
 * Returns number of upimutexes held by curthread.
 */
static uint32_t
upi_mylist_add(struct upimutex *upimutex)
{
	kthread_t *t = curthread;

	/*
	 * Insert upimutex at front of list of upimutexes owned by t. This
	 * would match typical LIFO order in which nested locks are acquired
	 * and released.
	 */
	upimutex->upi_nextowned = t->t_upimutex;
	t->t_upimutex = upimutex;
	t->t_nupinest++;
	ASSERT(t->t_nupinest > 0);
	return (t->t_nupinest);
}

/*
 * Delete upimutex from list of upimutexes owned by curthread.
 */
static void
upi_mylist_del(struct upimutex *upimutex)
{
	kthread_t *t = curthread;
	struct upimutex **prev;

	/*
	 * Since the order in which nested locks are acquired and released
	 * is typically LIFO, and typical nesting levels are not too deep,
	 * the following should not be expensive in the general case.
	 */
	prev = &t->t_upimutex;
	while (*prev != upimutex) {
		prev = &(*prev)->upi_nextowned;
	}
	*prev = upimutex->upi_nextowned;
	upimutex->upi_nextowned = NULL;
	ASSERT(t->t_nupinest > 0);
	t->t_nupinest--;
}

/*
 * Returns true if upimutex is owned. Should be called only when upim points
 * to kmem which cannot disappear from underneath.
 */
static int
upi_owned(upimutex_t *upim)
{
	return (upim->upi_owner == curthread);
}

/*
 * Returns pointer to kernel object (upimutex_t *) if lp is owned.
 */
static struct upimutex *
lwp_upimutex_owned(lwp_mutex_t *lp, uint8_t type)
{
	lwpchan_t lwpchan;
	upib_t *upibp;
	struct upimutex *upimutex;

	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL))
		return (NULL);

	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		return (NULL);
	}
	mutex_exit(&upibp->upib_lock);
	return (upimutex);
}

/*
 * Unlocks upimutex, waking up waiters if any. upimutex kmem is freed if
 * no lock hand-off occurs.
 */
static void
upimutex_unlock(struct upimutex *upimutex, uint16_t flag)
{
	turnstile_t *ts;
	upib_t *upibp;
	kthread_t *newowner;

	upi_mylist_del(upimutex);
	upibp = upimutex->upi_upibp;
	mutex_enter(&upibp->upib_lock);
	if (upimutex->upi_waiter != 0) { /* if waiters */
		ts = turnstile_lookup(upimutex);
		if (ts != NULL && !(flag & LOCK_NOTRECOVERABLE)) {
			/* hand-off lock to highest prio waiter */
			newowner = ts->ts_sleepq[TS_WRITER_Q].sq_first;
			upimutex->upi_owner = newowner;
			if (ts->ts_waiters == 1)
				upimutex->upi_waiter = 0;
			turnstile_wakeup(ts, TS_WRITER_Q, 1, newowner);
			mutex_exit(&upibp->upib_lock);
			return;
		} else if (ts != NULL) {
			/* LOCK_NOTRECOVERABLE: wakeup all */
			turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
		} else {
			/*
			 * Misleading w bit. Waiters might have been
			 * interrupted. No need to clear the w bit (upimutex
			 * will soon be freed). Re-calculate PI from existing
			 * waiters.
			 */
			turnstile_exit(upimutex);
			turnstile_pi_recalc();
		}
	}
	/*
	 * no waiters, or LOCK_NOTRECOVERABLE.
	 * remove from the bucket chain of upi mutexes.
	 * de-allocate kernel memory (upimutex).
	 */
	upi_chain_del(upimutex->upi_upibp, upimutex);
	mutex_exit(&upibp->upib_lock);
	kmem_free(upimutex, sizeof (upimutex_t));
}

static int
lwp_upimutex_lock(lwp_mutex_t *lp, uint8_t type, int try, lwp_timer_t *lwptp)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	turnstile_t *ts;
	uint32_t nupinest;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	/*
	 * The apparent assumption made in implementing other _lwp_* synch
	 * primitives is that get_lwpchan() does not return a unique cookie
	 * for the case where 2 processes (one forked from the other) point
	 * at the same underlying object, which is typed USYNC_PROCESS, but
	 * mapped MAP_PRIVATE, since the object has not yet been written to,
	 * in the child process.
	 *
	 * Since get_lwpchan() has been fixed, it is not necessary to do the
	 * dummy writes to force a COW fault as in other places (which should
	 * be fixed).
	 */
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
retry:
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	if (upimutex == NULL) {
		/* lock available since lwpchan has no upimutex */
		upimutex = kmem_zalloc(sizeof (upimutex_t), KM_SLEEP);
		upi_chain_add(upibp, (upimutex_t *)upimutex);
		upimutex->upi_owner = curthread; /* grab lock */
		upimutex->upi_upibp = upibp;
		upimutex->upi_vaddr = lp;
		upimutex->upi_lwpchan = lwpchan;
		mutex_exit(&upibp->upib_lock);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
		fuword16_noerr(&lp->mutex_flag, &flag);
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			error = ENOMEM;
			goto out;
		}
		if (flag & LOCK_OWNERDEAD) {
			/*
			 * Return with upimutex held.
			 */
			error = EOWNERDEAD;
		} else if (flag & LOCK_NOTRECOVERABLE) {
			/*
			 * Since the setting of LOCK_NOTRECOVERABLE
			 * was done under the high-level upi mutex,
			 * in lwp_upimutex_unlock(), this flag needs to
			 * be checked while holding the upi mutex.
			 * If set, this thread should return without
			 * the lock held, and with the right error
			 * code.
			 */
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOTRECOVERABLE;
		}
		goto out;
	}
	/*
	 * If a upimutex object exists, it must have an owner.
	 * This is due to lock hand-off, and release of upimutex when no
	 * waiters are present at unlock time.
	 */
	ASSERT(upimutex->upi_owner != NULL);
	if (upimutex->upi_owner == curthread) {
		/*
		 * The user wrapper can check if the mutex type is
		 * ERRORCHECK: if not, it should stall at user-level.
		 * If so, it should return the error code.
		 */
		mutex_exit(&upibp->upib_lock);
		error = EDEADLK;
		goto out;
	}
	if (try == UPIMUTEX_TRY) {
		mutex_exit(&upibp->upib_lock);
		error = EBUSY;
		goto out;
	}
	/*
	 * Block for the lock.
	 * Put the lwp in an orderly state for debugging.
	 * Calling prstop() has to be done here, and not in
	 * turnstile_block(), since the preceding call to
	 * turnstile_lookup() raises the PIL to a level
	 * at which calls to prstop() should not be made.
	 */
	if ((error = lwptp->lwpt_time_error) != 0) {
		/*
		 * The SUSv3 POSIX spec is very clear that we
		 * should get no error from validating the
		 * timer until we would actually sleep.
		 */
		mutex_exit(&upibp->upib_lock);
		goto out;
	}
	prstop(PR_REQUESTED, 0);
	if (lwptp->lwpt_tsp != NULL) {
		/*
		 * If we successfully queue the timeout
		 * (lwp_timer_enqueue() returns zero),
		 * then don't drop t_delay_lock until we are
		 * on the sleep queue (in turnstile_block()).
		 * Otherwise we will get an immediate timeout
		 * when we attempt to sleep in turnstile_block().
		 */
		mutex_enter(&curthread->t_delay_lock);
		if (lwp_timer_enqueue(lwptp) != 0)
			mutex_exit(&curthread->t_delay_lock);
	}
	/*
	 * Now, set the waiter bit and block for the lock in turnstile_block().
	 * No need to preserve the previous wbit since a lock try is not
	 * attempted after setting the wait bit. Wait bit is set under
	 * the upib_lock, which is not released until the turnstile lock
	 * is acquired. Say, the upimutex is L:
	 *
	 * 1. upib_lock is held so the waiter does not have to retry L after
	 *    setting the wait bit: since the owner has to grab the upib_lock
	 *    to unlock L, it will certainly see the wait bit set.
	 * 2. upib_lock is not released until the turnstile lock is acquired.
	 *    This is the key to preventing a missed wake-up. Otherwise, the
	 *    owner could acquire the upib_lock, and the tc_lock, to call
	 *    turnstile_wakeup(). All this, before the waiter gets tc_lock
	 *    to sleep in turnstile_block(). turnstile_wakeup() will then not
	 *    find this waiter, resulting in the missed wakeup.
	 * 3. The upib_lock, being a kernel mutex, cannot be released while
	 *    holding the tc_lock (since mutex_exit() could need to acquire
	 *    the same tc_lock)...and so is held when calling turnstile_block().
	 *    The address of upib_lock is passed to turnstile_block() which
	 *    releases it after releasing all turnstile locks, and before going
	 *    to sleep in swtch().
	 * 4. The waiter value cannot be a count of waiters, because a waiter
	 *    can be interrupted. The interrupt occurs under the tc_lock, at
	 *    which point, the upib_lock cannot be locked, to decrement waiter
	 *    count. So, just treat the waiter state as a bit, not a count.
	 */
	ts = turnstile_lookup((upimutex_t *)upimutex);
	upimutex->upi_waiter = 1;
	error = turnstile_block(ts, TS_WRITER_Q, (upimutex_t *)upimutex,
	    &lwp_sobj_pi_ops, &upibp->upib_lock, lwptp);
	/*
	 * Hand-off implies that we wakeup holding the lock, except when:
	 *	- deadlock is detected
	 *	- lock is not recoverable
	 *	- we got an interrupt or timeout
	 * If we wake up due to an interrupt or timeout, we may
	 * or may not be holding the lock due to mutex hand-off.
	 * Use lwp_upimutex_owned() to check if we do hold the lock.
	 */
	if (error != 0) {
		if ((error == EINTR || error == ETIME) &&
		    (upimutex = lwp_upimutex_owned(lp, type))) {
			/*
			 * Unlock and return - the re-startable syscall will
			 * try the lock again if we got EINTR.
			 */
			(void) upi_mylist_add((upimutex_t *)upimutex);
			upimutex_unlock((upimutex_t *)upimutex, 0);
		}
		/*
		 * The only other possible error is EDEADLK.  If so, upimutex
		 * is valid, since its owner is deadlocked with curthread.
		 */
		ASSERT(error == EINTR || error == ETIME ||
		    (error == EDEADLK && !upi_owned((upimutex_t *)upimutex)));
		ASSERT(!lwp_upimutex_owned(lp, type));
		goto out;
	}
	if (lwp_upimutex_owned(lp, type)) {
		ASSERT(lwp_upimutex_owned(lp, type) == upimutex);
		nupinest = upi_mylist_add((upimutex_t *)upimutex);
		upilocked = 1;
	}
	/*
	 * Now, need to read the user-level lp->mutex_flag to do the following:
	 *
	 * - if lock is held, check if EOWNERDEAD should be returned
	 * - if lock isn't held, check if ENOTRECOVERABLE should be returned
	 *
	 * Now, either lp->mutex_flag is readable or it's not. If not
	 * readable, the on_fault path will cause a return with EFAULT as
	 * it should. If it is readable, the state of the flag encodes the
	 * robustness state of the lock:
	 *
	 * If the upimutex is locked here, the flag's LOCK_OWNERDEAD setting
	 * will influence the return code appropriately. If the upimutex is
	 * not locked here, this could be due to a spurious wake-up or a
	 * NOTRECOVERABLE event. The flag's setting can be used to distinguish
	 * between these two events.
	 */
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (upilocked) {
		/*
		 * If the thread wakes up from turnstile_block with the lock
		 * held, the flag could not be set to LOCK_NOTRECOVERABLE,
		 * since it would not have been handed-off the lock.
		 * So, no need to check for this case.
		 */
		if (nupinest > maxnestupimx &&
		    secpolicy_resource(CRED()) != 0) {
			upimutex_unlock((upimutex_t *)upimutex, flag);
			upilocked = 0;
			error = ENOMEM;
		} else if (flag & LOCK_OWNERDEAD) {
			error = EOWNERDEAD;
		}
	} else {
		/*
		 * Wake-up without the upimutex held. Either this is a
		 * spurious wake-up (due to signals, forkall(), whatever), or
		 * it is a LOCK_NOTRECOVERABLE robustness event. The setting
		 * of the mutex flag can be used to distinguish between the
		 * two events.
		 */
		if (flag & LOCK_NOTRECOVERABLE) {
			error = ENOTRECOVERABLE;
		} else {
			/*
			 * Here, the flag could be set to LOCK_OWNERDEAD or
			 * not. In both cases, this is a spurious wakeup,
			 * since the upi lock is not held, but the thread
			 * has returned from turnstile_block().
			 *
			 * The user flag could be LOCK_OWNERDEAD if, at the
			 * same time as curthread having been woken up
			 * spuriously, the owner (say Tdead) has died, marked
			 * the mutex flag accordingly, and handed off the lock
			 * to some other waiter (say Tnew). curthread just
			 * happened to read the flag while Tnew has yet to deal
			 * with the owner-dead event.
			 *
			 * In this event, curthread should retry the lock.
			 * If Tnew is able to cleanup the lock, curthread
			 * will eventually get the lock with a zero error code.
			 * If Tnew is unable to cleanup, its eventual call to
			 * unlock the lock will result in the mutex flag being
			 * set to LOCK_NOTRECOVERABLE, and the wake-up of
			 * all waiters, including curthread, which will then
			 * eventually return ENOTRECOVERABLE due to the above
			 * check.
			 *
			 * Of course, if the user-flag is not set with
			 * LOCK_OWNERDEAD, retrying is the thing to do, since
			 * this is definitely a spurious wakeup.
			 */
			goto retry;
		}
	}

out:
	no_fault();
	return (error);
}


static int
lwp_upimutex_unlock(lwp_mutex_t *lp, uint8_t type)
{
	label_t ljb;
	int error = 0;
	lwpchan_t lwpchan;
	uint16_t flag;
	upib_t *upibp;
	volatile struct upimutex *upimutex = NULL;
	volatile int upilocked = 0;

	if (on_fault(&ljb)) {
		if (upilocked)
			upimutex_unlock((upimutex_t *)upimutex, 0);
		error = EFAULT;
		goto out;
	}
	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
	    &lwpchan, LWPCHAN_MPPOOL)) {
		error = EFAULT;
		goto out;
	}
	upibp = &UPI_CHAIN(lwpchan);
	mutex_enter(&upibp->upib_lock);
	upimutex = upi_get(upibp, &lwpchan);
	/*
	 * If the lock is not held, or the owner is not curthread, return
	 * error. The user-level wrapper can return this error or stall,
	 * depending on whether mutex is of ERRORCHECK type or not.
	 */
	if (upimutex == NULL || upimutex->upi_owner != curthread) {
		mutex_exit(&upibp->upib_lock);
		error = EPERM;
		goto out;
	}
	mutex_exit(&upibp->upib_lock); /* release for user memory access */
	upilocked = 1;
	fuword16_noerr(&lp->mutex_flag, &flag);
	if (flag & LOCK_OWNERDEAD) {
		/*
		 * transition mutex to the LOCK_NOTRECOVERABLE state.
		 */
		flag &= ~LOCK_OWNERDEAD;
		flag |= LOCK_NOTRECOVERABLE;
		suword16_noerr(&lp->mutex_flag, flag);
	}
	upimutex_unlock((upimutex_t *)upimutex, flag);
	upilocked = 0;
out:
	no_fault();
	return (error);
}
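
/*
 * A note on robust-mutex recovery as seen from user level (a sketch,
 * not part of this file): a caller that is granted the lock with
 * EOWNERDEAD repairs the shared state and marks the mutex consistent
 * before unlocking, e.g.
 *
 *	if (pthread_mutex_lock(&m) == EOWNERDEAD) {
 *		repair_shared_data();			(hypothetical helper)
 *		(void) pthread_mutex_consistent(&m);	(or the _np variant)
 *	}
 *
 * If the mutex is instead unlocked while still marked LOCK_OWNERDEAD,
 * lwp_upimutex_unlock() above transitions it to LOCK_NOTRECOVERABLE
 * and subsequent waiters return ENOTRECOVERABLE.
 */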
1019*0Sstevel@tonic-gate 
1020*0Sstevel@tonic-gate /*
1021*0Sstevel@tonic-gate  * Mark user mutex state, corresponding to kernel upimutex, as LOCK_OWNERDEAD.
1022*0Sstevel@tonic-gate  */
1023*0Sstevel@tonic-gate static int
1024*0Sstevel@tonic-gate upi_dead(upimutex_t *upip)
1025*0Sstevel@tonic-gate {
1026*0Sstevel@tonic-gate 	label_t ljb;
1027*0Sstevel@tonic-gate 	int error = 0;
1028*0Sstevel@tonic-gate 	lwp_mutex_t *lp;
1029*0Sstevel@tonic-gate 	uint16_t flag;
1030*0Sstevel@tonic-gate 
1031*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1032*0Sstevel@tonic-gate 		error = EFAULT;
1033*0Sstevel@tonic-gate 		goto out;
1034*0Sstevel@tonic-gate 	}
1035*0Sstevel@tonic-gate 
1036*0Sstevel@tonic-gate 	lp = upip->upi_vaddr;
1037*0Sstevel@tonic-gate 	fuword16_noerr(&lp->mutex_flag, &flag);
1038*0Sstevel@tonic-gate 	flag |= LOCK_OWNERDEAD;
1039*0Sstevel@tonic-gate 	suword16_noerr(&lp->mutex_flag, flag);
1040*0Sstevel@tonic-gate out:
1041*0Sstevel@tonic-gate 	no_fault();
1042*0Sstevel@tonic-gate 	return (error);
1043*0Sstevel@tonic-gate }
1044*0Sstevel@tonic-gate 
1045*0Sstevel@tonic-gate /*
1046*0Sstevel@tonic-gate  * Unlock all upimutexes held by curthread, since curthread is dying.
1047*0Sstevel@tonic-gate  * For each upimutex, attempt to mark its corresponding user mutex object as
1048*0Sstevel@tonic-gate  * dead.
1049*0Sstevel@tonic-gate  */
1050*0Sstevel@tonic-gate void
1051*0Sstevel@tonic-gate upimutex_cleanup()
1052*0Sstevel@tonic-gate {
1053*0Sstevel@tonic-gate 	kthread_t *t = curthread;
1054*0Sstevel@tonic-gate 	struct upimutex *upip;
1055*0Sstevel@tonic-gate 
1056*0Sstevel@tonic-gate 	while ((upip = t->t_upimutex) != NULL) {
1057*0Sstevel@tonic-gate 		if (upi_dead(upip) != 0) {
1058*0Sstevel@tonic-gate 			/*
1059*0Sstevel@tonic-gate 			 * If the user object associated with this upimutex is
1060*0Sstevel@tonic-gate 			 * unmapped, unlock the upimutex with the
1061*0Sstevel@tonic-gate 			 * LOCK_NOTRECOVERABLE flag, so that all waiters are
1062*0Sstevel@tonic-gate 			 * woken up.  Since the user object is unmapped, it
1063*0Sstevel@tonic-gate 			 * cannot be marked as dead or notrecoverable.
1064*0Sstevel@tonic-gate 			 * The waiters will all wake up and return
1065*0Sstevel@tonic-gate 			 * ENOTRECOVERABLE, since they will find that the lock
1066*0Sstevel@tonic-gate 			 * was not handed off to them.
1067*0Sstevel@tonic-gate 			 * See lwp_upimutex_lock().
1068*0Sstevel@tonic-gate 			 */
1069*0Sstevel@tonic-gate 			upimutex_unlock(upip, LOCK_NOTRECOVERABLE);
1070*0Sstevel@tonic-gate 		} else {
1071*0Sstevel@tonic-gate 			/*
1072*0Sstevel@tonic-gate 			 * The user object has been marked as dead.
1073*0Sstevel@tonic-gate 			 * Unlock the upimutex: if no waiters, upip kmem will
1074*0Sstevel@tonic-gate 			 * be freed. If there is a waiter, the lock will be
1075*0Sstevel@tonic-gate 			 * handed off. If exit() is in progress, each existing
1076*0Sstevel@tonic-gate 			 * waiter will successively get the lock, as owners
1077*0Sstevel@tonic-gate 			 * die, and each new owner will call this routine as
1078*0Sstevel@tonic-gate 			 * it dies. The last owner will free kmem, since
1079*0Sstevel@tonic-gate 			 * it will find the upimutex has no waiters. So,
1080*0Sstevel@tonic-gate 			 * eventually, the kmem is guaranteed to be freed.
1081*0Sstevel@tonic-gate 			 */
1082*0Sstevel@tonic-gate 			upimutex_unlock(upip, 0);
1083*0Sstevel@tonic-gate 		}
1084*0Sstevel@tonic-gate 		/*
1085*0Sstevel@tonic-gate 		 * Note that the call to upimutex_unlock() above removes
1086*0Sstevel@tonic-gate 		 * the upimutex from the t_upimutex chain, so the
1087*0Sstevel@tonic-gate 		 * while loop will eventually terminate.
1088*0Sstevel@tonic-gate 		 */
1089*0Sstevel@tonic-gate 	}
1090*0Sstevel@tonic-gate }
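
/*
 * A worked example of the owner-death path above.  The thread names
 * are hypothetical; the flag transitions are those performed by
 * upi_dead(), upimutex_cleanup() and the unlock path:
 *
 *	T1 owns a robust upimutex m and exits without unlocking it;
 *	    upimutex_cleanup() -> upi_dead() sets LOCK_OWNERDEAD in
 *	    m's user-level mutex_flag and the lock is handed off.
 *	T2, a waiter, is granted m and observes EOWNERDEAD; the state
 *	    guarded by m may be inconsistent.
 *	If T2 unlocks m while LOCK_OWNERDEAD is still set, the unlock
 *	    path converts the flag to LOCK_NOTRECOVERABLE, and every
 *	    later contender gets ENOTRECOVERABLE.
 */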
1091*0Sstevel@tonic-gate 
1092*0Sstevel@tonic-gate int
1093*0Sstevel@tonic-gate lwp_mutex_timedlock(lwp_mutex_t *lp, timespec_t *tsp)
1094*0Sstevel@tonic-gate {
1095*0Sstevel@tonic-gate 	kthread_t *t = curthread;
1096*0Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
1097*0Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
1098*0Sstevel@tonic-gate 	lwp_timer_t lwpt;
1099*0Sstevel@tonic-gate 	caddr_t timedwait;
1100*0Sstevel@tonic-gate 	int error = 0;
1101*0Sstevel@tonic-gate 	int time_error;
1102*0Sstevel@tonic-gate 	clock_t tim = -1;
1103*0Sstevel@tonic-gate 	uchar_t waiters;
1104*0Sstevel@tonic-gate 	volatile int locked = 0;
1105*0Sstevel@tonic-gate 	volatile int watched = 0;
1106*0Sstevel@tonic-gate 	label_t ljb;
1107*0Sstevel@tonic-gate 	volatile uint8_t type = 0;
1108*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
1109*0Sstevel@tonic-gate 	sleepq_head_t *sqh;
1110*0Sstevel@tonic-gate 	static int iswanted(kthread_t *, lwpchan_t *);
1111*0Sstevel@tonic-gate 	uint16_t flag;
1112*0Sstevel@tonic-gate 	int imm_timeout = 0;
1113*0Sstevel@tonic-gate 
1114*0Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
1115*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
1116*0Sstevel@tonic-gate 
1117*0Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
1118*0Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
1119*0Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
1120*0Sstevel@tonic-gate 		imm_timeout = 1;
1121*0Sstevel@tonic-gate 		timedwait = NULL;
1122*0Sstevel@tonic-gate 	}
1123*0Sstevel@tonic-gate 
1124*0Sstevel@tonic-gate 	/*
1125*0Sstevel@tonic-gate 	 * Although LMS_USER_LOCK implies "asleep waiting for user-mode lock",
1126*0Sstevel@tonic-gate 	 * this micro state is really a run state. If the thread indeed blocks,
1127*0Sstevel@tonic-gate 	 * this state becomes valid. If not, the state is converted back to
1128*0Sstevel@tonic-gate 	 * LMS_SYSTEM. So, it is OK to set the mstate here, instead of just
1129*0Sstevel@tonic-gate 	 * when blocking.
1130*0Sstevel@tonic-gate 	 */
1131*0Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
1132*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1133*0Sstevel@tonic-gate 		if (locked)
1134*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1135*0Sstevel@tonic-gate 		error = EFAULT;
1136*0Sstevel@tonic-gate 		goto out;
1137*0Sstevel@tonic-gate 	}
1138*0Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
1139*0Sstevel@tonic-gate 	if (UPIMUTEX(type)) {
1140*0Sstevel@tonic-gate 		no_fault();
1141*0Sstevel@tonic-gate 		error = lwp_upimutex_lock(lp, type, UPIMUTEX_BLOCK, &lwpt);
1142*0Sstevel@tonic-gate 		if ((error == 0 || error == EOWNERDEAD) &&
1143*0Sstevel@tonic-gate 		    (type & USYNC_PROCESS))
1144*0Sstevel@tonic-gate 			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
1145*0Sstevel@tonic-gate 		if (tsp && !time_error)	/* copyout the residual time left */
1146*0Sstevel@tonic-gate 			error = lwp_timer_copyout(&lwpt, error);
1147*0Sstevel@tonic-gate 		if (error)
1148*0Sstevel@tonic-gate 			return (set_errno(error));
1149*0Sstevel@tonic-gate 		return (0);
1150*0Sstevel@tonic-gate 	}
1151*0Sstevel@tonic-gate 	/*
1152*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_mutex_t object is
1153*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE and it was initialized to
1154*0Sstevel@tonic-gate 	 * USYNC_PROCESS.
1155*0Sstevel@tonic-gate 	 */
1156*0Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_type, type);
1157*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
1158*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
1159*0Sstevel@tonic-gate 		error = EFAULT;
1160*0Sstevel@tonic-gate 		goto out;
1161*0Sstevel@tonic-gate 	}
1162*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1163*0Sstevel@tonic-gate 	locked = 1;
1164*0Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_waiters, &waiters);
1165*0Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_waiters, 1);
1166*0Sstevel@tonic-gate 	if (type & USYNC_PROCESS_ROBUST) {
1167*0Sstevel@tonic-gate 		fuword16_noerr(&lp->mutex_flag, &flag);
1168*0Sstevel@tonic-gate 		if (flag & LOCK_NOTRECOVERABLE) {
1169*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1170*0Sstevel@tonic-gate 			error = ENOTRECOVERABLE;
1171*0Sstevel@tonic-gate 			goto out;
1172*0Sstevel@tonic-gate 		}
1173*0Sstevel@tonic-gate 	}
1174*0Sstevel@tonic-gate 
1175*0Sstevel@tonic-gate 	/*
1176*0Sstevel@tonic-gate 	 * If watchpoints are set, they must be temporarily disabled,
1177*0Sstevel@tonic-gate 	 * since atomic accesses of memory such as the call to
1178*0Sstevel@tonic-gate 	 * ulock_try() below cannot be watched.
1179*0Sstevel@tonic-gate 	 */
1180*0Sstevel@tonic-gate 
1181*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1182*0Sstevel@tonic-gate 
1183*0Sstevel@tonic-gate 	while (!ulock_try(&lp->mutex_lockw)) {
1184*0Sstevel@tonic-gate 		if (time_error) {
1185*0Sstevel@tonic-gate 			/*
1186*0Sstevel@tonic-gate 			 * The SUSv3 POSIX spec is very clear that we
1187*0Sstevel@tonic-gate 			 * should get no error from validating the
1188*0Sstevel@tonic-gate 			 * timer until we would actually sleep.
1189*0Sstevel@tonic-gate 			 */
1190*0Sstevel@tonic-gate 			error = time_error;
1191*0Sstevel@tonic-gate 			break;
1192*0Sstevel@tonic-gate 		}
1193*0Sstevel@tonic-gate 
1194*0Sstevel@tonic-gate 		if (watched) {
1195*0Sstevel@tonic-gate 			watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1196*0Sstevel@tonic-gate 			watched = 0;
1197*0Sstevel@tonic-gate 		}
1198*0Sstevel@tonic-gate 
1199*0Sstevel@tonic-gate 		/*
1200*0Sstevel@tonic-gate 		 * Put the lwp in an orderly state for debugging.
1201*0Sstevel@tonic-gate 		 */
1202*0Sstevel@tonic-gate 		prstop(PR_REQUESTED, 0);
1203*0Sstevel@tonic-gate 		if (timedwait) {
1204*0Sstevel@tonic-gate 			/*
1205*0Sstevel@tonic-gate 			 * If we successfully queue the timeout,
1206*0Sstevel@tonic-gate 			 * then don't drop t_delay_lock until
1207*0Sstevel@tonic-gate 			 * we are on the sleep queue (below).
1208*0Sstevel@tonic-gate 			 */
1209*0Sstevel@tonic-gate 			mutex_enter(&t->t_delay_lock);
1210*0Sstevel@tonic-gate 			if (lwp_timer_enqueue(&lwpt) != 0) {
1211*0Sstevel@tonic-gate 				mutex_exit(&t->t_delay_lock);
1212*0Sstevel@tonic-gate 				imm_timeout = 1;
1213*0Sstevel@tonic-gate 				timedwait = NULL;
1214*0Sstevel@tonic-gate 			}
1215*0Sstevel@tonic-gate 		}
1216*0Sstevel@tonic-gate 		lwp_block(&lwpchan);
1217*0Sstevel@tonic-gate 		/*
1218*0Sstevel@tonic-gate 		 * Nothing should happen to cause the lwp to go to
1219*0Sstevel@tonic-gate 		 * sleep again until after it returns from swtch().
1220*0Sstevel@tonic-gate 		 */
1221*0Sstevel@tonic-gate 		if (timedwait)
1222*0Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
1223*0Sstevel@tonic-gate 		locked = 0;
1224*0Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1225*0Sstevel@tonic-gate 		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) || imm_timeout)
1226*0Sstevel@tonic-gate 			setrun(t);
1227*0Sstevel@tonic-gate 		swtch();
1228*0Sstevel@tonic-gate 		t->t_flag &= ~T_WAKEABLE;
1229*0Sstevel@tonic-gate 		if (timedwait)
1230*0Sstevel@tonic-gate 			tim = lwp_timer_dequeue(&lwpt);
1231*0Sstevel@tonic-gate 		setallwatch();
1232*0Sstevel@tonic-gate 		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
1233*0Sstevel@tonic-gate 			error = EINTR;
1234*0Sstevel@tonic-gate 		else if (imm_timeout || (timedwait && tim == -1))
1235*0Sstevel@tonic-gate 			error = ETIME;
1236*0Sstevel@tonic-gate 		if (error) {
1237*0Sstevel@tonic-gate 			lwp->lwp_asleep = 0;
1238*0Sstevel@tonic-gate 			lwp->lwp_sysabort = 0;
1239*0Sstevel@tonic-gate 			watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
1240*0Sstevel@tonic-gate 			    S_WRITE);
1241*0Sstevel@tonic-gate 
1242*0Sstevel@tonic-gate 			/*
1243*0Sstevel@tonic-gate 			 * Need to re-compute the waiters bit. The waiters
1244*0Sstevel@tonic-gate 			 * field in the lock is not reliable. One of two things
1245*0Sstevel@tonic-gate 			 * could have occurred: either no lwp called
1246*0Sstevel@tonic-gate 			 * lwp_release() for us and we woke up due to a signal
1247*0Sstevel@tonic-gate 			 * or a timeout, in which case the waiter bit is
1248*0Sstevel@tonic-gate 			 * incorrect (it is still 1, as set above); or an
1249*0Sstevel@tonic-gate 			 * lwp_release() did occur for some other lwp on the
1250*0Sstevel@tonic-gate 			 * same lwpchan, in which case the waiter bit is
1251*0Sstevel@tonic-gate 			 * correct. Since we cannot tell which event occurred,
1252*0Sstevel@tonic-gate 			 * we recompute the bit from the sleep queue.
1253*0Sstevel@tonic-gate 			 */
1254*0Sstevel@tonic-gate 			lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1255*0Sstevel@tonic-gate 			locked = 1;
1256*0Sstevel@tonic-gate 			sqh = lwpsqhash(&lwpchan);
1257*0Sstevel@tonic-gate 			disp_lock_enter(&sqh->sq_lock);
1258*0Sstevel@tonic-gate 			waiters = iswanted(sqh->sq_queue.sq_first, &lwpchan);
1259*0Sstevel@tonic-gate 			disp_lock_exit(&sqh->sq_lock);
1260*0Sstevel@tonic-gate 			break;
1261*0Sstevel@tonic-gate 		}
1262*0Sstevel@tonic-gate 		lwp->lwp_asleep = 0;
1263*0Sstevel@tonic-gate 		watched = watch_disable_addr((caddr_t)lp, sizeof (*lp),
1264*0Sstevel@tonic-gate 		    S_WRITE);
1265*0Sstevel@tonic-gate 		lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1266*0Sstevel@tonic-gate 		locked = 1;
1267*0Sstevel@tonic-gate 		fuword8_noerr(&lp->mutex_waiters, &waiters);
1268*0Sstevel@tonic-gate 		suword8_noerr(&lp->mutex_waiters, 1);
1269*0Sstevel@tonic-gate 		if (type & USYNC_PROCESS_ROBUST) {
1270*0Sstevel@tonic-gate 			fuword16_noerr(&lp->mutex_flag, &flag);
1271*0Sstevel@tonic-gate 			if (flag & LOCK_NOTRECOVERABLE) {
1272*0Sstevel@tonic-gate 				error = ENOTRECOVERABLE;
1273*0Sstevel@tonic-gate 				break;
1274*0Sstevel@tonic-gate 			}
1275*0Sstevel@tonic-gate 		}
1276*0Sstevel@tonic-gate 	}
1277*0Sstevel@tonic-gate 
1278*0Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
1279*0Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
1280*0Sstevel@tonic-gate 
1281*0Sstevel@tonic-gate 	if (!error && (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))) {
1282*0Sstevel@tonic-gate 		suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
1283*0Sstevel@tonic-gate 		if (type & USYNC_PROCESS_ROBUST) {
1284*0Sstevel@tonic-gate 			fuword16_noerr(&lp->mutex_flag, &flag);
1285*0Sstevel@tonic-gate 			if (flag & LOCK_OWNERDEAD)
1286*0Sstevel@tonic-gate 				error = EOWNERDEAD;
1287*0Sstevel@tonic-gate 			else if (flag & LOCK_UNMAPPED)
1288*0Sstevel@tonic-gate 				error = ELOCKUNMAPPED;
1289*0Sstevel@tonic-gate 		}
1290*0Sstevel@tonic-gate 	}
1291*0Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_waiters, waiters);
1292*0Sstevel@tonic-gate 	locked = 0;
1293*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1294*0Sstevel@tonic-gate out:
1295*0Sstevel@tonic-gate 	no_fault();
1296*0Sstevel@tonic-gate 	if (watched)
1297*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1298*0Sstevel@tonic-gate 	if (tsp && !time_error)		/* copyout the residual time left */
1299*0Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
1300*0Sstevel@tonic-gate 	if (error)
1301*0Sstevel@tonic-gate 		return (set_errno(error));
1302*0Sstevel@tonic-gate 	return (0);
1303*0Sstevel@tonic-gate }
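
/*
 * A minimal calling sketch for the trap above, as seen from user
 * level.  The wrapper name and the handling shown are illustrative
 * assumptions, not a definitive libc implementation:
 *
 *	timespec_t ts = { 0, 50000000 };	(relative 50ms timeout)
 *	int err = mutex_timedlock_trap(&m, &ts);
 *
 *	err == 0:		lock acquired; ts holds the residual time
 *	err == ETIME:		the timeout expired before any hand-off
 *	err == EINTR:		interrupted by a signal; lock not held
 *	err == EOWNERDEAD:	lock acquired but the previous owner died
 *				(USYNC_PROCESS_ROBUST only)
 */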
1304*0Sstevel@tonic-gate 
1305*0Sstevel@tonic-gate /*
1306*0Sstevel@tonic-gate  * Obsolete lwp_mutex_lock() interface, no longer called from libc.
1307*0Sstevel@tonic-gate  * libc now calls lwp_mutex_timedlock(lp, NULL).
1308*0Sstevel@tonic-gate  * This system call trap continues to exist solely for the benefit
1309*0Sstevel@tonic-gate  * of old statically-linked binaries from Solaris 9 and before.
1310*0Sstevel@tonic-gate  * It should be removed from the system when we no longer care
1311*0Sstevel@tonic-gate  * about such applications.
1312*0Sstevel@tonic-gate  */
1313*0Sstevel@tonic-gate int
1314*0Sstevel@tonic-gate lwp_mutex_lock(lwp_mutex_t *lp)
1315*0Sstevel@tonic-gate {
1316*0Sstevel@tonic-gate 	return (lwp_mutex_timedlock(lp, NULL));
1317*0Sstevel@tonic-gate }
1318*0Sstevel@tonic-gate 
1319*0Sstevel@tonic-gate static int
1320*0Sstevel@tonic-gate iswanted(kthread_t *t, lwpchan_t *lwpchan)
1321*0Sstevel@tonic-gate {
1322*0Sstevel@tonic-gate 	/*
1323*0Sstevel@tonic-gate 	 * The caller holds the dispatcher lock on the sleep queue.
1324*0Sstevel@tonic-gate 	 */
1325*0Sstevel@tonic-gate 	while (t != NULL) {
1326*0Sstevel@tonic-gate 		if (t->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1327*0Sstevel@tonic-gate 		    t->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
1328*0Sstevel@tonic-gate 			return (1);
1329*0Sstevel@tonic-gate 		t = t->t_link;
1330*0Sstevel@tonic-gate 	}
1331*0Sstevel@tonic-gate 	return (0);
1332*0Sstevel@tonic-gate }
1333*0Sstevel@tonic-gate 
1334*0Sstevel@tonic-gate /*
1335*0Sstevel@tonic-gate  * Return the highest priority thread sleeping on this lwpchan.
1336*0Sstevel@tonic-gate  */
1337*0Sstevel@tonic-gate static kthread_t *
1338*0Sstevel@tonic-gate lwp_queue_waiter(lwpchan_t *lwpchan)
1339*0Sstevel@tonic-gate {
1340*0Sstevel@tonic-gate 	sleepq_head_t *sqh;
1341*0Sstevel@tonic-gate 	kthread_t *tp;
1342*0Sstevel@tonic-gate 
1343*0Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
1344*0Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
1345*0Sstevel@tonic-gate 	for (tp = sqh->sq_queue.sq_first; tp != NULL; tp = tp->t_link) {
1346*0Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1347*0Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan)
1348*0Sstevel@tonic-gate 			break;
1349*0Sstevel@tonic-gate 	}
1350*0Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
1351*0Sstevel@tonic-gate 	return (tp);
1352*0Sstevel@tonic-gate }
1353*0Sstevel@tonic-gate 
1354*0Sstevel@tonic-gate static int
1355*0Sstevel@tonic-gate lwp_release(lwpchan_t *lwpchan, uchar_t *waiters, int sync_type)
1356*0Sstevel@tonic-gate {
1357*0Sstevel@tonic-gate 	sleepq_head_t *sqh;
1358*0Sstevel@tonic-gate 	kthread_t *tp;
1359*0Sstevel@tonic-gate 	kthread_t **tpp;
1360*0Sstevel@tonic-gate 
1361*0Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
1362*0Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
1363*0Sstevel@tonic-gate 	tpp = &sqh->sq_queue.sq_first;
1364*0Sstevel@tonic-gate 	while ((tp = *tpp) != NULL) {
1365*0Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1366*0Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
1367*0Sstevel@tonic-gate 			/*
1368*0Sstevel@tonic-gate 			 * The following is typically false. It could be true
1369*0Sstevel@tonic-gate 			 * only if lwp_release() is called from
1370*0Sstevel@tonic-gate 			 * lwp_mutex_wakeup() after reading the waiters field
1371*0Sstevel@tonic-gate 			 * from memory in which the lwp lock used to be, but has
1372*0Sstevel@tonic-gate 			 * since been re-used to hold an lwp cv or lwp semaphore.
1373*0Sstevel@tonic-gate 			 * The thread "tp" found to match the lwp lock's wchan
1374*0Sstevel@tonic-gate 			 * is actually sleeping for the cv or semaphore which
1375*0Sstevel@tonic-gate 			 * now has the same wchan. In this case, lwp_release()
1376*0Sstevel@tonic-gate 			 * should return failure.
1377*0Sstevel@tonic-gate 			 */
1378*0Sstevel@tonic-gate 			if (sync_type != (tp->t_flag & T_WAITCVSEM)) {
1379*0Sstevel@tonic-gate 				ASSERT(sync_type == 0);
1380*0Sstevel@tonic-gate 				/*
1381*0Sstevel@tonic-gate 				 * Assert that this can happen only for mutexes,
1382*0Sstevel@tonic-gate 				 * i.e. sync_type == 0, in correctly written
1383*0Sstevel@tonic-gate 				 * user programs.
1384*0Sstevel@tonic-gate 				 */
1385*0Sstevel@tonic-gate 				disp_lock_exit(&sqh->sq_lock);
1386*0Sstevel@tonic-gate 				return (0);
1387*0Sstevel@tonic-gate 			}
1388*0Sstevel@tonic-gate 			*waiters = iswanted(tp->t_link, lwpchan);
1389*0Sstevel@tonic-gate 			sleepq_unlink(tpp, tp);
1390*0Sstevel@tonic-gate 			DTRACE_SCHED1(wakeup, kthread_t *, tp);
1391*0Sstevel@tonic-gate 			tp->t_wchan0 = NULL;
1392*0Sstevel@tonic-gate 			tp->t_wchan = NULL;
1393*0Sstevel@tonic-gate 			tp->t_sobj_ops = NULL;
1394*0Sstevel@tonic-gate 			tp->t_release = 1;
1395*0Sstevel@tonic-gate 			THREAD_TRANSITION(tp);	/* drops sleepq lock */
1396*0Sstevel@tonic-gate 			CL_WAKEUP(tp);
1397*0Sstevel@tonic-gate 			thread_unlock(tp);	/* drop run queue lock */
1398*0Sstevel@tonic-gate 			return (1);
1399*0Sstevel@tonic-gate 		}
1400*0Sstevel@tonic-gate 		tpp = &tp->t_link;
1401*0Sstevel@tonic-gate 	}
1402*0Sstevel@tonic-gate 	*waiters = 0;
1403*0Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
1404*0Sstevel@tonic-gate 	return (0);
1405*0Sstevel@tonic-gate }
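
/*
 * A worked example of the sync_type mismatch rejected above,
 * assuming an incorrectly written program:
 *
 *	1. A mutex at address A is dropped and its memory re-used to
 *	   hold a cv or a semaphore.
 *	2. An lwp goes to sleep on the new object, with T_WAITCVSEM
 *	   set, on the same lwpchan that the old mutex hashed to.
 *	3. A stale lwp_mutex_wakeup() on A finds that sleeper; since
 *	   sync_type (0) != (tp->t_flag & T_WAITCVSEM), lwp_release()
 *	   fails rather than waking the wrong sleeper.
 */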
1406*0Sstevel@tonic-gate 
1407*0Sstevel@tonic-gate static void
1408*0Sstevel@tonic-gate lwp_release_all(lwpchan_t *lwpchan)
1409*0Sstevel@tonic-gate {
1410*0Sstevel@tonic-gate 	sleepq_head_t	*sqh;
1411*0Sstevel@tonic-gate 	kthread_t *tp;
1412*0Sstevel@tonic-gate 	kthread_t **tpp;
1413*0Sstevel@tonic-gate 
1414*0Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
1415*0Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);		/* lock the sleep queue */
1416*0Sstevel@tonic-gate 	tpp = &sqh->sq_queue.sq_first;
1417*0Sstevel@tonic-gate 	while ((tp = *tpp) != NULL) {
1418*0Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
1419*0Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
1420*0Sstevel@tonic-gate 			sleepq_unlink(tpp, tp);
1421*0Sstevel@tonic-gate 			DTRACE_SCHED1(wakeup, kthread_t *, tp);
1422*0Sstevel@tonic-gate 			tp->t_wchan0 = NULL;
1423*0Sstevel@tonic-gate 			tp->t_wchan = NULL;
1424*0Sstevel@tonic-gate 			tp->t_sobj_ops = NULL;
1425*0Sstevel@tonic-gate 			CL_WAKEUP(tp);
1426*0Sstevel@tonic-gate 			thread_unlock_high(tp);	/* release run queue lock */
1427*0Sstevel@tonic-gate 		} else {
1428*0Sstevel@tonic-gate 			tpp = &tp->t_link;
1429*0Sstevel@tonic-gate 		}
1430*0Sstevel@tonic-gate 	}
1431*0Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);		/* drop sleep q lock */
1432*0Sstevel@tonic-gate }
1433*0Sstevel@tonic-gate 
1434*0Sstevel@tonic-gate /*
1435*0Sstevel@tonic-gate  * Unblock an lwp that is trying to acquire this mutex.  The blocked
1436*0Sstevel@tonic-gate  * lwp resumes and retries the lock acquisition.
1437*0Sstevel@tonic-gate  */
1438*0Sstevel@tonic-gate int
1439*0Sstevel@tonic-gate lwp_mutex_wakeup(lwp_mutex_t *lp)
1440*0Sstevel@tonic-gate {
1441*0Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
1442*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
1443*0Sstevel@tonic-gate 	uchar_t waiters;
1444*0Sstevel@tonic-gate 	volatile int locked = 0;
1445*0Sstevel@tonic-gate 	volatile int watched = 0;
1446*0Sstevel@tonic-gate 	volatile uint8_t type = 0;
1447*0Sstevel@tonic-gate 	label_t ljb;
1448*0Sstevel@tonic-gate 	int error = 0;
1449*0Sstevel@tonic-gate 
1450*0Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
1451*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
1452*0Sstevel@tonic-gate 
1453*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1454*0Sstevel@tonic-gate 
1455*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1456*0Sstevel@tonic-gate 		if (locked)
1457*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1458*0Sstevel@tonic-gate 		error = EFAULT;
1459*0Sstevel@tonic-gate 		goto out;
1460*0Sstevel@tonic-gate 	}
1461*0Sstevel@tonic-gate 	/*
1462*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_mutex_t object is
1463*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS
1464*0Sstevel@tonic-gate 	 */
1465*0Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
1466*0Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_type, type);
1467*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
1468*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
1469*0Sstevel@tonic-gate 		error = EFAULT;
1470*0Sstevel@tonic-gate 		goto out;
1471*0Sstevel@tonic-gate 	}
1472*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
1473*0Sstevel@tonic-gate 	locked = 1;
1474*0Sstevel@tonic-gate 	/*
1475*0Sstevel@tonic-gate 	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
1476*0Sstevel@tonic-gate 	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
1477*0Sstevel@tonic-gate 	 * may fail.  If it fails, do not write into the waiter bit.
1478*0Sstevel@tonic-gate 	 * The call to lwp_release() might fail for one of three reasons:
1479*0Sstevel@tonic-gate 	 *
1480*0Sstevel@tonic-gate 	 *	1. The thread that set the waiter bit was not actually
1481*0Sstevel@tonic-gate 	 *	   sleeping, since it got the lock on the retry. The waiter
1482*0Sstevel@tonic-gate 	 *	   bit will then be correctly updated by that thread. This
1483*0Sstevel@tonic-gate 	 *	   window could be closed by reading the waiter bit again
1484*0Sstevel@tonic-gate 	 *	   here and not calling lwp_release() at all if it is zero.
1485*0Sstevel@tonic-gate 	 *	2. The thread that set the waiter bit and went to sleep
1486*0Sstevel@tonic-gate 	 *	   was woken up by a signal. In that case, the waiter
1487*0Sstevel@tonic-gate 	 *	   recomputes the waiter bit on the EINTR return path.
1488*0Sstevel@tonic-gate 	 *	3. The waiter bit read by lwp_mutex_wakeup() was in
1489*0Sstevel@tonic-gate 	 *	   memory that has been re-used after the lock was dropped.
1490*0Sstevel@tonic-gate 	 *	   In this case, writing into the waiter bit would cause
1491*0Sstevel@tonic-gate 	 *	   data corruption.
1492*0Sstevel@tonic-gate 	 */
1493*0Sstevel@tonic-gate 	if (lwp_release(&lwpchan, &waiters, 0) == 1) {
1494*0Sstevel@tonic-gate 		suword8_noerr(&lp->mutex_waiters, waiters);
1495*0Sstevel@tonic-gate 	}
1496*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
1497*0Sstevel@tonic-gate out:
1498*0Sstevel@tonic-gate 	no_fault();
1499*0Sstevel@tonic-gate 	if (watched)
1500*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
1501*0Sstevel@tonic-gate 	if (error)
1502*0Sstevel@tonic-gate 		return (set_errno(error));
1503*0Sstevel@tonic-gate 	return (0);
1504*0Sstevel@tonic-gate }
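
/*
 * A sketch of the assumed user-level unlock fast path that makes the
 * waiter bit only advisory (illustrative; not the libc source):
 *
 *	lp->mutex_lockw = 0;		(ulock_clear() equivalent)
 *	if (lp->mutex_waiters != 0)	(racy read, by design)
 *		mutex_wakeup_trap(lp);	(the kernel re-checks the
 *					 sleep queue under lwpchan
 *					 locking, as implemented above)
 */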
1505*0Sstevel@tonic-gate 
1506*0Sstevel@tonic-gate /*
1507*0Sstevel@tonic-gate  * lwp_cond_wait() has four arguments, a pointer to a condition variable,
1508*0Sstevel@tonic-gate  * a pointer to a mutex, a pointer to a timespec for a timed wait and
1509*0Sstevel@tonic-gate  * a flag telling the kernel whether or not to honor the kernel/user
1510*0Sstevel@tonic-gate  * schedctl parking protocol (see schedctl_is_park() in schedctl.c).
1511*0Sstevel@tonic-gate  * The kernel puts the lwp to sleep on a unique pair of caddr_t's called an
1512*0Sstevel@tonic-gate  * lwpchan, returned by get_lwpchan().  If the timespec pointer is non-NULL,
1513*0Sstevel@tonic-gate  * it is used as an in/out parameter.  On entry, it contains the relative
1514*0Sstevel@tonic-gate  * time until timeout.  On exit, we copyout the residual time left to it.
1515*0Sstevel@tonic-gate  */
1516*0Sstevel@tonic-gate int
1517*0Sstevel@tonic-gate lwp_cond_wait(lwp_cond_t *cv, lwp_mutex_t *mp, timespec_t *tsp, int check_park)
1518*0Sstevel@tonic-gate {
1519*0Sstevel@tonic-gate 	kthread_t *t = curthread;
1520*0Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
1521*0Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
1522*0Sstevel@tonic-gate 	lwp_timer_t lwpt;
1523*0Sstevel@tonic-gate 	lwpchan_t cv_lwpchan;
1524*0Sstevel@tonic-gate 	lwpchan_t m_lwpchan;
1525*0Sstevel@tonic-gate 	caddr_t timedwait;
1526*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
1527*0Sstevel@tonic-gate 	volatile uint8_t mtype = 0;
1528*0Sstevel@tonic-gate 	uchar_t waiters;
1529*0Sstevel@tonic-gate 	volatile int error;
1530*0Sstevel@tonic-gate 	clock_t tim = -1;
1531*0Sstevel@tonic-gate 	volatile int locked = 0;
1532*0Sstevel@tonic-gate 	volatile int m_locked = 0;
1533*0Sstevel@tonic-gate 	volatile int cvwatched = 0;
1534*0Sstevel@tonic-gate 	volatile int mpwatched = 0;
1535*0Sstevel@tonic-gate 	label_t ljb;
1536*0Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
1537*0Sstevel@tonic-gate 	int imm_timeout = 0;
1538*0Sstevel@tonic-gate 	int imm_unpark = 0;
1539*0Sstevel@tonic-gate 
1540*0Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit ||
1541*0Sstevel@tonic-gate 	    (caddr_t)mp >= p->p_as->a_userlimit)
1542*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
1543*0Sstevel@tonic-gate 
1544*0Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
1545*0Sstevel@tonic-gate 	if ((error = lwp_timer_copyin(&lwpt, tsp)) != 0)
1546*0Sstevel@tonic-gate 		return (set_errno(error));
1547*0Sstevel@tonic-gate 	if (lwpt.lwpt_imm_timeout) {
1548*0Sstevel@tonic-gate 		imm_timeout = 1;
1549*0Sstevel@tonic-gate 		timedwait = NULL;
1550*0Sstevel@tonic-gate 	}
1551*0Sstevel@tonic-gate 
1552*0Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
1553*0Sstevel@tonic-gate 
1554*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1555*0Sstevel@tonic-gate 		if (no_lwpchan) {
1556*0Sstevel@tonic-gate 			error = EFAULT;
1557*0Sstevel@tonic-gate 			goto out;
1558*0Sstevel@tonic-gate 		}
1559*0Sstevel@tonic-gate 		if (m_locked) {
1560*0Sstevel@tonic-gate 			m_locked = 0;
1561*0Sstevel@tonic-gate 			lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1562*0Sstevel@tonic-gate 		}
1563*0Sstevel@tonic-gate 		if (locked) {
1564*0Sstevel@tonic-gate 			locked = 0;
1565*0Sstevel@tonic-gate 			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
1566*0Sstevel@tonic-gate 		}
1567*0Sstevel@tonic-gate 		/*
1568*0Sstevel@tonic-gate 		 * set up another on_fault() for a possible fault
1569*0Sstevel@tonic-gate 		 * on the user lock accessed at "efault"
1570*0Sstevel@tonic-gate 		 */
1571*0Sstevel@tonic-gate 		if (on_fault(&ljb)) {
1572*0Sstevel@tonic-gate 			if (m_locked) {
1573*0Sstevel@tonic-gate 				m_locked = 0;
1574*0Sstevel@tonic-gate 				lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1575*0Sstevel@tonic-gate 			}
1576*0Sstevel@tonic-gate 			goto out;
1577*0Sstevel@tonic-gate 		}
1578*0Sstevel@tonic-gate 		error = EFAULT;
1579*0Sstevel@tonic-gate 		goto efault;
1580*0Sstevel@tonic-gate 	}
1581*0Sstevel@tonic-gate 
1582*0Sstevel@tonic-gate 	/*
1583*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_cond_t and lwp_mutex_t
1584*0Sstevel@tonic-gate 	 * objects are defined to be MAP_PRIVATE, and are USYNC_PROCESS
1585*0Sstevel@tonic-gate 	 */
1586*0Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
1587*0Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
1588*0Sstevel@tonic-gate 		suword8_noerr(&mp->mutex_type, mtype);
1589*0Sstevel@tonic-gate 		/* convert user level mutex, "mp", to a unique lwpchan */
1590*0Sstevel@tonic-gate 		/* check if mtype is ok to use below, instead of type from cv */
1591*0Sstevel@tonic-gate 		if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
1592*0Sstevel@tonic-gate 		    &m_lwpchan, LWPCHAN_MPPOOL)) {
1593*0Sstevel@tonic-gate 			error = EFAULT;
1594*0Sstevel@tonic-gate 			goto out;
1595*0Sstevel@tonic-gate 		}
1596*0Sstevel@tonic-gate 	}
1597*0Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
1598*0Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
1599*0Sstevel@tonic-gate 	/* convert user level condition variable, "cv", to a unique lwpchan */
1600*0Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)cv, type,
1601*0Sstevel@tonic-gate 	    &cv_lwpchan, LWPCHAN_CVPOOL)) {
1602*0Sstevel@tonic-gate 		error = EFAULT;
1603*0Sstevel@tonic-gate 		goto out;
1604*0Sstevel@tonic-gate 	}
1605*0Sstevel@tonic-gate 	no_lwpchan = 0;
1606*0Sstevel@tonic-gate 	cvwatched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1607*0Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0)
1608*0Sstevel@tonic-gate 		mpwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp),
1609*0Sstevel@tonic-gate 		    S_WRITE);
1610*0Sstevel@tonic-gate 
1611*0Sstevel@tonic-gate 	/*
1612*0Sstevel@tonic-gate 	 * lwpchan_lock ensures that the calling lwp is put to sleep atomically
1613*0Sstevel@tonic-gate 	 * with respect to a possible wakeup which is a result of either
1614*0Sstevel@tonic-gate 	 * an lwp_cond_signal() or an lwp_cond_broadcast().
1615*0Sstevel@tonic-gate 	 *
1616*0Sstevel@tonic-gate 	 * What is misleading is that the lwp is put to sleep after the
1617*0Sstevel@tonic-gate 	 * condition variable's mutex is released.  This is OK as long as
1618*0Sstevel@tonic-gate 	 * the release operation is also done while holding lwpchan_lock.
1619*0Sstevel@tonic-gate 	 * The lwp is then put to sleep when the possibility of pagefaulting
1620*0Sstevel@tonic-gate 	 * or sleeping is completely eliminated.
1621*0Sstevel@tonic-gate 	 */
1622*0Sstevel@tonic-gate 	lwpchan_lock(&cv_lwpchan, LWPCHAN_CVPOOL);
1623*0Sstevel@tonic-gate 	locked = 1;
1624*0Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
1625*0Sstevel@tonic-gate 		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
1626*0Sstevel@tonic-gate 		m_locked = 1;
1627*0Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 1);
1628*0Sstevel@tonic-gate 		/*
1629*0Sstevel@tonic-gate 		 * unlock the condition variable's mutex. (pagefaults are
1630*0Sstevel@tonic-gate 		 * possible here.)
1631*0Sstevel@tonic-gate 		 */
1632*0Sstevel@tonic-gate 		ulock_clear(&mp->mutex_lockw);
1633*0Sstevel@tonic-gate 		fuword8_noerr(&mp->mutex_waiters, &waiters);
1634*0Sstevel@tonic-gate 		if (waiters != 0) {
1635*0Sstevel@tonic-gate 			/*
1636*0Sstevel@tonic-gate 			 * Given the locking of lwpchan_lock around the release
1637*0Sstevel@tonic-gate 			 * of the mutex and checking for waiters, the following
1638*0Sstevel@tonic-gate 			 * call to lwp_release() can fail ONLY if the lock
1639*0Sstevel@tonic-gate 			 * acquirer is interrupted after setting the waiter bit,
1640*0Sstevel@tonic-gate 			 * calling lwp_block() and releasing lwpchan_lock.
1641*0Sstevel@tonic-gate 			 * In this case, it could get pulled off the lwp sleep
1642*0Sstevel@tonic-gate 			 * queue (via setrun()) before the following call to
1643*0Sstevel@tonic-gate 			 * lwp_release() occurs. In this case, the lock
1644*0Sstevel@tonic-gate 			 * requestor will update the waiter bit correctly by
1645*0Sstevel@tonic-gate 			 * re-evaluating it.
1646*0Sstevel@tonic-gate 			 */
1647*0Sstevel@tonic-gate 			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
1648*0Sstevel@tonic-gate 				suword8_noerr(&mp->mutex_waiters, waiters);
1649*0Sstevel@tonic-gate 		}
1650*0Sstevel@tonic-gate 		m_locked = 0;
1651*0Sstevel@tonic-gate 		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1652*0Sstevel@tonic-gate 	} else {
1653*0Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 1);
1654*0Sstevel@tonic-gate 		error = lwp_upimutex_unlock(mp, mtype);
1655*0Sstevel@tonic-gate 		if (error) {	/* if the upimutex unlock failed */
1656*0Sstevel@tonic-gate 			locked = 0;
1657*0Sstevel@tonic-gate 			lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
1658*0Sstevel@tonic-gate 			goto out;
1659*0Sstevel@tonic-gate 		}
1660*0Sstevel@tonic-gate 	}
1661*0Sstevel@tonic-gate 	no_fault();
1662*0Sstevel@tonic-gate 
1663*0Sstevel@tonic-gate 	if (mpwatched) {
1664*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
1665*0Sstevel@tonic-gate 		mpwatched = 0;
1666*0Sstevel@tonic-gate 	}
1667*0Sstevel@tonic-gate 	if (cvwatched) {
1668*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1669*0Sstevel@tonic-gate 		cvwatched = 0;
1670*0Sstevel@tonic-gate 	}
1671*0Sstevel@tonic-gate 
1672*0Sstevel@tonic-gate 	/*
1673*0Sstevel@tonic-gate 	 * Put the lwp in an orderly state for debugging.
1674*0Sstevel@tonic-gate 	 */
1675*0Sstevel@tonic-gate 	prstop(PR_REQUESTED, 0);
1676*0Sstevel@tonic-gate 	if (check_park && (!schedctl_is_park() || t->t_unpark)) {
1677*0Sstevel@tonic-gate 		/*
1678*0Sstevel@tonic-gate 		 * We received a signal at user-level before calling here
1679*0Sstevel@tonic-gate 		 * or another thread wants us to return immediately
1680*0Sstevel@tonic-gate 		 * with EINTR.  See lwp_unpark().
1681*0Sstevel@tonic-gate 		 */
1682*0Sstevel@tonic-gate 		imm_unpark = 1;
1683*0Sstevel@tonic-gate 		t->t_unpark = 0;
1684*0Sstevel@tonic-gate 		timedwait = NULL;
1685*0Sstevel@tonic-gate 	} else if (timedwait) {
1686*0Sstevel@tonic-gate 		/*
1687*0Sstevel@tonic-gate 		 * If we successfully queue the timeout,
1688*0Sstevel@tonic-gate 		 * then don't drop t_delay_lock until
1689*0Sstevel@tonic-gate 		 * we are on the sleep queue (below).
1690*0Sstevel@tonic-gate 		 */
1691*0Sstevel@tonic-gate 		mutex_enter(&t->t_delay_lock);
1692*0Sstevel@tonic-gate 		if (lwp_timer_enqueue(&lwpt) != 0) {
1693*0Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
1694*0Sstevel@tonic-gate 			imm_timeout = 1;
1695*0Sstevel@tonic-gate 			timedwait = NULL;
1696*0Sstevel@tonic-gate 		}
1697*0Sstevel@tonic-gate 	}
1698*0Sstevel@tonic-gate 	t->t_flag |= T_WAITCVSEM;
1699*0Sstevel@tonic-gate 	lwp_block(&cv_lwpchan);
1700*0Sstevel@tonic-gate 	/*
1701*0Sstevel@tonic-gate 	 * Nothing should happen to cause the lwp to go to sleep
1702*0Sstevel@tonic-gate 	 * until after it returns from swtch().
1703*0Sstevel@tonic-gate 	 */
1704*0Sstevel@tonic-gate 	if (timedwait)
1705*0Sstevel@tonic-gate 		mutex_exit(&t->t_delay_lock);
1706*0Sstevel@tonic-gate 	locked = 0;
1707*0Sstevel@tonic-gate 	lwpchan_unlock(&cv_lwpchan, LWPCHAN_CVPOOL);
1708*0Sstevel@tonic-gate 	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
1709*0Sstevel@tonic-gate 	    (imm_timeout | imm_unpark))
1710*0Sstevel@tonic-gate 		setrun(t);
1711*0Sstevel@tonic-gate 	swtch();
1712*0Sstevel@tonic-gate 	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
1713*0Sstevel@tonic-gate 	if (timedwait)
1714*0Sstevel@tonic-gate 		tim = lwp_timer_dequeue(&lwpt);
1715*0Sstevel@tonic-gate 	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
1716*0Sstevel@tonic-gate 	    MUSTRETURN(p, t) || imm_unpark)
1717*0Sstevel@tonic-gate 		error = EINTR;
1718*0Sstevel@tonic-gate 	else if (imm_timeout || (timedwait && tim == -1))
1719*0Sstevel@tonic-gate 		error = ETIME;
1720*0Sstevel@tonic-gate 	lwp->lwp_asleep = 0;
1721*0Sstevel@tonic-gate 	lwp->lwp_sysabort = 0;
1722*0Sstevel@tonic-gate 	setallwatch();
1723*0Sstevel@tonic-gate 
1724*0Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
1725*0Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
1726*0Sstevel@tonic-gate 
1727*0Sstevel@tonic-gate 	if (tsp && check_park)		/* copyout the residual time left */
1728*0Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
1729*0Sstevel@tonic-gate 
1730*0Sstevel@tonic-gate 	/* the mutex is reacquired by the caller on return to user level */
1731*0Sstevel@tonic-gate 	if (error) {
1732*0Sstevel@tonic-gate 		/*
1733*0Sstevel@tonic-gate 		 * If we were concurrently lwp_cond_signal()d and we
1734*0Sstevel@tonic-gate 		 * received a UNIX signal or got a timeout, then perform
1735*0Sstevel@tonic-gate 		 * another lwp_cond_signal() so that the wakeup is not lost.
1736*0Sstevel@tonic-gate 		 */
1737*0Sstevel@tonic-gate 		if (t->t_release)
1738*0Sstevel@tonic-gate 			(void) lwp_cond_signal(cv);
1739*0Sstevel@tonic-gate 		return (set_errno(error));
1740*0Sstevel@tonic-gate 	}
1741*0Sstevel@tonic-gate 	return (0);
1742*0Sstevel@tonic-gate 
1743*0Sstevel@tonic-gate efault:
1744*0Sstevel@tonic-gate 	/*
1745*0Sstevel@tonic-gate 	 * Make sure that the user-level lock is dropped before
1746*0Sstevel@tonic-gate 	 * returning to the caller, since the caller always re-acquires it.
1747*0Sstevel@tonic-gate 	 */
1748*0Sstevel@tonic-gate 	if (UPIMUTEX(mtype) == 0) {
1749*0Sstevel@tonic-gate 		lwpchan_lock(&m_lwpchan, LWPCHAN_MPPOOL);
1750*0Sstevel@tonic-gate 		m_locked = 1;
1751*0Sstevel@tonic-gate 		ulock_clear(&mp->mutex_lockw);
1752*0Sstevel@tonic-gate 		fuword8_noerr(&mp->mutex_waiters, &waiters);
1753*0Sstevel@tonic-gate 		if (waiters != 0) {
1754*0Sstevel@tonic-gate 			/*
1755*0Sstevel@tonic-gate 			 * See comment above on lock clearing and lwp_release()
1756*0Sstevel@tonic-gate 			 * success/failure.
1757*0Sstevel@tonic-gate 			 */
1758*0Sstevel@tonic-gate 			if (lwp_release(&m_lwpchan, &waiters, 0) > 0)
1759*0Sstevel@tonic-gate 				suword8_noerr(&mp->mutex_waiters, waiters);
1760*0Sstevel@tonic-gate 		}
1761*0Sstevel@tonic-gate 		m_locked = 0;
1762*0Sstevel@tonic-gate 		lwpchan_unlock(&m_lwpchan, LWPCHAN_MPPOOL);
1763*0Sstevel@tonic-gate 	} else {
1764*0Sstevel@tonic-gate 		(void) lwp_upimutex_unlock(mp, mtype);
1765*0Sstevel@tonic-gate 	}
1766*0Sstevel@tonic-gate out:
1767*0Sstevel@tonic-gate 	no_fault();
1768*0Sstevel@tonic-gate 	if (mpwatched)
1769*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
1770*0Sstevel@tonic-gate 	if (cvwatched)
1771*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1772*0Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
1773*0Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
1774*0Sstevel@tonic-gate 	return (set_errno(error));
1775*0Sstevel@tonic-gate }
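
/*
 * The canonical user-level pattern that the trap above implements
 * one half of (a sketch with illustrative names; the mutex is
 * re-acquired by the caller on return, as noted above):
 *
 *	lock(mp);
 *	while (!condition)
 *		cond_wait(cv, mp);	(atomically: drop mp, sleep on
 *					 the cv's lwpchan, then re-lock
 *					 mp in user land on wakeup)
 *	... consume the shared state ...
 *	unlock(mp);
 */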
1776*0Sstevel@tonic-gate 
1777*0Sstevel@tonic-gate /*
1778*0Sstevel@tonic-gate  * Wake up one lwp that is blocked on this condition variable.
1779*0Sstevel@tonic-gate  */
1780*0Sstevel@tonic-gate int
1781*0Sstevel@tonic-gate lwp_cond_signal(lwp_cond_t *cv)
1782*0Sstevel@tonic-gate {
1783*0Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
1784*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
1785*0Sstevel@tonic-gate 	uchar_t waiters;
1786*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
1787*0Sstevel@tonic-gate 	volatile int locked = 0;
1788*0Sstevel@tonic-gate 	volatile int watched = 0;
1789*0Sstevel@tonic-gate 	label_t ljb;
1790*0Sstevel@tonic-gate 	int error = 0;
1791*0Sstevel@tonic-gate 
1792*0Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit)
1793*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
1794*0Sstevel@tonic-gate 
1795*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1796*0Sstevel@tonic-gate 
1797*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1798*0Sstevel@tonic-gate 		if (locked)
1799*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1800*0Sstevel@tonic-gate 		error = EFAULT;
1801*0Sstevel@tonic-gate 		goto out;
1802*0Sstevel@tonic-gate 	}
1803*0Sstevel@tonic-gate 	/*
1804*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_cond_t object is
1805*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
1806*0Sstevel@tonic-gate 	 */
1807*0Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
1808*0Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
1809*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
1810*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
1811*0Sstevel@tonic-gate 		error = EFAULT;
1812*0Sstevel@tonic-gate 		goto out;
1813*0Sstevel@tonic-gate 	}
1814*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
1815*0Sstevel@tonic-gate 	locked = 1;
1816*0Sstevel@tonic-gate 	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
1817*0Sstevel@tonic-gate 	if (waiters != 0) {
1818*0Sstevel@tonic-gate 		/*
1819*0Sstevel@tonic-gate 		 * The following call to lwp_release() might fail, but it is
1820*0Sstevel@tonic-gate 		 * OK to write into the waiters bit below, since the memory
1821*0Sstevel@tonic-gate 		 * cannot have been re-used or unmapped (for correctly
1822*0Sstevel@tonic-gate 		 * written user programs) as in the case of lwp_mutex_wakeup().
1823*0Sstevel@tonic-gate 		 * For an incorrect program, we do not care about data
1824*0Sstevel@tonic-gate 		 * corruption, since this is just one of many places where
1825*0Sstevel@tonic-gate 		 * corruption can occur for such a program. Of course,
1826*0Sstevel@tonic-gate 		 * if the memory is unmapped, normal fault recovery occurs.
1827*0Sstevel@tonic-gate 		 */
1828*0Sstevel@tonic-gate 		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
1829*0Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, waiters);
1830*0Sstevel@tonic-gate 	}
1831*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1832*0Sstevel@tonic-gate out:
1833*0Sstevel@tonic-gate 	no_fault();
1834*0Sstevel@tonic-gate 	if (watched)
1835*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1836*0Sstevel@tonic-gate 	if (error)
1837*0Sstevel@tonic-gate 		return (set_errno(error));
1838*0Sstevel@tonic-gate 	return (0);
1839*0Sstevel@tonic-gate }
1840*0Sstevel@tonic-gate 
1841*0Sstevel@tonic-gate /*
1842*0Sstevel@tonic-gate  * Wake up every lwp that is blocked on this condition variable.
1843*0Sstevel@tonic-gate  */
1844*0Sstevel@tonic-gate int
1845*0Sstevel@tonic-gate lwp_cond_broadcast(lwp_cond_t *cv)
1846*0Sstevel@tonic-gate {
1847*0Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
1848*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
1849*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
1850*0Sstevel@tonic-gate 	volatile int locked = 0;
1851*0Sstevel@tonic-gate 	volatile int watched = 0;
1852*0Sstevel@tonic-gate 	label_t ljb;
1853*0Sstevel@tonic-gate 	uchar_t waiters;
1854*0Sstevel@tonic-gate 	int error = 0;
1855*0Sstevel@tonic-gate 
1856*0Sstevel@tonic-gate 	if ((caddr_t)cv >= p->p_as->a_userlimit)
1857*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
1858*0Sstevel@tonic-gate 
1859*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1860*0Sstevel@tonic-gate 
1861*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1862*0Sstevel@tonic-gate 		if (locked)
1863*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1864*0Sstevel@tonic-gate 		error = EFAULT;
1865*0Sstevel@tonic-gate 		goto out;
1866*0Sstevel@tonic-gate 	}
1867*0Sstevel@tonic-gate 	/*
1868*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_cond_t object is
1869*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
1870*0Sstevel@tonic-gate 	 */
1871*0Sstevel@tonic-gate 	fuword16_noerr(&cv->cond_type, (uint16_t *)&type);
1872*0Sstevel@tonic-gate 	suword16_noerr(&cv->cond_type, type);
1873*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)cv, type,
1874*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
1875*0Sstevel@tonic-gate 		error = EFAULT;
1876*0Sstevel@tonic-gate 		goto out;
1877*0Sstevel@tonic-gate 	}
1878*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
1879*0Sstevel@tonic-gate 	locked = 1;
1880*0Sstevel@tonic-gate 	fuword8_noerr(&cv->cond_waiters_kernel, &waiters);
1881*0Sstevel@tonic-gate 	if (waiters != 0) {
1882*0Sstevel@tonic-gate 		lwp_release_all(&lwpchan);
1883*0Sstevel@tonic-gate 		suword8_noerr(&cv->cond_waiters_kernel, 0);
1884*0Sstevel@tonic-gate 	}
1885*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1886*0Sstevel@tonic-gate out:
1887*0Sstevel@tonic-gate 	no_fault();
1888*0Sstevel@tonic-gate 	if (watched)
1889*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)cv, sizeof (*cv), S_WRITE);
1890*0Sstevel@tonic-gate 	if (error)
1891*0Sstevel@tonic-gate 		return (set_errno(error));
1892*0Sstevel@tonic-gate 	return (0);
1893*0Sstevel@tonic-gate }
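
/*
 * The producer side matching the consumer loop sketched after
 * lwp_cond_wait() above (illustrative names):
 *
 *	lock(mp);
 *	... update the shared state ...
 *	unlock(mp);
 *	cond_signal(cv);	(wake one waiter: lwp_release())
 *   or
 *	cond_broadcast(cv);	(wake all waiters: lwp_release_all())
 */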
1894*0Sstevel@tonic-gate 
1895*0Sstevel@tonic-gate int
1896*0Sstevel@tonic-gate lwp_sema_trywait(lwp_sema_t *sp)
1897*0Sstevel@tonic-gate {
1898*0Sstevel@tonic-gate 	kthread_t *t = curthread;
1899*0Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
1900*0Sstevel@tonic-gate 	label_t ljb;
1901*0Sstevel@tonic-gate 	volatile int locked = 0;
1902*0Sstevel@tonic-gate 	volatile int watched = 0;
1903*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
1904*0Sstevel@tonic-gate 	int count;
1905*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
1906*0Sstevel@tonic-gate 	uchar_t waiters;
1907*0Sstevel@tonic-gate 	int error = 0;
1908*0Sstevel@tonic-gate 
1909*0Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
1910*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
1911*0Sstevel@tonic-gate 
1912*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
1913*0Sstevel@tonic-gate 
1914*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1915*0Sstevel@tonic-gate 		if (locked)
1916*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1917*0Sstevel@tonic-gate 		error = EFAULT;
1918*0Sstevel@tonic-gate 		goto out;
1919*0Sstevel@tonic-gate 	}
1920*0Sstevel@tonic-gate 	/*
1921*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_sema_t object is
1922*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
1923*0Sstevel@tonic-gate 	 */
1924*0Sstevel@tonic-gate 	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
1925*0Sstevel@tonic-gate 	suword16_noerr((void *)&sp->sema_type, type);
1926*0Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
1927*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
1928*0Sstevel@tonic-gate 		error = EFAULT;
1929*0Sstevel@tonic-gate 		goto out;
1930*0Sstevel@tonic-gate 	}
1931*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
1932*0Sstevel@tonic-gate 	locked = 1;
1933*0Sstevel@tonic-gate 	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
1934*0Sstevel@tonic-gate 	if (count == 0)
1935*0Sstevel@tonic-gate 		error = EBUSY;
1936*0Sstevel@tonic-gate 	else
1937*0Sstevel@tonic-gate 		suword32_noerr((void *)&sp->sema_count, --count);
1938*0Sstevel@tonic-gate 	if (count != 0) {
1939*0Sstevel@tonic-gate 		fuword8_noerr(&sp->sema_waiters, &waiters);
1940*0Sstevel@tonic-gate 		if (waiters != 0) {
1941*0Sstevel@tonic-gate 			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
1942*0Sstevel@tonic-gate 			suword8_noerr(&sp->sema_waiters, waiters);
1943*0Sstevel@tonic-gate 		}
1944*0Sstevel@tonic-gate 	}
1945*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1946*0Sstevel@tonic-gate out:
1947*0Sstevel@tonic-gate 	no_fault();
1948*0Sstevel@tonic-gate 	if (watched)
1949*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
1950*0Sstevel@tonic-gate 	if (error)
1951*0Sstevel@tonic-gate 		return (set_errno(error));
1952*0Sstevel@tonic-gate 	return (0);
1953*0Sstevel@tonic-gate }
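
/*
 * A behavioral summary of the trywait above, as a sketch:
 *
 *	sema_count > 0:		the count is decremented; return 0.
 *	sema_count == 0:	return EBUSY without ever blocking.
 *
 * If the decremented count is still nonzero, one more waiter (if
 * any) is released, so sleepers keep draining while the semaphore
 * retains value.
 */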
1954*0Sstevel@tonic-gate 
1955*0Sstevel@tonic-gate /*
1956*0Sstevel@tonic-gate  * See lwp_cond_wait(), above, for an explanation of the 'check_park' argument.
1957*0Sstevel@tonic-gate  */
1958*0Sstevel@tonic-gate int
1959*0Sstevel@tonic-gate lwp_sema_timedwait(lwp_sema_t *sp, timespec_t *tsp, int check_park)
1960*0Sstevel@tonic-gate {
1961*0Sstevel@tonic-gate 	kthread_t *t = curthread;
1962*0Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
1963*0Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
1964*0Sstevel@tonic-gate 	lwp_timer_t lwpt;
1965*0Sstevel@tonic-gate 	caddr_t timedwait;
1966*0Sstevel@tonic-gate 	clock_t tim = -1;
1967*0Sstevel@tonic-gate 	label_t ljb;
1968*0Sstevel@tonic-gate 	volatile int locked = 0;
1969*0Sstevel@tonic-gate 	volatile int watched = 0;
1970*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
1971*0Sstevel@tonic-gate 	int count;
1972*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
1973*0Sstevel@tonic-gate 	uchar_t waiters;
1974*0Sstevel@tonic-gate 	int error = 0;
1975*0Sstevel@tonic-gate 	int time_error;
1976*0Sstevel@tonic-gate 	int imm_timeout = 0;
1977*0Sstevel@tonic-gate 	int imm_unpark = 0;
1978*0Sstevel@tonic-gate 
1979*0Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
1980*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
1981*0Sstevel@tonic-gate 
1982*0Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
1983*0Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
1984*0Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
1985*0Sstevel@tonic-gate 		imm_timeout = 1;
1986*0Sstevel@tonic-gate 		timedwait = NULL;
1987*0Sstevel@tonic-gate 	}
1988*0Sstevel@tonic-gate 
1989*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
1990*0Sstevel@tonic-gate 
1991*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
1992*0Sstevel@tonic-gate 		if (locked)
1993*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
1994*0Sstevel@tonic-gate 		error = EFAULT;
1995*0Sstevel@tonic-gate 		goto out;
1996*0Sstevel@tonic-gate 	}
1997*0Sstevel@tonic-gate 	/*
1998*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_sema_t object is
1999*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
2000*0Sstevel@tonic-gate 	 */
2001*0Sstevel@tonic-gate 	fuword16_noerr((void *)&sp->sema_type, (uint16_t *)&type);
2002*0Sstevel@tonic-gate 	suword16_noerr((void *)&sp->sema_type, type);
2003*0Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)sp, type,
2004*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
2005*0Sstevel@tonic-gate 		error = EFAULT;
2006*0Sstevel@tonic-gate 		goto out;
2007*0Sstevel@tonic-gate 	}
2008*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2009*0Sstevel@tonic-gate 	locked = 1;
2010*0Sstevel@tonic-gate 	fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
2011*0Sstevel@tonic-gate 	while (error == 0 && count == 0) {
2012*0Sstevel@tonic-gate 		if (time_error) {
2013*0Sstevel@tonic-gate 			/*
2014*0Sstevel@tonic-gate 			 * The SUSv3 POSIX spec is very clear that we
2015*0Sstevel@tonic-gate 			 * should get no error from validating the
2016*0Sstevel@tonic-gate 			 * timer until we would actually sleep.
2017*0Sstevel@tonic-gate 			 */
2018*0Sstevel@tonic-gate 			error = time_error;
2019*0Sstevel@tonic-gate 			break;
2020*0Sstevel@tonic-gate 		}
2021*0Sstevel@tonic-gate 		suword8_noerr(&sp->sema_waiters, 1);
2022*0Sstevel@tonic-gate 		if (watched)
2023*0Sstevel@tonic-gate 			watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2024*0Sstevel@tonic-gate 		/*
2025*0Sstevel@tonic-gate 		 * Put the lwp in an orderly state for debugging.
2026*0Sstevel@tonic-gate 		 */
2027*0Sstevel@tonic-gate 		prstop(PR_REQUESTED, 0);
2028*0Sstevel@tonic-gate 		if (check_park && (!schedctl_is_park() || t->t_unpark)) {
2029*0Sstevel@tonic-gate 			/*
2030*0Sstevel@tonic-gate 			 * We received a signal at user-level before calling
2031*0Sstevel@tonic-gate 			 * here or another thread wants us to return
2032*0Sstevel@tonic-gate 			 * immediately with EINTR.  See lwp_unpark().
2033*0Sstevel@tonic-gate 			 */
2034*0Sstevel@tonic-gate 			imm_unpark = 1;
2035*0Sstevel@tonic-gate 			t->t_unpark = 0;
2036*0Sstevel@tonic-gate 			timedwait = NULL;
2037*0Sstevel@tonic-gate 		} else if (timedwait) {
2038*0Sstevel@tonic-gate 			/*
2039*0Sstevel@tonic-gate 			 * If we successfully queue the timeout,
2040*0Sstevel@tonic-gate 			 * then don't drop t_delay_lock until
2041*0Sstevel@tonic-gate 			 * we are on the sleep queue (below).
2042*0Sstevel@tonic-gate 			 */
2043*0Sstevel@tonic-gate 			mutex_enter(&t->t_delay_lock);
2044*0Sstevel@tonic-gate 			if (lwp_timer_enqueue(&lwpt) != 0) {
2045*0Sstevel@tonic-gate 				mutex_exit(&t->t_delay_lock);
2046*0Sstevel@tonic-gate 				imm_timeout = 1;
2047*0Sstevel@tonic-gate 				timedwait = NULL;
2048*0Sstevel@tonic-gate 			}
2049*0Sstevel@tonic-gate 		}
2050*0Sstevel@tonic-gate 		t->t_flag |= T_WAITCVSEM;
2051*0Sstevel@tonic-gate 		lwp_block(&lwpchan);
2052*0Sstevel@tonic-gate 		/*
2053*0Sstevel@tonic-gate 		 * Nothing should happen to cause the lwp to sleep
2054*0Sstevel@tonic-gate 		 * again until after it returns from swtch().
2055*0Sstevel@tonic-gate 		 */
2056*0Sstevel@tonic-gate 		if (timedwait)
2057*0Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
2058*0Sstevel@tonic-gate 		locked = 0;
2059*0Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2060*0Sstevel@tonic-gate 		if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t) ||
2061*0Sstevel@tonic-gate 		    (imm_timeout | imm_unpark))
2062*0Sstevel@tonic-gate 			setrun(t);
2063*0Sstevel@tonic-gate 		swtch();
2064*0Sstevel@tonic-gate 		t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2065*0Sstevel@tonic-gate 		if (timedwait)
2066*0Sstevel@tonic-gate 			tim = lwp_timer_dequeue(&lwpt);
2067*0Sstevel@tonic-gate 		setallwatch();
2068*0Sstevel@tonic-gate 		if (ISSIG(t, FORREAL) || lwp->lwp_sysabort ||
2069*0Sstevel@tonic-gate 		    MUSTRETURN(p, t) || imm_unpark)
2070*0Sstevel@tonic-gate 			error = EINTR;
2071*0Sstevel@tonic-gate 		else if (imm_timeout || (timedwait && tim == -1))
2072*0Sstevel@tonic-gate 			error = ETIME;
2073*0Sstevel@tonic-gate 		lwp->lwp_asleep = 0;
2074*0Sstevel@tonic-gate 		lwp->lwp_sysabort = 0;
2075*0Sstevel@tonic-gate 		watched = watch_disable_addr((caddr_t)sp,
2076*0Sstevel@tonic-gate 		    sizeof (*sp), S_WRITE);
2077*0Sstevel@tonic-gate 		lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2078*0Sstevel@tonic-gate 		locked = 1;
2079*0Sstevel@tonic-gate 		fuword32_noerr((void *)&sp->sema_count, (uint32_t *)&count);
2080*0Sstevel@tonic-gate 	}
2081*0Sstevel@tonic-gate 	if (error == 0)
2082*0Sstevel@tonic-gate 		suword32_noerr((void *)&sp->sema_count, --count);
2083*0Sstevel@tonic-gate 	if (count != 0) {
2084*0Sstevel@tonic-gate 		(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
2085*0Sstevel@tonic-gate 		suword8_noerr(&sp->sema_waiters, waiters);
2086*0Sstevel@tonic-gate 	}
2087*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2088*0Sstevel@tonic-gate out:
2089*0Sstevel@tonic-gate 	no_fault();
2090*0Sstevel@tonic-gate 	if (watched)
2091*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2092*0Sstevel@tonic-gate 	if (tsp && check_park && !time_error)
2093*0Sstevel@tonic-gate 		error = lwp_timer_copyout(&lwpt, error);
2094*0Sstevel@tonic-gate 	if (error)
2095*0Sstevel@tonic-gate 		return (set_errno(error));
2096*0Sstevel@tonic-gate 	return (0);
2097*0Sstevel@tonic-gate }
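
/*
 * A worked example of the deferred timer validation above, assuming
 * that lwp_timer_copyin() rejects a malformed timespec with EINVAL:
 *
 *	an invalid timespec, sema_count > 0:	the wait consumes a
 *	    unit and returns 0; no error is reported because we never
 *	    needed to sleep.
 *	the same timespec, sema_count == 0:	we would have to sleep,
 *	    so the saved time_error (EINVAL) is returned instead.
 */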
2098*0Sstevel@tonic-gate 
2099*0Sstevel@tonic-gate /*
2100*0Sstevel@tonic-gate  * Obsolete lwp_sema_wait() interface, no longer called from libc.
2101*0Sstevel@tonic-gate  * libc now calls lwp_sema_timedwait().
2102*0Sstevel@tonic-gate  * This system call trap exists solely for the benefit of old
2103*0Sstevel@tonic-gate  * statically linked applications from Solaris 9 and before.
2104*0Sstevel@tonic-gate  * It should be removed when we no longer care about such applications.
2105*0Sstevel@tonic-gate  */
2106*0Sstevel@tonic-gate int
2107*0Sstevel@tonic-gate lwp_sema_wait(lwp_sema_t *sp)
2108*0Sstevel@tonic-gate {
2109*0Sstevel@tonic-gate 	return (lwp_sema_timedwait(sp, NULL, 0));
2110*0Sstevel@tonic-gate }
2111*0Sstevel@tonic-gate 
2112*0Sstevel@tonic-gate int
2113*0Sstevel@tonic-gate lwp_sema_post(lwp_sema_t *sp)
2114*0Sstevel@tonic-gate {
2115*0Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
2116*0Sstevel@tonic-gate 	label_t ljb;
2117*0Sstevel@tonic-gate 	volatile int locked = 0;
2118*0Sstevel@tonic-gate 	volatile int watched = 0;
2119*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
2120*0Sstevel@tonic-gate 	int count;
2121*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
2122*0Sstevel@tonic-gate 	uchar_t waiters;
2123*0Sstevel@tonic-gate 	int error = 0;
2124*0Sstevel@tonic-gate 
2125*0Sstevel@tonic-gate 	if ((caddr_t)sp >= p->p_as->a_userlimit)
2126*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
2127*0Sstevel@tonic-gate 
2128*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2129*0Sstevel@tonic-gate 
2130*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
2131*0Sstevel@tonic-gate 		if (locked)
2132*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2133*0Sstevel@tonic-gate 		error = EFAULT;
2134*0Sstevel@tonic-gate 		goto out;
2135*0Sstevel@tonic-gate 	}
2136*0Sstevel@tonic-gate 	/*
2137*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_sema_t object is
2138*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE, and is USYNC_PROCESS.
2139*0Sstevel@tonic-gate 	 */
2140*0Sstevel@tonic-gate 	fuword16_noerr(&sp->sema_type, (uint16_t *)&type);
2141*0Sstevel@tonic-gate 	suword16_noerr(&sp->sema_type, type);
2142*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)sp, type,
2143*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
2144*0Sstevel@tonic-gate 		error = EFAULT;
2145*0Sstevel@tonic-gate 		goto out;
2146*0Sstevel@tonic-gate 	}
2147*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2148*0Sstevel@tonic-gate 	locked = 1;
2149*0Sstevel@tonic-gate 	fuword32_noerr(&sp->sema_count, (uint32_t *)&count);
2150*0Sstevel@tonic-gate 	if (count == _SEM_VALUE_MAX)
2151*0Sstevel@tonic-gate 		error = EOVERFLOW;
2152*0Sstevel@tonic-gate 	else
2153*0Sstevel@tonic-gate 		suword32_noerr(&sp->sema_count, ++count);
2154*0Sstevel@tonic-gate 	if (count == 1) {
2155*0Sstevel@tonic-gate 		fuword8_noerr(&sp->sema_waiters, &waiters);
2156*0Sstevel@tonic-gate 		if (waiters) {
2157*0Sstevel@tonic-gate 			(void) lwp_release(&lwpchan, &waiters, T_WAITCVSEM);
2158*0Sstevel@tonic-gate 			suword8_noerr(&sp->sema_waiters, waiters);
2159*0Sstevel@tonic-gate 		}
2160*0Sstevel@tonic-gate 	}
2161*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2162*0Sstevel@tonic-gate out:
2163*0Sstevel@tonic-gate 	no_fault();
2164*0Sstevel@tonic-gate 	if (watched)
2165*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)sp, sizeof (*sp), S_WRITE);
2166*0Sstevel@tonic-gate 	if (error)
2167*0Sstevel@tonic-gate 		return (set_errno(error));
2168*0Sstevel@tonic-gate 	return (0);
2169*0Sstevel@tonic-gate }
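
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * post-side protocol above in miniature.  A post fails with EOVERFLOW
 * once the count is saturated at _SEM_VALUE_MAX; otherwise it increments
 * the count, and only the 0 -> 1 transition can have sleeping consumers
 * to wake.  All names below are invented.
 */
#if 0
static void
sketch_release_waiter(void)		/* stand-in for lwp_release() */
{
}

static int
sema_post_sketch(unsigned int *countp, unsigned char waiters)
{
	if (*countp == _SEM_VALUE_MAX)
		return (EOVERFLOW);
	if (++*countp == 1 && waiters != 0)
		sketch_release_waiter();	/* first unit after empty */
	return (0);
}
#endif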
2170*0Sstevel@tonic-gate 
2171*0Sstevel@tonic-gate #define	TRW_WANT_WRITE		0x1
2172*0Sstevel@tonic-gate #define	TRW_LOCK_GRANTED	0x2
2173*0Sstevel@tonic-gate 
2174*0Sstevel@tonic-gate #define	READ_LOCK		0
2175*0Sstevel@tonic-gate #define	WRITE_LOCK		1
2176*0Sstevel@tonic-gate #define	TRY_FLAG		0x10
2177*0Sstevel@tonic-gate #define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
2178*0Sstevel@tonic-gate #define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)
2179*0Sstevel@tonic-gate 
2180*0Sstevel@tonic-gate /*
2181*0Sstevel@tonic-gate  * Release one writer or one or more readers. Compute the rwstate word to
2182*0Sstevel@tonic-gate  * reflect the new state of the queue. For a safe hand-off we copy the new
2183*0Sstevel@tonic-gate  * rwstate value back to userland before we wake any of the new lock holders.
2184*0Sstevel@tonic-gate  *
2185*0Sstevel@tonic-gate  * Note that sleepq_insert() implements a prioritized FIFO (with writers
2186*0Sstevel@tonic-gate  * being given precedence over readers of the same priority).
2187*0Sstevel@tonic-gate  *
2188*0Sstevel@tonic-gate  * If the first thread is a reader we scan the queue releasing all readers
2189*0Sstevel@tonic-gate  * until we hit a writer or the end of the queue. If the first thread is a
2190*0Sstevel@tonic-gate  * writer we still need to check for another writer (i.e. URW_WRITE_WANTED).
2191*0Sstevel@tonic-gate  */
2192*0Sstevel@tonic-gate void
2193*0Sstevel@tonic-gate lwp_rwlock_release(lwpchan_t *lwpchan, lwp_rwlock_t *rw)
2194*0Sstevel@tonic-gate {
2195*0Sstevel@tonic-gate 	sleepq_head_t *sqh;
2196*0Sstevel@tonic-gate 	kthread_t *tp;
2197*0Sstevel@tonic-gate 	kthread_t **tpp;
2198*0Sstevel@tonic-gate 	kthread_t *tpnext;
2199*0Sstevel@tonic-gate 	kthread_t *wakelist = NULL;
2200*0Sstevel@tonic-gate 	uint32_t rwstate = 0;
2201*0Sstevel@tonic-gate 	int wcount = 0;
2202*0Sstevel@tonic-gate 	int rcount = 0;
2203*0Sstevel@tonic-gate 
2204*0Sstevel@tonic-gate 	sqh = lwpsqhash(lwpchan);
2205*0Sstevel@tonic-gate 	disp_lock_enter(&sqh->sq_lock);
2206*0Sstevel@tonic-gate 	tpp = &sqh->sq_queue.sq_first;
2207*0Sstevel@tonic-gate 	while ((tp = *tpp) != NULL) {
2208*0Sstevel@tonic-gate 		if (tp->t_lwpchan.lc_wchan0 == lwpchan->lc_wchan0 &&
2209*0Sstevel@tonic-gate 		    tp->t_lwpchan.lc_wchan == lwpchan->lc_wchan) {
2210*0Sstevel@tonic-gate 			if (tp->t_writer & TRW_WANT_WRITE) {
2211*0Sstevel@tonic-gate 				if ((wcount++ == 0) && (rcount == 0)) {
2212*0Sstevel@tonic-gate 					rwstate |= URW_WRITE_LOCKED;
2213*0Sstevel@tonic-gate 
2214*0Sstevel@tonic-gate 					/* Just one writer to wake. */
2215*0Sstevel@tonic-gate 					sleepq_unlink(tpp, tp);
2216*0Sstevel@tonic-gate 					wakelist = tp;
2217*0Sstevel@tonic-gate 
2218*0Sstevel@tonic-gate 					/* tpp already set for next thread. */
2219*0Sstevel@tonic-gate 					continue;
2220*0Sstevel@tonic-gate 				} else {
2221*0Sstevel@tonic-gate 					rwstate |=
2222*0Sstevel@tonic-gate 					    (URW_WRITE_WANTED|URW_HAS_WAITERS);
2223*0Sstevel@tonic-gate 
2224*0Sstevel@tonic-gate 					/* We need look no further. */
2225*0Sstevel@tonic-gate 					break;
2226*0Sstevel@tonic-gate 				}
2227*0Sstevel@tonic-gate 			} else {
2228*0Sstevel@tonic-gate 				rcount++;
2229*0Sstevel@tonic-gate 				if (wcount == 0) {
2230*0Sstevel@tonic-gate 					rwstate++;
2231*0Sstevel@tonic-gate 
2232*0Sstevel@tonic-gate 					/* Add reader to wake list. */
2233*0Sstevel@tonic-gate 					sleepq_unlink(tpp, tp);
2234*0Sstevel@tonic-gate 					tp->t_link = wakelist;
2235*0Sstevel@tonic-gate 					wakelist = tp;
2236*0Sstevel@tonic-gate 
2237*0Sstevel@tonic-gate 					/* tpp already set for next thread. */
2238*0Sstevel@tonic-gate 					continue;
2239*0Sstevel@tonic-gate 				} else
2240*0Sstevel@tonic-gate 					rwstate |= URW_HAS_WAITERS;
2241*0Sstevel@tonic-gate 			}
2242*0Sstevel@tonic-gate 		}
2243*0Sstevel@tonic-gate 		tpp = &tp->t_link;
2244*0Sstevel@tonic-gate 	}
2245*0Sstevel@tonic-gate 
2246*0Sstevel@tonic-gate 	/* Copy the new rwstate back to userland. */
2247*0Sstevel@tonic-gate 	suword32_noerr(&rw->rwlock_readers, rwstate);
2248*0Sstevel@tonic-gate 
2249*0Sstevel@tonic-gate 	/* Wake the new lock holder(s) up. */
2250*0Sstevel@tonic-gate 	tp = wakelist;
2251*0Sstevel@tonic-gate 	while (tp != NULL) {
2252*0Sstevel@tonic-gate 		DTRACE_SCHED1(wakeup, kthread_t *, tp);
2253*0Sstevel@tonic-gate 		tp->t_wchan0 = NULL;
2254*0Sstevel@tonic-gate 		tp->t_wchan = NULL;
2255*0Sstevel@tonic-gate 		tp->t_sobj_ops = NULL;
2256*0Sstevel@tonic-gate 		tp->t_writer |= TRW_LOCK_GRANTED;
2257*0Sstevel@tonic-gate 		tpnext = tp->t_link;
2258*0Sstevel@tonic-gate 		tp->t_link = NULL;
2259*0Sstevel@tonic-gate 		CL_WAKEUP(tp);
2260*0Sstevel@tonic-gate 		thread_unlock_high(tp);
2261*0Sstevel@tonic-gate 		tp = tpnext;
2262*0Sstevel@tonic-gate 	}
2263*0Sstevel@tonic-gate 
2264*0Sstevel@tonic-gate 	disp_lock_exit(&sqh->sq_lock);
2265*0Sstevel@tonic-gate }
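
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * rwstate computation above applied to a queue modeled as an array.
 * For a priority-ordered queue [R, R, W, R] the scan wakes the two
 * leading readers and leaves 2 | URW_WRITE_WANTED | URW_HAS_WAITERS;
 * for [W, R] it wakes the writer and leaves
 * URW_WRITE_LOCKED | URW_HAS_WAITERS.  The function name is invented.
 */
#if 0
static uint32_t
rwstate_release_sketch(const int *is_writer, int n)
{
	uint32_t rwstate = 0;
	int wcount = 0, rcount = 0, i;

	for (i = 0; i < n; i++) {
		if (is_writer[i]) {
			if (wcount++ == 0 && rcount == 0) {
				rwstate |= URW_WRITE_LOCKED;	/* wake him */
				continue;
			}
			rwstate |= URW_WRITE_WANTED | URW_HAS_WAITERS;
			break;		/* we need look no further */
		}
		rcount++;
		if (wcount == 0) {
			rwstate++;	/* wake this reader too */
			continue;
		}
		rwstate |= URW_HAS_WAITERS;
	}
	return (rwstate);
}
#endif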
2266*0Sstevel@tonic-gate 
2267*0Sstevel@tonic-gate /*
2268*0Sstevel@tonic-gate  * We enter here holding the user-level mutex, which we must release before
2269*0Sstevel@tonic-gate  * returning or blocking. Based on lwp_cond_wait().
2270*0Sstevel@tonic-gate  */
2271*0Sstevel@tonic-gate static int
2272*0Sstevel@tonic-gate lwp_rwlock_lock(lwp_rwlock_t *rw, timespec_t *tsp, int rd_wr)
2273*0Sstevel@tonic-gate {
2274*0Sstevel@tonic-gate 	lwp_mutex_t *mp = NULL;
2275*0Sstevel@tonic-gate 	kthread_t *t = curthread;
2276*0Sstevel@tonic-gate 	kthread_t *tp;
2277*0Sstevel@tonic-gate 	klwp_t *lwp = ttolwp(t);
2278*0Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
2279*0Sstevel@tonic-gate 	lwp_timer_t lwpt;
2280*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
2281*0Sstevel@tonic-gate 	lwpchan_t mlwpchan;
2282*0Sstevel@tonic-gate 	caddr_t timedwait;
2283*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
2284*0Sstevel@tonic-gate 	volatile uint8_t mtype = 0;
2285*0Sstevel@tonic-gate 	uchar_t mwaiters;
2286*0Sstevel@tonic-gate 	volatile int error = 0;
2287*0Sstevel@tonic-gate 	int time_error;
2288*0Sstevel@tonic-gate 	clock_t tim = -1;
2289*0Sstevel@tonic-gate 	volatile int locked = 0;
2290*0Sstevel@tonic-gate 	volatile int mlocked = 0;
2291*0Sstevel@tonic-gate 	volatile int watched = 0;
2292*0Sstevel@tonic-gate 	volatile int mwatched = 0;
2293*0Sstevel@tonic-gate 	label_t ljb;
2294*0Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
2295*0Sstevel@tonic-gate 	int imm_timeout = 0;
2296*0Sstevel@tonic-gate 	int try_flag;
2297*0Sstevel@tonic-gate 	uint32_t rwstate;
2298*0Sstevel@tonic-gate 	int acquired = 0;
2299*0Sstevel@tonic-gate 
2300*0Sstevel@tonic-gate 	/* We only check rw because the mutex is included in it. */
2301*0Sstevel@tonic-gate 	if ((caddr_t)rw >= p->p_as->a_userlimit)
2302*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
2303*0Sstevel@tonic-gate 
2304*0Sstevel@tonic-gate 	/* We must only report this error if we are about to sleep (later). */
2305*0Sstevel@tonic-gate 	timedwait = (caddr_t)tsp;
2306*0Sstevel@tonic-gate 	if ((time_error = lwp_timer_copyin(&lwpt, tsp)) == 0 &&
2307*0Sstevel@tonic-gate 	    lwpt.lwpt_imm_timeout) {
2308*0Sstevel@tonic-gate 		imm_timeout = 1;
2309*0Sstevel@tonic-gate 		timedwait = NULL;
2310*0Sstevel@tonic-gate 	}
2311*0Sstevel@tonic-gate 
2312*0Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
2313*0Sstevel@tonic-gate 
2314*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
2315*0Sstevel@tonic-gate 		if (no_lwpchan) {
2316*0Sstevel@tonic-gate 			error = EFAULT;
2317*0Sstevel@tonic-gate 			goto out_nodrop;
2318*0Sstevel@tonic-gate 		}
2319*0Sstevel@tonic-gate 		if (mlocked) {
2320*0Sstevel@tonic-gate 			mlocked = 0;
2321*0Sstevel@tonic-gate 			lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2322*0Sstevel@tonic-gate 		}
2323*0Sstevel@tonic-gate 		if (locked) {
2324*0Sstevel@tonic-gate 			locked = 0;
2325*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2326*0Sstevel@tonic-gate 		}
2327*0Sstevel@tonic-gate 		/*
2328*0Sstevel@tonic-gate 		 * Set up another on_fault() for a possible fault
2329*0Sstevel@tonic-gate 		 * on the user lock accessed at "out_drop".
2330*0Sstevel@tonic-gate 		 */
2331*0Sstevel@tonic-gate 		if (on_fault(&ljb)) {
2332*0Sstevel@tonic-gate 			if (mlocked) {
2333*0Sstevel@tonic-gate 				mlocked = 0;
2334*0Sstevel@tonic-gate 				lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2335*0Sstevel@tonic-gate 			}
2336*0Sstevel@tonic-gate 			error = EFAULT;
2337*0Sstevel@tonic-gate 			goto out_nodrop;
2338*0Sstevel@tonic-gate 		}
2339*0Sstevel@tonic-gate 		error = EFAULT;
2340*0Sstevel@tonic-gate 		goto out_nodrop;
2341*0Sstevel@tonic-gate 	}
2342*0Sstevel@tonic-gate 
2343*0Sstevel@tonic-gate 	/* Process rd_wr (including sanity check). */
2344*0Sstevel@tonic-gate 	try_flag = (rd_wr & TRY_FLAG);
2345*0Sstevel@tonic-gate 	rd_wr &= ~TRY_FLAG;
2346*0Sstevel@tonic-gate 	if ((rd_wr != READ_LOCK) && (rd_wr != WRITE_LOCK)) {
2347*0Sstevel@tonic-gate 		error = EINVAL;
2348*0Sstevel@tonic-gate 		goto out_nodrop;
2349*0Sstevel@tonic-gate 	}
2350*0Sstevel@tonic-gate 
2351*0Sstevel@tonic-gate 	/* We can only continue for simple USYNC_PROCESS locks. */
2352*0Sstevel@tonic-gate 	mp = &rw->mutex;
2353*0Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_type, (uint8_t *)&mtype);
2354*0Sstevel@tonic-gate 	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
2355*0Sstevel@tonic-gate 	if ((mtype != USYNC_PROCESS) || (type != USYNC_PROCESS)) {
2356*0Sstevel@tonic-gate 		error = EINVAL;
2357*0Sstevel@tonic-gate 		goto out_nodrop;
2358*0Sstevel@tonic-gate 	}
2359*0Sstevel@tonic-gate 
2360*0Sstevel@tonic-gate 	/* Force Copy-on-write fault in case objects are MAP_PRIVATE. */
2361*0Sstevel@tonic-gate 	suword8_noerr(&mp->mutex_type, mtype);
2362*0Sstevel@tonic-gate 	suword16_noerr(&rw->rwlock_type, type);
2363*0Sstevel@tonic-gate 
2364*0Sstevel@tonic-gate 	/* Convert user level mutex, "mp", to a unique lwpchan. */
2365*0Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)mp, mtype,
2366*0Sstevel@tonic-gate 	    &mlwpchan, LWPCHAN_MPPOOL)) {
2367*0Sstevel@tonic-gate 		error = EFAULT;
2368*0Sstevel@tonic-gate 		goto out_nodrop;
2369*0Sstevel@tonic-gate 	}
2370*0Sstevel@tonic-gate 
2371*0Sstevel@tonic-gate 	/* Convert user level rwlock, "rw", to a unique lwpchan. */
2372*0Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2373*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
2374*0Sstevel@tonic-gate 		error = EFAULT;
2375*0Sstevel@tonic-gate 		goto out_nodrop;
2376*0Sstevel@tonic-gate 	}
2377*0Sstevel@tonic-gate 
2378*0Sstevel@tonic-gate 	no_lwpchan = 0;
2379*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2380*0Sstevel@tonic-gate 	mwatched = watch_disable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2381*0Sstevel@tonic-gate 
2382*0Sstevel@tonic-gate 	/*
2383*0Sstevel@tonic-gate 	 * lwpchan_lock() ensures that the calling LWP is put to sleep
2384*0Sstevel@tonic-gate 	 * atomically with respect to a possible wakeup which is a result
2385*0Sstevel@tonic-gate 	 * of lwp_rwlock_unlock().
2386*0Sstevel@tonic-gate 	 *
2387*0Sstevel@tonic-gate 	 * What's misleading is that the LWP is put to sleep after the
2388*0Sstevel@tonic-gate 	 * rwlock's mutex is released. This is OK as long as the release
2389*0Sstevel@tonic-gate 	 * operation is also done while holding mlwpchan. The LWP is then
2390*0Sstevel@tonic-gate 	 * put to sleep when the possibility of pagefaulting or sleeping
2391*0Sstevel@tonic-gate 	 * has been completely eliminated.
2392*0Sstevel@tonic-gate 	 */
2393*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2394*0Sstevel@tonic-gate 	locked = 1;
2395*0Sstevel@tonic-gate 	lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2396*0Sstevel@tonic-gate 	mlocked = 1;
2397*0Sstevel@tonic-gate 
2398*0Sstevel@tonic-gate 	/*
2399*0Sstevel@tonic-gate 	 * Fetch the current rwlock state.
2400*0Sstevel@tonic-gate 	 *
2401*0Sstevel@tonic-gate 	 * The possibility of spurious wake-ups or killed waiters means that
2402*0Sstevel@tonic-gate 	 * rwstate's URW_HAS_WAITERS and URW_WRITE_WANTED bits may indicate
2403*0Sstevel@tonic-gate 	 * false positives. We only fix these if they are important to us.
2404*0Sstevel@tonic-gate 	 *
2405*0Sstevel@tonic-gate 	 * Although various error states can be observed here (e.g. the lock
2406*0Sstevel@tonic-gate 	 * is not held, but there are waiters) we assume these are application
2407*0Sstevel@tonic-gate 	 * errors and so we take no corrective action.
2408*0Sstevel@tonic-gate 	 */
2409*0Sstevel@tonic-gate 	fuword32_noerr(&rw->rwlock_readers, &rwstate);
2410*0Sstevel@tonic-gate 
2411*0Sstevel@tonic-gate 	/*
2412*0Sstevel@tonic-gate 	 * If the lock is uncontended we can acquire it here. These tests
2413*0Sstevel@tonic-gate 	 * should already have been done at user level; we just need to
2414*0Sstevel@tonic-gate 	 * be sure.
2415*0Sstevel@tonic-gate 	 */
2416*0Sstevel@tonic-gate 	if (rd_wr == READ_LOCK) {
2417*0Sstevel@tonic-gate 		if ((rwstate & ~URW_READERS_MASK) == 0) {
2418*0Sstevel@tonic-gate 			rwstate++;
2419*0Sstevel@tonic-gate 			acquired = 1;
2420*0Sstevel@tonic-gate 		}
2421*0Sstevel@tonic-gate 	} else if (rwstate == 0) {
2422*0Sstevel@tonic-gate 		rwstate = URW_WRITE_LOCKED;
2423*0Sstevel@tonic-gate 		acquired = 1;
2424*0Sstevel@tonic-gate 	}
2425*0Sstevel@tonic-gate 
2426*0Sstevel@tonic-gate 	/*
2427*0Sstevel@tonic-gate 	 * We can only try harder if the lock isn't held by a writer.
2428*0Sstevel@tonic-gate 	 */
2429*0Sstevel@tonic-gate 	if (!acquired && !(rwstate & URW_WRITE_LOCKED)) {
2430*0Sstevel@tonic-gate 		tp = lwp_queue_waiter(&lwpchan);
2431*0Sstevel@tonic-gate 		if (tp == NULL) {
2432*0Sstevel@tonic-gate 			/*
2433*0Sstevel@tonic-gate 			 * Hmmm, rwstate indicates waiters but there are
2434*0Sstevel@tonic-gate 			 * none queued. This could just be the result of a
2435*0Sstevel@tonic-gate 			 * spurious wakeup, so let's fix it.
2436*0Sstevel@tonic-gate 			 */
2437*0Sstevel@tonic-gate 			rwstate &= URW_READERS_MASK;
2438*0Sstevel@tonic-gate 
2439*0Sstevel@tonic-gate 			/*
2440*0Sstevel@tonic-gate 			 * We now have another chance to acquire the lock
2441*0Sstevel@tonic-gate 			 * uncontended, but this is the last chance for a
2442*0Sstevel@tonic-gate 			 * writer to acquire the lock without blocking.
2443*0Sstevel@tonic-gate 			 */
2444*0Sstevel@tonic-gate 			if (rd_wr == READ_LOCK) {
2445*0Sstevel@tonic-gate 				rwstate++;
2446*0Sstevel@tonic-gate 				acquired = 1;
2447*0Sstevel@tonic-gate 			} else if (rwstate == 0) {
2448*0Sstevel@tonic-gate 				rwstate = URW_WRITE_LOCKED;
2449*0Sstevel@tonic-gate 				acquired = 1;
2450*0Sstevel@tonic-gate 			}
2451*0Sstevel@tonic-gate 		} else if (rd_wr == READ_LOCK) {
2452*0Sstevel@tonic-gate 			/*
2453*0Sstevel@tonic-gate 			 * This is the last chance for a reader to acquire
2454*0Sstevel@tonic-gate 			 * the lock now, but it can only do so if there is
2455*0Sstevel@tonic-gate 			 * no writer of equal or greater priority at the
2456*0Sstevel@tonic-gate 			 * head of the queue.
2457*0Sstevel@tonic-gate 			 *
2458*0Sstevel@tonic-gate 			 * It is also just possible that there is a reader
2459*0Sstevel@tonic-gate 			 * at the head of the queue. This may be the result
2460*0Sstevel@tonic-gate 			 * of a spurious wakeup or an application failure.
2461*0Sstevel@tonic-gate 			 * In this case we only acquire the lock if we have
2462*0Sstevel@tonic-gate 			 * equal or greater priority. It is not our job to
2463*0Sstevel@tonic-gate 			 * release spurious waiters.
2464*0Sstevel@tonic-gate 			 */
2465*0Sstevel@tonic-gate 			pri_t our_pri = DISP_PRIO(t);
2466*0Sstevel@tonic-gate 			pri_t his_pri = DISP_PRIO(tp);
2467*0Sstevel@tonic-gate 
2468*0Sstevel@tonic-gate 			if ((our_pri > his_pri) || ((our_pri == his_pri) &&
2469*0Sstevel@tonic-gate 			    !(tp->t_writer & TRW_WANT_WRITE))) {
2470*0Sstevel@tonic-gate 				rwstate++;
2471*0Sstevel@tonic-gate 				acquired = 1;
2472*0Sstevel@tonic-gate 			}
2473*0Sstevel@tonic-gate 		}
2474*0Sstevel@tonic-gate 	}
2475*0Sstevel@tonic-gate 
2476*0Sstevel@tonic-gate 	if (acquired || try_flag || time_error) {
2477*0Sstevel@tonic-gate 		/*
2478*0Sstevel@tonic-gate 		 * We're not going to block this time!
2479*0Sstevel@tonic-gate 		 */
2480*0Sstevel@tonic-gate 		suword32_noerr(&rw->rwlock_readers, rwstate);
2481*0Sstevel@tonic-gate 		lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2482*0Sstevel@tonic-gate 		locked = 0;
2483*0Sstevel@tonic-gate 
2484*0Sstevel@tonic-gate 		if (acquired) {
2485*0Sstevel@tonic-gate 			/*
2486*0Sstevel@tonic-gate 			 * Got the lock!
2487*0Sstevel@tonic-gate 			 */
2488*0Sstevel@tonic-gate 			error = 0;
2489*0Sstevel@tonic-gate 
2490*0Sstevel@tonic-gate 		} else if (try_flag) {
2491*0Sstevel@tonic-gate 			/*
2492*0Sstevel@tonic-gate 			 * We didn't get the lock and would otherwise block.
2493*0Sstevel@tonic-gate 			 * Since this is a trylock, return EBUSY instead.
2494*0Sstevel@tonic-gate 			 */
2495*0Sstevel@tonic-gate 			error = EBUSY;
2496*0Sstevel@tonic-gate 
2497*0Sstevel@tonic-gate 		} else if (time_error) {
2498*0Sstevel@tonic-gate 			/*
2499*0Sstevel@tonic-gate 			 * The SUSV3 POSIX spec is very clear that we should
2500*0Sstevel@tonic-gate 			 * get no error from validating the timer (above)
2501*0Sstevel@tonic-gate 			 * until we would actually sleep.
2502*0Sstevel@tonic-gate 			 */
2503*0Sstevel@tonic-gate 			error = time_error;
2504*0Sstevel@tonic-gate 		}
2505*0Sstevel@tonic-gate 
2506*0Sstevel@tonic-gate 		goto out_drop;
2507*0Sstevel@tonic-gate 	}
2508*0Sstevel@tonic-gate 
2509*0Sstevel@tonic-gate 	/*
2510*0Sstevel@tonic-gate 	 * We're about to block, so indicate what kind of waiter we are.
2511*0Sstevel@tonic-gate 	 */
2512*0Sstevel@tonic-gate 	t->t_writer = 0;
2513*0Sstevel@tonic-gate 	rwstate |= URW_HAS_WAITERS;
2514*0Sstevel@tonic-gate 	if (rd_wr == WRITE_LOCK) {
2515*0Sstevel@tonic-gate 		t->t_writer = TRW_WANT_WRITE;
2516*0Sstevel@tonic-gate 		rwstate |= URW_WRITE_WANTED;
2517*0Sstevel@tonic-gate 	}
2518*0Sstevel@tonic-gate 	suword32_noerr(&rw->rwlock_readers, rwstate);
2519*0Sstevel@tonic-gate 
2520*0Sstevel@tonic-gate 	/*
2521*0Sstevel@tonic-gate 	 * Unlock the rwlock's mutex (pagefaults are possible here).
2522*0Sstevel@tonic-gate 	 */
2523*0Sstevel@tonic-gate 	ulock_clear(&mp->mutex_lockw);
2524*0Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2525*0Sstevel@tonic-gate 	if (mwaiters != 0) {
2526*0Sstevel@tonic-gate 		/*
2527*0Sstevel@tonic-gate 		 * Given the locking of mlwpchan around the release of
2528*0Sstevel@tonic-gate 		 * the mutex and checking for waiters, the following
2529*0Sstevel@tonic-gate 		 * call to lwp_release() can fail ONLY if the lock
2530*0Sstevel@tonic-gate 		 * acquirer is interrupted after setting the waiter bit,
2531*0Sstevel@tonic-gate 		 * calling lwp_block() and releasing mlwpchan.
2532*0Sstevel@tonic-gate 		 * In this case, it could get pulled off the LWP sleep
2533*0Sstevel@tonic-gate 		 * queue (via setrun()) before the following call to
2534*0Sstevel@tonic-gate 		 * lwp_release() occurs, and the lock requestor will
2535*0Sstevel@tonic-gate 		 * update the waiter bit correctly by re-evaluating it.
2536*0Sstevel@tonic-gate 		 */
2537*0Sstevel@tonic-gate 		if (lwp_release(&mlwpchan, &mwaiters, 0) > 0)
2538*0Sstevel@tonic-gate 			suword8_noerr(&mp->mutex_waiters, mwaiters);
2539*0Sstevel@tonic-gate 	}
2540*0Sstevel@tonic-gate 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2541*0Sstevel@tonic-gate 	mlocked = 0;
2542*0Sstevel@tonic-gate 	no_fault();
2543*0Sstevel@tonic-gate 
2544*0Sstevel@tonic-gate 	if (mwatched) {
2545*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2546*0Sstevel@tonic-gate 		mwatched = 0;
2547*0Sstevel@tonic-gate 	}
2548*0Sstevel@tonic-gate 	if (watched) {
2549*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2550*0Sstevel@tonic-gate 		watched = 0;
2551*0Sstevel@tonic-gate 	}
2552*0Sstevel@tonic-gate 
2553*0Sstevel@tonic-gate 	/*
2554*0Sstevel@tonic-gate 	 * Put the LWP in an orderly state for debugging.
2555*0Sstevel@tonic-gate 	 */
2556*0Sstevel@tonic-gate 	prstop(PR_REQUESTED, 0);
2557*0Sstevel@tonic-gate 	if (timedwait) {
2558*0Sstevel@tonic-gate 		/*
2559*0Sstevel@tonic-gate 		 * If we successfully queue the timeout,
2560*0Sstevel@tonic-gate 		 * then don't drop t_delay_lock until
2561*0Sstevel@tonic-gate 		 * we are on the sleep queue (below).
2562*0Sstevel@tonic-gate 		 */
2563*0Sstevel@tonic-gate 		mutex_enter(&t->t_delay_lock);
2564*0Sstevel@tonic-gate 		if (lwp_timer_enqueue(&lwpt) != 0) {
2565*0Sstevel@tonic-gate 			mutex_exit(&t->t_delay_lock);
2566*0Sstevel@tonic-gate 			imm_timeout = 1;
2567*0Sstevel@tonic-gate 			timedwait = NULL;
2568*0Sstevel@tonic-gate 		}
2569*0Sstevel@tonic-gate 	}
2570*0Sstevel@tonic-gate 	t->t_flag |= T_WAITCVSEM;
2571*0Sstevel@tonic-gate 	lwp_block(&lwpchan);
2572*0Sstevel@tonic-gate 
2573*0Sstevel@tonic-gate 	/*
2574*0Sstevel@tonic-gate 	 * Nothing should happen to cause the LWP to go to sleep until after
2575*0Sstevel@tonic-gate 	 * it returns from swtch().
2576*0Sstevel@tonic-gate 	 */
2577*0Sstevel@tonic-gate 	if (timedwait)
2578*0Sstevel@tonic-gate 		mutex_exit(&t->t_delay_lock);
2579*0Sstevel@tonic-gate 	locked = 0;
2580*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2581*0Sstevel@tonic-gate 	if (ISSIG(t, JUSTLOOKING) || MUSTRETURN(p, t))
2582*0Sstevel@tonic-gate 		setrun(t);
2583*0Sstevel@tonic-gate 	swtch();
2584*0Sstevel@tonic-gate 
2585*0Sstevel@tonic-gate 	/*
2586*0Sstevel@tonic-gate 	 * We're back, but we need to work out why. Were we interrupted?
2587*0Sstevel@tonic-gate 	 * Did we time out? Were we granted the lock?
2588*0Sstevel@tonic-gate 	 */
2589*0Sstevel@tonic-gate 	error = EAGAIN;
2590*0Sstevel@tonic-gate 	acquired = (t->t_writer & TRW_LOCK_GRANTED);
2591*0Sstevel@tonic-gate 	t->t_writer = 0;
2592*0Sstevel@tonic-gate 	t->t_flag &= ~(T_WAITCVSEM | T_WAKEABLE);
2593*0Sstevel@tonic-gate 	if (timedwait)
2594*0Sstevel@tonic-gate 		tim = lwp_timer_dequeue(&lwpt);
2595*0Sstevel@tonic-gate 	if (ISSIG(t, FORREAL) || lwp->lwp_sysabort || MUSTRETURN(p, t))
2596*0Sstevel@tonic-gate 		error = EINTR;
2597*0Sstevel@tonic-gate 	else if (imm_timeout || (timedwait && tim == -1))
2598*0Sstevel@tonic-gate 		error = ETIME;
2599*0Sstevel@tonic-gate 	lwp->lwp_asleep = 0;
2600*0Sstevel@tonic-gate 	lwp->lwp_sysabort = 0;
2601*0Sstevel@tonic-gate 	setallwatch();
2602*0Sstevel@tonic-gate 
2603*0Sstevel@tonic-gate 	/*
2604*0Sstevel@tonic-gate 	 * If we were granted the lock we don't care about EINTR or ETIME.
2605*0Sstevel@tonic-gate 	 */
2606*0Sstevel@tonic-gate 	if (acquired)
2607*0Sstevel@tonic-gate 		error = 0;
2608*0Sstevel@tonic-gate 
2609*0Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
2610*0Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
2611*0Sstevel@tonic-gate 
2612*0Sstevel@tonic-gate 	if (error)
2613*0Sstevel@tonic-gate 		return (set_errno(error));
2614*0Sstevel@tonic-gate 	return (0);
2615*0Sstevel@tonic-gate 
2616*0Sstevel@tonic-gate out_drop:
2617*0Sstevel@tonic-gate 	/*
2618*0Sstevel@tonic-gate 	 * Make sure that the user level lock is dropped before returning
2619*0Sstevel@tonic-gate 	 * to the caller.
2620*0Sstevel@tonic-gate 	 */
2621*0Sstevel@tonic-gate 	if (!mlocked) {
2622*0Sstevel@tonic-gate 		lwpchan_lock(&mlwpchan, LWPCHAN_MPPOOL);
2623*0Sstevel@tonic-gate 		mlocked = 1;
2624*0Sstevel@tonic-gate 	}
2625*0Sstevel@tonic-gate 	suword32_noerr(&mp->mutex_ownerpid, 0);
2626*0Sstevel@tonic-gate 	ulock_clear(&mp->mutex_lockw);
2627*0Sstevel@tonic-gate 	fuword8_noerr(&mp->mutex_waiters, &mwaiters);
2628*0Sstevel@tonic-gate 	if (mwaiters != 0) {
2629*0Sstevel@tonic-gate 		/*
2630*0Sstevel@tonic-gate 		 * See comment above on lock clearing and lwp_release()
2631*0Sstevel@tonic-gate 		 * success/failure.
2632*0Sstevel@tonic-gate 		 */
2633*0Sstevel@tonic-gate 		if (lwp_release(&mlwpchan, &mwaiters, 0) > 0)
2634*0Sstevel@tonic-gate 			suword8_noerr(&mp->mutex_waiters, mwaiters);
2635*0Sstevel@tonic-gate 	}
2636*0Sstevel@tonic-gate 	lwpchan_unlock(&mlwpchan, LWPCHAN_MPPOOL);
2637*0Sstevel@tonic-gate 	mlocked = 0;
2638*0Sstevel@tonic-gate 
2639*0Sstevel@tonic-gate out_nodrop:
2640*0Sstevel@tonic-gate 	no_fault();
2641*0Sstevel@tonic-gate 	if (mwatched)
2642*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)mp, sizeof (*mp), S_WRITE);
2643*0Sstevel@tonic-gate 	if (watched)
2644*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2645*0Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
2646*0Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
2647*0Sstevel@tonic-gate 	if (error)
2648*0Sstevel@tonic-gate 		return (set_errno(error));
2649*0Sstevel@tonic-gate 	return (0);
2650*0Sstevel@tonic-gate }
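
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * uncontended acquisition test that lwp_rwlock_lock() applies twice
 * above.  A reader may take the lock while only reader-count bits are
 * set; a writer only while rwstate is zero.  The name is invented.
 */
#if 0
static int
rwstate_try_acquire_sketch(uint32_t *rwstatep, int rd_wr)
{
	if (rd_wr == READ_LOCK) {
		if ((*rwstatep & ~URW_READERS_MASK) == 0) {
			(*rwstatep)++;		/* one more reader */
			return (1);
		}
	} else if (*rwstatep == 0) {
		*rwstatep = URW_WRITE_LOCKED;	/* sole writer */
		return (1);
	}
	return (0);		/* caller must queue and sleep (or EBUSY) */
}
#endif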
2651*0Sstevel@tonic-gate 
2652*0Sstevel@tonic-gate /*
2653*0Sstevel@tonic-gate  * We enter here holding the user-level mutex but, unlike lwp_rwlock_lock(),
2654*0Sstevel@tonic-gate  * we never drop the lock.
2655*0Sstevel@tonic-gate  */
2656*0Sstevel@tonic-gate static int
2657*0Sstevel@tonic-gate lwp_rwlock_unlock(lwp_rwlock_t *rw)
2658*0Sstevel@tonic-gate {
2659*0Sstevel@tonic-gate 	kthread_t *t = curthread;
2660*0Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
2661*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
2662*0Sstevel@tonic-gate 	volatile uint16_t type = 0;
2663*0Sstevel@tonic-gate 	volatile int error = 0;
2664*0Sstevel@tonic-gate 	volatile int locked = 0;
2665*0Sstevel@tonic-gate 	volatile int watched = 0;
2666*0Sstevel@tonic-gate 	label_t ljb;
2667*0Sstevel@tonic-gate 	volatile int no_lwpchan = 1;
2668*0Sstevel@tonic-gate 	uint32_t rwstate;
2669*0Sstevel@tonic-gate 
2670*0Sstevel@tonic-gate 	/* We only check rw because the mutex is included in it. */
2671*0Sstevel@tonic-gate 	if ((caddr_t)rw >= p->p_as->a_userlimit)
2672*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
2673*0Sstevel@tonic-gate 
2674*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
2675*0Sstevel@tonic-gate 		if (no_lwpchan) {
2676*0Sstevel@tonic-gate 			error = EFAULT;
2677*0Sstevel@tonic-gate 			goto out_nodrop;
2678*0Sstevel@tonic-gate 		}
2679*0Sstevel@tonic-gate 		if (locked) {
2680*0Sstevel@tonic-gate 			locked = 0;
2681*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2682*0Sstevel@tonic-gate 		}
2683*0Sstevel@tonic-gate 		error = EFAULT;
2684*0Sstevel@tonic-gate 		goto out_nodrop;
2685*0Sstevel@tonic-gate 	}
2686*0Sstevel@tonic-gate 
2687*0Sstevel@tonic-gate 	/* We can only continue for simple USYNC_PROCESS locks. */
2688*0Sstevel@tonic-gate 	fuword16_noerr(&rw->rwlock_type, (uint16_t *)&type);
2689*0Sstevel@tonic-gate 	if (type != USYNC_PROCESS) {
2690*0Sstevel@tonic-gate 		error = EINVAL;
2691*0Sstevel@tonic-gate 		goto out_nodrop;
2692*0Sstevel@tonic-gate 	}
2693*0Sstevel@tonic-gate 
2694*0Sstevel@tonic-gate 	/* Force Copy-on-write fault in case objects are MAP_PRIVATE. */
2695*0Sstevel@tonic-gate 	suword16_noerr(&rw->rwlock_type, type);
2696*0Sstevel@tonic-gate 
2697*0Sstevel@tonic-gate 	/* Convert user level rwlock, "rw", to a unique lwpchan. */
2698*0Sstevel@tonic-gate 	if (!get_lwpchan(p->p_as, (caddr_t)rw, type,
2699*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_CVPOOL)) {
2700*0Sstevel@tonic-gate 		error = EFAULT;
2701*0Sstevel@tonic-gate 		goto out_nodrop;
2702*0Sstevel@tonic-gate 	}
2703*0Sstevel@tonic-gate 
2704*0Sstevel@tonic-gate 	no_lwpchan = 0;
2705*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2706*0Sstevel@tonic-gate 
2707*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_CVPOOL);
2708*0Sstevel@tonic-gate 	locked = 1;
2709*0Sstevel@tonic-gate 
2710*0Sstevel@tonic-gate 	/*
2711*0Sstevel@tonic-gate 	 * We can resolve multiple readers (except the last reader) here.
2712*0Sstevel@tonic-gate 	 * For the last reader or a writer we need lwp_rwlock_release(),
2713*0Sstevel@tonic-gate 	 * to which we also delegate the task of copying the new rwstate
2714*0Sstevel@tonic-gate 	 * back to userland (see the comment there).
2715*0Sstevel@tonic-gate 	 */
2716*0Sstevel@tonic-gate 	fuword32_noerr(&rw->rwlock_readers, &rwstate);
2717*0Sstevel@tonic-gate 	if (rwstate & URW_WRITE_LOCKED)
2718*0Sstevel@tonic-gate 		lwp_rwlock_release(&lwpchan, rw);
2719*0Sstevel@tonic-gate 	else if ((rwstate & URW_READERS_MASK) > 0) {
2720*0Sstevel@tonic-gate 		rwstate--;
2721*0Sstevel@tonic-gate 		if ((rwstate & URW_READERS_MASK) == 0)
2722*0Sstevel@tonic-gate 			lwp_rwlock_release(&lwpchan, rw);
2723*0Sstevel@tonic-gate 		else
2724*0Sstevel@tonic-gate 			suword32_noerr(&rw->rwlock_readers, rwstate);
2725*0Sstevel@tonic-gate 	}
2726*0Sstevel@tonic-gate 
2727*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_CVPOOL);
2728*0Sstevel@tonic-gate 	locked = 0;
2729*0Sstevel@tonic-gate 	error = 0;
2730*0Sstevel@tonic-gate 
2731*0Sstevel@tonic-gate out_nodrop:
2732*0Sstevel@tonic-gate 	no_fault();
2733*0Sstevel@tonic-gate 	if (watched)
2734*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)rw, sizeof (*rw), S_WRITE);
2735*0Sstevel@tonic-gate 	if (error)
2736*0Sstevel@tonic-gate 		return (set_errno(error));
2737*0Sstevel@tonic-gate 	return (0);
2738*0Sstevel@tonic-gate }
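
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * unlock decision above.  A write unlock and the last read unlock both
 * hand off through lwp_rwlock_release(); any other read unlock simply
 * stores the decremented count back.  All names below are invented.
 */
#if 0
static void
sketch_release(void)		/* stand-in for lwp_rwlock_release() */
{
}

static void
rwlock_unlock_sketch(uint32_t *rwstatep)
{
	if (*rwstatep & URW_WRITE_LOCKED) {
		sketch_release();	/* writer: always a full hand-off */
	} else if ((*rwstatep & URW_READERS_MASK) > 0) {
		(*rwstatep)--;
		if ((*rwstatep & URW_READERS_MASK) == 0)
			sketch_release();	/* last reader: hand off */
		/* else: just store the decremented count, as above */
	}
}
#endif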
2739*0Sstevel@tonic-gate 
2740*0Sstevel@tonic-gate int
2741*0Sstevel@tonic-gate lwp_rwlock_sys(int subcode, lwp_rwlock_t *rwlp, timespec_t *tsp)
2742*0Sstevel@tonic-gate {
2743*0Sstevel@tonic-gate 	switch (subcode) {
2744*0Sstevel@tonic-gate 	case 0:
2745*0Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, tsp, READ_LOCK));
2746*0Sstevel@tonic-gate 	case 1:
2747*0Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, tsp, WRITE_LOCK));
2748*0Sstevel@tonic-gate 	case 2:
2749*0Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, NULL, READ_LOCK_TRY));
2750*0Sstevel@tonic-gate 	case 3:
2751*0Sstevel@tonic-gate 		return (lwp_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY));
2752*0Sstevel@tonic-gate 	case 4:
2753*0Sstevel@tonic-gate 		return (lwp_rwlock_unlock(rwlp));
2754*0Sstevel@tonic-gate 	}
2755*0Sstevel@tonic-gate 	return (set_errno(EINVAL));
2756*0Sstevel@tonic-gate }
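
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * subcodes above decompose inside lwp_rwlock_lock(), where subcodes 2
 * and 3 differ from 0 and 1 only by TRY_FLAG, which turns a would-block
 * into EBUSY.  The function name is invented.
 */
#if 0
static void
subcode_sketch(void)
{
	ASSERT(READ_LOCK_TRY == (READ_LOCK | TRY_FLAG));	/* subcode 2 */
	ASSERT(WRITE_LOCK_TRY == (WRITE_LOCK | TRY_FLAG));	/* subcode 3 */
}
#endif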
2757*0Sstevel@tonic-gate 
2758*0Sstevel@tonic-gate /*
2759*0Sstevel@tonic-gate  * Return the owner of the user-level s-object.
2760*0Sstevel@tonic-gate  * Since we can't really do this, return NULL.
2761*0Sstevel@tonic-gate  */
2762*0Sstevel@tonic-gate /* ARGSUSED */
2763*0Sstevel@tonic-gate static kthread_t *
2764*0Sstevel@tonic-gate lwpsobj_owner(caddr_t sobj)
2765*0Sstevel@tonic-gate {
2766*0Sstevel@tonic-gate 	return ((kthread_t *)NULL);
2767*0Sstevel@tonic-gate }
2768*0Sstevel@tonic-gate 
2769*0Sstevel@tonic-gate /*
2770*0Sstevel@tonic-gate  * Wake up a thread asleep on a user-level synchronization
2771*0Sstevel@tonic-gate  * object.
2772*0Sstevel@tonic-gate  */
2773*0Sstevel@tonic-gate static void
2774*0Sstevel@tonic-gate lwp_unsleep(kthread_t *t)
2775*0Sstevel@tonic-gate {
2776*0Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
2777*0Sstevel@tonic-gate 	if (t->t_wchan0 != NULL) {
2778*0Sstevel@tonic-gate 		sleepq_head_t *sqh;
2779*0Sstevel@tonic-gate 		sleepq_t *sqp = t->t_sleepq;
2780*0Sstevel@tonic-gate 
2781*0Sstevel@tonic-gate 		if (sqp != NULL) {
2782*0Sstevel@tonic-gate 			sqh = lwpsqhash(&t->t_lwpchan);
2783*0Sstevel@tonic-gate 			ASSERT(&sqh->sq_queue == sqp);
2784*0Sstevel@tonic-gate 			sleepq_unsleep(t);
2785*0Sstevel@tonic-gate 			disp_lock_exit_high(&sqh->sq_lock);
2786*0Sstevel@tonic-gate 			CL_SETRUN(t);
2787*0Sstevel@tonic-gate 			return;
2788*0Sstevel@tonic-gate 		}
2789*0Sstevel@tonic-gate 	}
2790*0Sstevel@tonic-gate 	panic("lwp_unsleep: thread %p not on sleepq", (void *)t);
2791*0Sstevel@tonic-gate }
2792*0Sstevel@tonic-gate 
2793*0Sstevel@tonic-gate /*
2794*0Sstevel@tonic-gate  * Change the priority of a thread asleep on a user-level
2795*0Sstevel@tonic-gate  * synchronization object. To maintain proper priority order,
2796*0Sstevel@tonic-gate  * we:
2797*0Sstevel@tonic-gate  *	o dequeue the thread.
2798*0Sstevel@tonic-gate  *	o change its priority.
2799*0Sstevel@tonic-gate  *	o re-enqueue the thread.
2800*0Sstevel@tonic-gate  * Assumption: the thread is locked on entry.
2801*0Sstevel@tonic-gate  */
2802*0Sstevel@tonic-gate static void
2803*0Sstevel@tonic-gate lwp_change_pri(kthread_t *t, pri_t pri, pri_t *t_prip)
2804*0Sstevel@tonic-gate {
2805*0Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(t));
2806*0Sstevel@tonic-gate 	if (t->t_wchan0 != NULL) {
2807*0Sstevel@tonic-gate 		sleepq_t   *sqp = t->t_sleepq;
2808*0Sstevel@tonic-gate 
2809*0Sstevel@tonic-gate 		sleepq_dequeue(t);
2810*0Sstevel@tonic-gate 		*t_prip = pri;
2811*0Sstevel@tonic-gate 		sleepq_insert(sqp, t);
2812*0Sstevel@tonic-gate 	} else
2813*0Sstevel@tonic-gate 		panic("lwp_change_pri: %p not on a sleep queue", (void *)t);
2814*0Sstevel@tonic-gate }
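
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): why
 * the dequeue/insert dance above is needed.  sleepq_insert() keeps the
 * queue sorted by priority, so changing the priority in place would
 * leave the thread at its old position; removing and re-inserting
 * restores the invariant.  The array model below is invented.
 */
#if 0
static void
requeue_sketch(int *pri, int n, int idx, int newpri)
{
	int i, v = newpri;

	/* remove element idx, then re-insert v keeping descending order */
	for (i = idx; i < n - 1; i++)
		pri[i] = pri[i + 1];
	for (i = n - 1; i > 0 && pri[i - 1] < v; i--)
		pri[i] = pri[i - 1];
	pri[i] = v;
}
#endif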
2815*0Sstevel@tonic-gate 
2816*0Sstevel@tonic-gate /*
2817*0Sstevel@tonic-gate  * Clean up a locked robust mutex
2818*0Sstevel@tonic-gate  */
2819*0Sstevel@tonic-gate static void
2820*0Sstevel@tonic-gate lwp_mutex_cleanup(lwpchan_entry_t *ent, uint16_t lockflg)
2821*0Sstevel@tonic-gate {
2822*0Sstevel@tonic-gate 	uint16_t flag;
2823*0Sstevel@tonic-gate 	uchar_t waiters;
2824*0Sstevel@tonic-gate 	label_t ljb;
2825*0Sstevel@tonic-gate 	pid_t owner_pid;
2826*0Sstevel@tonic-gate 	lwp_mutex_t *lp;
2827*0Sstevel@tonic-gate 	volatile int locked = 0;
2828*0Sstevel@tonic-gate 	volatile int watched = 0;
2829*0Sstevel@tonic-gate 
2830*0Sstevel@tonic-gate 	ASSERT(ent->lwpchan_type & USYNC_PROCESS_ROBUST);
2831*0Sstevel@tonic-gate 
2832*0Sstevel@tonic-gate 	lp = (lwp_mutex_t *)ent->lwpchan_addr;
2833*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2834*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
2835*0Sstevel@tonic-gate 		if (locked)
2836*0Sstevel@tonic-gate 			lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2837*0Sstevel@tonic-gate 		goto out;
2838*0Sstevel@tonic-gate 	}
2839*0Sstevel@tonic-gate 	fuword32_noerr(&lp->mutex_ownerpid, (uint32_t *)&owner_pid);
2840*0Sstevel@tonic-gate 	if (owner_pid != curproc->p_pid) {
2841*0Sstevel@tonic-gate 		goto out;
2842*0Sstevel@tonic-gate 	}
2843*0Sstevel@tonic-gate 	lwpchan_lock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2844*0Sstevel@tonic-gate 	locked = 1;
2845*0Sstevel@tonic-gate 	fuword16_noerr(&lp->mutex_flag, &flag);
2846*0Sstevel@tonic-gate 	if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0) {
2847*0Sstevel@tonic-gate 		flag |= lockflg;
2848*0Sstevel@tonic-gate 		suword16_noerr(&lp->mutex_flag, flag);
2849*0Sstevel@tonic-gate 	}
2850*0Sstevel@tonic-gate 	suword32_noerr(&lp->mutex_ownerpid, 0);
2851*0Sstevel@tonic-gate 	ulock_clear(&lp->mutex_lockw);
2852*0Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_waiters, &waiters);
2853*0Sstevel@tonic-gate 	if (waiters && lwp_release(&ent->lwpchan_lwpchan, &waiters, 0))
2854*0Sstevel@tonic-gate 		suword8_noerr(&lp->mutex_waiters, waiters);
2855*0Sstevel@tonic-gate 	lwpchan_unlock(&ent->lwpchan_lwpchan, LWPCHAN_MPPOOL);
2856*0Sstevel@tonic-gate out:
2857*0Sstevel@tonic-gate 	no_fault();
2858*0Sstevel@tonic-gate 	if (watched)
2859*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2860*0Sstevel@tonic-gate }
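
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * flag transition performed above once the exiting process is known to
 * own the robust mutex.  Only the first abandonment is recorded; the
 * supplied lockflg is one of LOCK_OWNERDEAD or LOCK_UNMAPPED.  The
 * function name is invented.
 */
#if 0
static uint16_t
robust_abandon_sketch(uint16_t flag, uint16_t lockflg)
{
	if ((flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) == 0)
		flag |= lockflg;	/* first abandonment wins */
	return (flag);
}
#endif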
2861*0Sstevel@tonic-gate 
2862*0Sstevel@tonic-gate /*
2863*0Sstevel@tonic-gate  * Register the mutex and initialize it if it is not already initialized
2864*0Sstevel@tonic-gate  */
2865*0Sstevel@tonic-gate int
2866*0Sstevel@tonic-gate lwp_mutex_init(lwp_mutex_t *lp, int type)
2867*0Sstevel@tonic-gate {
2868*0Sstevel@tonic-gate 	proc_t *p = curproc;
2869*0Sstevel@tonic-gate 	int error = 0;
2870*0Sstevel@tonic-gate 	volatile int locked = 0;
2871*0Sstevel@tonic-gate 	volatile int watched = 0;
2872*0Sstevel@tonic-gate 	label_t ljb;
2873*0Sstevel@tonic-gate 	uint16_t flag;
2874*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
2875*0Sstevel@tonic-gate 	pid_t owner_pid;
2876*0Sstevel@tonic-gate 
2877*0Sstevel@tonic-gate 	if ((caddr_t)lp >= (caddr_t)USERLIMIT)
2878*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
2879*0Sstevel@tonic-gate 
2880*0Sstevel@tonic-gate 	if (type != USYNC_PROCESS_ROBUST)
2881*0Sstevel@tonic-gate 		return (set_errno(EINVAL));
2882*0Sstevel@tonic-gate 
2883*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2884*0Sstevel@tonic-gate 
2885*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
2886*0Sstevel@tonic-gate 		if (locked)
2887*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2888*0Sstevel@tonic-gate 		error = EFAULT;
2889*0Sstevel@tonic-gate 		goto out;
2890*0Sstevel@tonic-gate 	}
2891*0Sstevel@tonic-gate 	/*
2892*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_mutex_t object is
2893*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE and it was initialized to
2894*0Sstevel@tonic-gate 	 * USYNC_PROCESS.
2895*0Sstevel@tonic-gate 	 */
2896*0Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_type, type);
2897*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
2898*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
2899*0Sstevel@tonic-gate 		error = EFAULT;
2900*0Sstevel@tonic-gate 		goto out;
2901*0Sstevel@tonic-gate 	}
2902*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
2903*0Sstevel@tonic-gate 	locked = 1;
2904*0Sstevel@tonic-gate 	fuword16_noerr(&lp->mutex_flag, &flag);
2905*0Sstevel@tonic-gate 	if (flag & LOCK_INITED) {
2906*0Sstevel@tonic-gate 		if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
2907*0Sstevel@tonic-gate 			fuword32_noerr(&lp->mutex_ownerpid,
2908*0Sstevel@tonic-gate 			    (uint32_t *)&owner_pid);
2909*0Sstevel@tonic-gate 			if (owner_pid == p->p_pid) {
2910*0Sstevel@tonic-gate 				flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
2911*0Sstevel@tonic-gate 				suword16_noerr(&lp->mutex_flag, flag);
2912*0Sstevel@tonic-gate 				locked = 0;
2913*0Sstevel@tonic-gate 				lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2914*0Sstevel@tonic-gate 				goto out;
2915*0Sstevel@tonic-gate 			}
2916*0Sstevel@tonic-gate 		}
2917*0Sstevel@tonic-gate 		error = EBUSY;
2918*0Sstevel@tonic-gate 	} else {
2919*0Sstevel@tonic-gate 		suword8_noerr(&lp->mutex_waiters, 0);
2920*0Sstevel@tonic-gate 		suword8_noerr(&lp->mutex_lockw, 0);
2921*0Sstevel@tonic-gate 		suword16_noerr(&lp->mutex_flag, LOCK_INITED);
2922*0Sstevel@tonic-gate 		suword32_noerr(&lp->mutex_ownerpid, 0);
2923*0Sstevel@tonic-gate 	}
2924*0Sstevel@tonic-gate 	locked = 0;
2925*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2926*0Sstevel@tonic-gate out:
2927*0Sstevel@tonic-gate 	no_fault();
2928*0Sstevel@tonic-gate 	if (watched)
2929*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2930*0Sstevel@tonic-gate 	if (error)
2931*0Sstevel@tonic-gate 		return (set_errno(error));
2932*0Sstevel@tonic-gate 	return (0);
2933*0Sstevel@tonic-gate }
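
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * registration state machine above.  A fresh mutex is initialized; an
 * initialized mutex whose dead/unmapped owner was this process is
 * quietly recovered; any other initialized mutex yields EBUSY.  The
 * function name is invented.
 */
#if 0
static int
mutex_init_sketch(uint16_t *flagp, pid_t owner, pid_t me)
{
	if (!(*flagp & LOCK_INITED)) {
		*flagp = LOCK_INITED;	/* fresh: initialize the fields */
		return (0);
	}
	if ((*flagp & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) && owner == me) {
		*flagp &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
		return (0);		/* recovering our own stale state */
	}
	return (EBUSY);			/* already registered and in use */
}
#endif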
2934*0Sstevel@tonic-gate 
2935*0Sstevel@tonic-gate int
2936*0Sstevel@tonic-gate lwp_mutex_trylock(lwp_mutex_t *lp)
2937*0Sstevel@tonic-gate {
2938*0Sstevel@tonic-gate 	kthread_t *t = curthread;
2939*0Sstevel@tonic-gate 	proc_t *p = ttoproc(t);
2940*0Sstevel@tonic-gate 	int error = 0;
2941*0Sstevel@tonic-gate 	volatile int locked = 0;
2942*0Sstevel@tonic-gate 	volatile int watched = 0;
2943*0Sstevel@tonic-gate 	label_t ljb;
2944*0Sstevel@tonic-gate 	volatile uint8_t type = 0;
2945*0Sstevel@tonic-gate 	uint16_t flag;
2946*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
2947*0Sstevel@tonic-gate 
2948*0Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
2949*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
2950*0Sstevel@tonic-gate 
2951*0Sstevel@tonic-gate 	(void) new_mstate(t, LMS_USER_LOCK);
2952*0Sstevel@tonic-gate 
2953*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
2954*0Sstevel@tonic-gate 		if (locked)
2955*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2956*0Sstevel@tonic-gate 		error = EFAULT;
2957*0Sstevel@tonic-gate 		goto out;
2958*0Sstevel@tonic-gate 	}
2959*0Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
2960*0Sstevel@tonic-gate 	if (UPIMUTEX(type)) {
2961*0Sstevel@tonic-gate 		no_fault();
2962*0Sstevel@tonic-gate 		error = lwp_upimutex_lock(lp, type, UPIMUTEX_TRY, NULL);
2963*0Sstevel@tonic-gate 		if ((error == 0 || error == EOWNERDEAD) &&
2964*0Sstevel@tonic-gate 		    (type & USYNC_PROCESS))
2965*0Sstevel@tonic-gate 			(void) suword32(&lp->mutex_ownerpid, p->p_pid);
2966*0Sstevel@tonic-gate 		if (error)
2967*0Sstevel@tonic-gate 			return (set_errno(error));
2968*0Sstevel@tonic-gate 		return (0);
2969*0Sstevel@tonic-gate 	}
2970*0Sstevel@tonic-gate 	/*
2971*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_mutex_t object is
2972*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE and it was initialized to
2973*0Sstevel@tonic-gate 	 * USYNC_PROCESS.
2974*0Sstevel@tonic-gate 	 */
2975*0Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_type, type);
2976*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
2977*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
2978*0Sstevel@tonic-gate 		error = EFAULT;
2979*0Sstevel@tonic-gate 		goto out;
2980*0Sstevel@tonic-gate 	}
2981*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
2982*0Sstevel@tonic-gate 	locked = 1;
2983*0Sstevel@tonic-gate 	if (type & USYNC_PROCESS_ROBUST) {
2984*0Sstevel@tonic-gate 		fuword16_noerr((uint16_t *)(&lp->mutex_flag), &flag);
2985*0Sstevel@tonic-gate 		if (flag & LOCK_NOTRECOVERABLE) {
2986*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
2987*0Sstevel@tonic-gate 			error = ENOTRECOVERABLE;
2988*0Sstevel@tonic-gate 			goto out;
2989*0Sstevel@tonic-gate 		}
2990*0Sstevel@tonic-gate 	}
2991*0Sstevel@tonic-gate 
2992*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
2993*0Sstevel@tonic-gate 
2994*0Sstevel@tonic-gate 	if (!ulock_try(&lp->mutex_lockw))
2995*0Sstevel@tonic-gate 		error = EBUSY;
2996*0Sstevel@tonic-gate 	else if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) {
2997*0Sstevel@tonic-gate 		suword32_noerr(&lp->mutex_ownerpid, p->p_pid);
2998*0Sstevel@tonic-gate 		if (type & USYNC_PROCESS_ROBUST) {
2999*0Sstevel@tonic-gate 			if (flag & LOCK_OWNERDEAD)
3000*0Sstevel@tonic-gate 				error = EOWNERDEAD;
3001*0Sstevel@tonic-gate 			else if (flag & LOCK_UNMAPPED)
3002*0Sstevel@tonic-gate 				error = ELOCKUNMAPPED;
3003*0Sstevel@tonic-gate 		}
3004*0Sstevel@tonic-gate 	}
3005*0Sstevel@tonic-gate 	locked = 0;
3006*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3007*0Sstevel@tonic-gate out:
3008*0Sstevel@tonic-gate 
3009*0Sstevel@tonic-gate 	if (t->t_mstate == LMS_USER_LOCK)
3010*0Sstevel@tonic-gate 		(void) new_mstate(t, LMS_SYSTEM);
3011*0Sstevel@tonic-gate 
3012*0Sstevel@tonic-gate 	no_fault();
3013*0Sstevel@tonic-gate 	if (watched)
3014*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3015*0Sstevel@tonic-gate 	if (error)
3016*0Sstevel@tonic-gate 		return (set_errno(error));
3017*0Sstevel@tonic-gate 	return (0);
3018*0Sstevel@tonic-gate }
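
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * outcome table of the robust trylock path above.  LOCK_NOTRECOVERABLE
 * refuses before trying; a failed ulock_try() is EBUSY; a successful
 * one may still report EOWNERDEAD or ELOCKUNMAPPED so the caller can
 * make the state consistent.  The function name is invented.
 */
#if 0
static int
robust_trylock_sketch(uint16_t flag, int got_lock)
{
	if (flag & LOCK_NOTRECOVERABLE)
		return (ENOTRECOVERABLE);
	if (!got_lock)
		return (EBUSY);
	if (flag & LOCK_OWNERDEAD)
		return (EOWNERDEAD);
	if (flag & LOCK_UNMAPPED)
		return (ELOCKUNMAPPED);
	return (0);
}
#endif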
3019*0Sstevel@tonic-gate 
3020*0Sstevel@tonic-gate /*
3021*0Sstevel@tonic-gate  * Unlock the mutex and unblock any lwps trying to acquire this mutex.
3022*0Sstevel@tonic-gate  * The blocked lwps resume and retry to acquire the lock.
3023*0Sstevel@tonic-gate  */
3024*0Sstevel@tonic-gate int
3025*0Sstevel@tonic-gate lwp_mutex_unlock(lwp_mutex_t *lp)
3026*0Sstevel@tonic-gate {
3027*0Sstevel@tonic-gate 	proc_t *p = ttoproc(curthread);
3028*0Sstevel@tonic-gate 	lwpchan_t lwpchan;
3029*0Sstevel@tonic-gate 	uchar_t waiters;
3030*0Sstevel@tonic-gate 	volatile int locked = 0;
3031*0Sstevel@tonic-gate 	volatile int watched = 0;
3032*0Sstevel@tonic-gate 	volatile uint8_t type = 0;
3033*0Sstevel@tonic-gate 	label_t ljb;
3034*0Sstevel@tonic-gate 	uint16_t flag;
3035*0Sstevel@tonic-gate 	int error = 0;
3036*0Sstevel@tonic-gate 
3037*0Sstevel@tonic-gate 	if ((caddr_t)lp >= p->p_as->a_userlimit)
3038*0Sstevel@tonic-gate 		return (set_errno(EFAULT));
3039*0Sstevel@tonic-gate 
3040*0Sstevel@tonic-gate 	if (on_fault(&ljb)) {
3041*0Sstevel@tonic-gate 		if (locked)
3042*0Sstevel@tonic-gate 			lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3043*0Sstevel@tonic-gate 		error = EFAULT;
3044*0Sstevel@tonic-gate 		goto out;
3045*0Sstevel@tonic-gate 	}
3046*0Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_type, (uint8_t *)&type);
3047*0Sstevel@tonic-gate 	if (UPIMUTEX(type)) {
3048*0Sstevel@tonic-gate 		no_fault();
3049*0Sstevel@tonic-gate 		error = lwp_upimutex_unlock(lp, type);
3050*0Sstevel@tonic-gate 		if (error)
3051*0Sstevel@tonic-gate 			return (set_errno(error));
3052*0Sstevel@tonic-gate 		return (0);
3053*0Sstevel@tonic-gate 	}
3054*0Sstevel@tonic-gate 
3055*0Sstevel@tonic-gate 	watched = watch_disable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3056*0Sstevel@tonic-gate 
3057*0Sstevel@tonic-gate 	/*
3058*0Sstevel@tonic-gate 	 * Force Copy-on-write fault if lwp_mutex_t object is
3059*0Sstevel@tonic-gate 	 * defined to be MAP_PRIVATE, and type is USYNC_PROCESS.
3060*0Sstevel@tonic-gate 	 */
3061*0Sstevel@tonic-gate 	suword8_noerr(&lp->mutex_type, type);
3062*0Sstevel@tonic-gate 	if (!get_lwpchan(curproc->p_as, (caddr_t)lp, type,
3063*0Sstevel@tonic-gate 	    &lwpchan, LWPCHAN_MPPOOL)) {
3064*0Sstevel@tonic-gate 		error = EFAULT;
3065*0Sstevel@tonic-gate 		goto out;
3066*0Sstevel@tonic-gate 	}
3067*0Sstevel@tonic-gate 	lwpchan_lock(&lwpchan, LWPCHAN_MPPOOL);
3068*0Sstevel@tonic-gate 	locked = 1;
3069*0Sstevel@tonic-gate 	if (type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) {
3070*0Sstevel@tonic-gate 		if (type & USYNC_PROCESS_ROBUST) {
3071*0Sstevel@tonic-gate 			fuword16_noerr(&lp->mutex_flag, &flag);
3072*0Sstevel@tonic-gate 			if (flag & (LOCK_OWNERDEAD | LOCK_UNMAPPED)) {
3073*0Sstevel@tonic-gate 				flag &= ~(LOCK_OWNERDEAD | LOCK_UNMAPPED);
3074*0Sstevel@tonic-gate 				flag |= LOCK_NOTRECOVERABLE;
3075*0Sstevel@tonic-gate 				suword16_noerr(&lp->mutex_flag, flag);
3076*0Sstevel@tonic-gate 			}
3077*0Sstevel@tonic-gate 		}
3078*0Sstevel@tonic-gate 		suword32_noerr(&lp->mutex_ownerpid, 0);
3079*0Sstevel@tonic-gate 	}
3080*0Sstevel@tonic-gate 	ulock_clear(&lp->mutex_lockw);
3081*0Sstevel@tonic-gate 	/*
3082*0Sstevel@tonic-gate 	 * Always wake up an lwp (if any) waiting on lwpchan. The woken lwp will
3083*0Sstevel@tonic-gate 	 * re-try the lock in lwp_mutex_timedlock(). The call to lwp_release()
3084*0Sstevel@tonic-gate 	 * may fail.  If it fails, do not write into the waiter bit.
3085*0Sstevel@tonic-gate 	 * The call to lwp_release() might fail due to one of three reasons:
3086*0Sstevel@tonic-gate 	 *
3087*0Sstevel@tonic-gate 	 * 	1. due to the thread which set the waiter bit not actually
3088*0Sstevel@tonic-gate 	 *	   sleeping since it got the lock on the re-try. The waiter
3089*0Sstevel@tonic-gate 	 *	   bit will then be correctly updated by that thread. This
3090*0Sstevel@tonic-gate 	 *	   window may be closed by reading the wait bit again here
3091*0Sstevel@tonic-gate 	 *	   and not calling lwp_release() at all if it is zero.
3092*0Sstevel@tonic-gate 	 *	2. the thread which set the waiter bit and went to sleep
3093*0Sstevel@tonic-gate 	 *	   was woken up by a signal. In that case, the waiter
3094*0Sstevel@tonic-gate 	 *	   recomputes the wait bit in its return-with-EINTR path.
3095*0Sstevel@tonic-gate 	 *	3. the waiter bit read by lwp_mutex_wakeup() was in
3096*0Sstevel@tonic-gate 	 *	   memory that has been re-used after the lock was dropped.
3097*0Sstevel@tonic-gate 	 *	   In this case, writing into the waiter bit would cause data
3098*0Sstevel@tonic-gate 	 *	   corruption.
3099*0Sstevel@tonic-gate 	 */
3100*0Sstevel@tonic-gate 	fuword8_noerr(&lp->mutex_waiters, &waiters);
3101*0Sstevel@tonic-gate 	if (waiters) {
3102*0Sstevel@tonic-gate 		if ((type & USYNC_PROCESS_ROBUST) &&
3103*0Sstevel@tonic-gate 		    (flag & LOCK_NOTRECOVERABLE)) {
3104*0Sstevel@tonic-gate 			lwp_release_all(&lwpchan);
3105*0Sstevel@tonic-gate 			suword8_noerr(&lp->mutex_waiters, 0);
3106*0Sstevel@tonic-gate 		} else if (lwp_release(&lwpchan, &waiters, 0) == 1) {
3107*0Sstevel@tonic-gate 			suword8_noerr(&lp->mutex_waiters, waiters);
3108*0Sstevel@tonic-gate 		}
3109*0Sstevel@tonic-gate 	}
3110*0Sstevel@tonic-gate 
3111*0Sstevel@tonic-gate 	lwpchan_unlock(&lwpchan, LWPCHAN_MPPOOL);
3112*0Sstevel@tonic-gate out:
3113*0Sstevel@tonic-gate 	no_fault();
3114*0Sstevel@tonic-gate 	if (watched)
3115*0Sstevel@tonic-gate 		watch_enable_addr((caddr_t)lp, sizeof (*lp), S_WRITE);
3116*0Sstevel@tonic-gate 	if (error)
3117*0Sstevel@tonic-gate 		return (set_errno(error));
3118*0Sstevel@tonic-gate 	return (0);
3119*0Sstevel@tonic-gate }
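
/*
 * Illustrative sketch (hypothetical, not compiled into the kernel): the
 * wake policy above.  A not-recoverable robust mutex wakes every waiter
 * and clears the waiters byte; otherwise one waiter is released and the
 * byte is rewritten only when lwp_release() reports success, for the
 * three reasons listed in the block comment above.  All names below are
 * invented.
 */
#if 0
static void
sketch_wake_all(void)			/* stand-in for lwp_release_all() */
{
}

static int
sketch_wake_one(void)			/* stand-in for lwp_release() */
{
	return (1);
}

static void
unlock_wake_sketch(int not_recoverable, unsigned char *waitersp)
{
	if (*waitersp == 0)
		return;			/* nobody to wake */
	if (not_recoverable) {
		sketch_wake_all();
		*waitersp = 0;		/* queue fully drained */
	} else if (sketch_wake_one() == 1) {
		/* store the byte the release computed; else leave it */
	}
}
#endif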
3120