xref: /onnv-gate/usr/src/lib/libc/port/threads/synch.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate #include <sys/sdt.h>
30*0Sstevel@tonic-gate 
31*0Sstevel@tonic-gate #include "lint.h"
32*0Sstevel@tonic-gate #include "thr_uberdata.h"
33*0Sstevel@tonic-gate 
34*0Sstevel@tonic-gate /*
35*0Sstevel@tonic-gate  * This mutex is initialized to be held by lwp#1.
36*0Sstevel@tonic-gate  * It is used to block a thread that has returned from a mutex_lock()
37*0Sstevel@tonic-gate  * of a PTHREAD_PRIO_INHERIT mutex with an unrecoverable error.
38*0Sstevel@tonic-gate  */
39*0Sstevel@tonic-gate mutex_t	stall_mutex = DEFAULTMUTEX;
40*0Sstevel@tonic-gate 
41*0Sstevel@tonic-gate static int shared_mutex_held(mutex_t *);
42*0Sstevel@tonic-gate 
43*0Sstevel@tonic-gate /*
44*0Sstevel@tonic-gate  * Lock statistics support functions.
45*0Sstevel@tonic-gate  */
46*0Sstevel@tonic-gate void
47*0Sstevel@tonic-gate record_begin_hold(tdb_mutex_stats_t *msp)
48*0Sstevel@tonic-gate {
49*0Sstevel@tonic-gate 	tdb_incr(msp->mutex_lock);
50*0Sstevel@tonic-gate 	msp->mutex_begin_hold = gethrtime();
51*0Sstevel@tonic-gate }
52*0Sstevel@tonic-gate 
53*0Sstevel@tonic-gate hrtime_t
54*0Sstevel@tonic-gate record_hold_time(tdb_mutex_stats_t *msp)
55*0Sstevel@tonic-gate {
56*0Sstevel@tonic-gate 	hrtime_t now = gethrtime();
57*0Sstevel@tonic-gate 
58*0Sstevel@tonic-gate 	if (msp->mutex_begin_hold)
59*0Sstevel@tonic-gate 		msp->mutex_hold_time += now - msp->mutex_begin_hold;
60*0Sstevel@tonic-gate 	msp->mutex_begin_hold = 0;
61*0Sstevel@tonic-gate 	return (now);
62*0Sstevel@tonic-gate }
63*0Sstevel@tonic-gate 
64*0Sstevel@tonic-gate /*
65*0Sstevel@tonic-gate  * Called once at library initialization.
66*0Sstevel@tonic-gate  */
67*0Sstevel@tonic-gate void
68*0Sstevel@tonic-gate mutex_setup(void)
69*0Sstevel@tonic-gate {
70*0Sstevel@tonic-gate 	if (set_lock_byte(&stall_mutex.mutex_lockw))
71*0Sstevel@tonic-gate 		thr_panic("mutex_setup() cannot acquire stall_mutex");
72*0Sstevel@tonic-gate 	stall_mutex.mutex_owner = (uintptr_t)curthread;
73*0Sstevel@tonic-gate }
74*0Sstevel@tonic-gate 
75*0Sstevel@tonic-gate /*
76*0Sstevel@tonic-gate  * The default spin counts of 1000 and 500 are experimentally determined.
77*0Sstevel@tonic-gate  * On sun4u machines with any number of processors they could be raised
78*0Sstevel@tonic-gate  * to 10,000 but that (experimentally) makes almost no difference.
79*0Sstevel@tonic-gate  * The environment variables:
80*0Sstevel@tonic-gate  *	_THREAD_ADAPTIVE_SPIN=count
81*0Sstevel@tonic-gate  *	_THREAD_RELEASE_SPIN=count
82*0Sstevel@tonic-gate  * can be used to override and set the counts in the range [0 .. 1,000,000].
83*0Sstevel@tonic-gate  */
84*0Sstevel@tonic-gate int	thread_adaptive_spin = 1000;
85*0Sstevel@tonic-gate uint_t	thread_max_spinners = 100;
86*0Sstevel@tonic-gate int	thread_release_spin = 500;
87*0Sstevel@tonic-gate int	thread_queue_verify = 0;
88*0Sstevel@tonic-gate static	int	ncpus;
89*0Sstevel@tonic-gate 
90*0Sstevel@tonic-gate /*
91*0Sstevel@tonic-gate  * Distinguish spinning for queue locks from spinning for regular locks.
92*0Sstevel@tonic-gate  * The environment variable:
93*0Sstevel@tonic-gate  *	_THREAD_QUEUE_SPIN=count
94*0Sstevel@tonic-gate  * can be used to override and set the count in the range [0 .. 1,000,000].
95*0Sstevel@tonic-gate  * There is no release spin concept for queue locks.
96*0Sstevel@tonic-gate  */
97*0Sstevel@tonic-gate int	thread_queue_spin = 1000;
98*0Sstevel@tonic-gate 
99*0Sstevel@tonic-gate /*
100*0Sstevel@tonic-gate  * Use the otherwise-unused 'mutex_ownerpid' field of a USYNC_THREAD
101*0Sstevel@tonic-gate  * mutex to be a count of adaptive spins in progress.
102*0Sstevel@tonic-gate  */
103*0Sstevel@tonic-gate #define	mutex_spinners	mutex_ownerpid
104*0Sstevel@tonic-gate 
105*0Sstevel@tonic-gate void
106*0Sstevel@tonic-gate _mutex_set_typeattr(mutex_t *mp, int attr)
107*0Sstevel@tonic-gate {
108*0Sstevel@tonic-gate 	mp->mutex_type |= (uint8_t)attr;
109*0Sstevel@tonic-gate }
110*0Sstevel@tonic-gate 
111*0Sstevel@tonic-gate /*
112*0Sstevel@tonic-gate  * 'type' can be one of USYNC_THREAD or USYNC_PROCESS, possibly
113*0Sstevel@tonic-gate  * augmented by the flags LOCK_RECURSIVE and/or LOCK_ERRORCHECK,
114*0Sstevel@tonic-gate  * or it can be USYNC_PROCESS_ROBUST with no extra flags.
115*0Sstevel@tonic-gate  */
116*0Sstevel@tonic-gate #pragma weak _private_mutex_init = __mutex_init
117*0Sstevel@tonic-gate #pragma weak mutex_init = __mutex_init
118*0Sstevel@tonic-gate #pragma weak _mutex_init = __mutex_init
119*0Sstevel@tonic-gate /* ARGSUSED2 */
120*0Sstevel@tonic-gate int
121*0Sstevel@tonic-gate __mutex_init(mutex_t *mp, int type, void *arg)
122*0Sstevel@tonic-gate {
123*0Sstevel@tonic-gate 	int error;
124*0Sstevel@tonic-gate 
125*0Sstevel@tonic-gate 	switch (type & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) {
126*0Sstevel@tonic-gate 	case USYNC_THREAD:
127*0Sstevel@tonic-gate 	case USYNC_PROCESS:
128*0Sstevel@tonic-gate 		(void) _memset(mp, 0, sizeof (*mp));
129*0Sstevel@tonic-gate 		mp->mutex_type = (uint8_t)type;
130*0Sstevel@tonic-gate 		mp->mutex_flag = LOCK_INITED;
131*0Sstevel@tonic-gate 		error = 0;
132*0Sstevel@tonic-gate 		break;
133*0Sstevel@tonic-gate 	case USYNC_PROCESS_ROBUST:
134*0Sstevel@tonic-gate 		if (type & (LOCK_RECURSIVE|LOCK_ERRORCHECK))
135*0Sstevel@tonic-gate 			error = EINVAL;
136*0Sstevel@tonic-gate 		else
137*0Sstevel@tonic-gate 			error = ___lwp_mutex_init(mp, type);
138*0Sstevel@tonic-gate 		break;
139*0Sstevel@tonic-gate 	default:
140*0Sstevel@tonic-gate 		error = EINVAL;
141*0Sstevel@tonic-gate 		break;
142*0Sstevel@tonic-gate 	}
143*0Sstevel@tonic-gate 	if (error == 0)
144*0Sstevel@tonic-gate 		mp->mutex_magic = MUTEX_MAGIC;
145*0Sstevel@tonic-gate 	return (error);
146*0Sstevel@tonic-gate }
147*0Sstevel@tonic-gate 
148*0Sstevel@tonic-gate /*
149*0Sstevel@tonic-gate  * Delete mp from list of ceil mutexes owned by curthread.
150*0Sstevel@tonic-gate  * Return 1 if the head of the chain was updated.
151*0Sstevel@tonic-gate  */
152*0Sstevel@tonic-gate int
153*0Sstevel@tonic-gate _ceil_mylist_del(mutex_t *mp)
154*0Sstevel@tonic-gate {
155*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
156*0Sstevel@tonic-gate 	mxchain_t **mcpp;
157*0Sstevel@tonic-gate 	mxchain_t *mcp;
158*0Sstevel@tonic-gate 
159*0Sstevel@tonic-gate 	mcpp = &self->ul_mxchain;
160*0Sstevel@tonic-gate 	while ((*mcpp)->mxchain_mx != mp)
161*0Sstevel@tonic-gate 		mcpp = &(*mcpp)->mxchain_next;
162*0Sstevel@tonic-gate 	mcp = *mcpp;
163*0Sstevel@tonic-gate 	*mcpp = mcp->mxchain_next;
164*0Sstevel@tonic-gate 	lfree(mcp, sizeof (*mcp));
165*0Sstevel@tonic-gate 	return (mcpp == &self->ul_mxchain);
166*0Sstevel@tonic-gate }
167*0Sstevel@tonic-gate 
168*0Sstevel@tonic-gate /*
169*0Sstevel@tonic-gate  * Add mp to head of list of ceil mutexes owned by curthread.
170*0Sstevel@tonic-gate  * Return ENOMEM if no memory could be allocated.
171*0Sstevel@tonic-gate  */
172*0Sstevel@tonic-gate int
173*0Sstevel@tonic-gate _ceil_mylist_add(mutex_t *mp)
174*0Sstevel@tonic-gate {
175*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
176*0Sstevel@tonic-gate 	mxchain_t *mcp;
177*0Sstevel@tonic-gate 
178*0Sstevel@tonic-gate 	if ((mcp = lmalloc(sizeof (*mcp))) == NULL)
179*0Sstevel@tonic-gate 		return (ENOMEM);
180*0Sstevel@tonic-gate 	mcp->mxchain_mx = mp;
181*0Sstevel@tonic-gate 	mcp->mxchain_next = self->ul_mxchain;
182*0Sstevel@tonic-gate 	self->ul_mxchain = mcp;
183*0Sstevel@tonic-gate 	return (0);
184*0Sstevel@tonic-gate }
185*0Sstevel@tonic-gate 
186*0Sstevel@tonic-gate /*
187*0Sstevel@tonic-gate  * Inherit priority from ceiling.  The inheritance impacts the effective
188*0Sstevel@tonic-gate  * priority, not the assigned priority.  See _thread_setschedparam_main().
189*0Sstevel@tonic-gate  */
190*0Sstevel@tonic-gate void
191*0Sstevel@tonic-gate _ceil_prio_inherit(int ceil)
192*0Sstevel@tonic-gate {
193*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
194*0Sstevel@tonic-gate 	struct sched_param param;
195*0Sstevel@tonic-gate 
196*0Sstevel@tonic-gate 	(void) _memset(&param, 0, sizeof (param));
197*0Sstevel@tonic-gate 	param.sched_priority = ceil;
198*0Sstevel@tonic-gate 	if (_thread_setschedparam_main(self->ul_lwpid,
199*0Sstevel@tonic-gate 	    self->ul_policy, &param, PRIO_INHERIT)) {
200*0Sstevel@tonic-gate 		/*
201*0Sstevel@tonic-gate 		 * Panic since unclear what error code to return.
202*0Sstevel@tonic-gate 		 * If we do return the error codes returned by above
203*0Sstevel@tonic-gate 		 * called routine, update the man page...
204*0Sstevel@tonic-gate 		 */
205*0Sstevel@tonic-gate 		thr_panic("_thread_setschedparam_main() fails");
206*0Sstevel@tonic-gate 	}
207*0Sstevel@tonic-gate }
208*0Sstevel@tonic-gate 
209*0Sstevel@tonic-gate /*
210*0Sstevel@tonic-gate  * Waive inherited ceiling priority.  Inherit from head of owned ceiling locks
211*0Sstevel@tonic-gate  * if holding at least one ceiling lock.  If no ceiling locks are held at this
212*0Sstevel@tonic-gate  * point, disinherit completely, reverting back to assigned priority.
213*0Sstevel@tonic-gate  */
214*0Sstevel@tonic-gate void
215*0Sstevel@tonic-gate _ceil_prio_waive(void)
216*0Sstevel@tonic-gate {
217*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
218*0Sstevel@tonic-gate 	struct sched_param param;
219*0Sstevel@tonic-gate 
220*0Sstevel@tonic-gate 	(void) _memset(&param, 0, sizeof (param));
221*0Sstevel@tonic-gate 	if (self->ul_mxchain == NULL) {
222*0Sstevel@tonic-gate 		/*
223*0Sstevel@tonic-gate 		 * No ceil locks held.  Zero the epri, revert back to ul_pri.
224*0Sstevel@tonic-gate 		 * Since thread's hash lock is not held, one cannot just
225*0Sstevel@tonic-gate 		 * read ul_pri here...do it in the called routine...
226*0Sstevel@tonic-gate 		 */
227*0Sstevel@tonic-gate 		param.sched_priority = self->ul_pri;	/* ignored */
228*0Sstevel@tonic-gate 		if (_thread_setschedparam_main(self->ul_lwpid,
229*0Sstevel@tonic-gate 		    self->ul_policy, &param, PRIO_DISINHERIT))
230*0Sstevel@tonic-gate 			thr_panic("_thread_setschedparam_main() fails");
231*0Sstevel@tonic-gate 	} else {
232*0Sstevel@tonic-gate 		/*
233*0Sstevel@tonic-gate 		 * Set priority to that of the mutex at the head
234*0Sstevel@tonic-gate 		 * of the ceilmutex chain.
235*0Sstevel@tonic-gate 		 */
236*0Sstevel@tonic-gate 		param.sched_priority =
237*0Sstevel@tonic-gate 		    self->ul_mxchain->mxchain_mx->mutex_ceiling;
238*0Sstevel@tonic-gate 		if (_thread_setschedparam_main(self->ul_lwpid,
239*0Sstevel@tonic-gate 		    self->ul_policy, &param, PRIO_INHERIT))
240*0Sstevel@tonic-gate 			thr_panic("_thread_setschedparam_main() fails");
241*0Sstevel@tonic-gate 	}
242*0Sstevel@tonic-gate }
243*0Sstevel@tonic-gate 
244*0Sstevel@tonic-gate /*
245*0Sstevel@tonic-gate  * Non-preemptive spin locks.  Used by queue_lock().
246*0Sstevel@tonic-gate  * No lock statistics are gathered for these locks.
247*0Sstevel@tonic-gate  */
void
spin_lock_set(mutex_t *mp)
{
	ulwp_t *self = curthread;

	/* Preemption stays disabled until the matching spin_lock_clear(). */
	no_preempt(self);
	/* Fast path: try to grab the lock byte outright. */
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Spin for a while, attempting to acquire the lock.
	 * The ul_spin_lock_* fields are statistics counters;
	 * they saturate at UINT_MAX instead of wrapping.
	 */
	if (self->ul_spin_lock_spin != UINT_MAX)
		self->ul_spin_lock_spin++;
	if (mutex_queuelock_adaptive(mp) == 0 ||
	    set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		return;
	}
	/*
	 * Try harder if we were previously at a no preemption level.
	 */
	if (self->ul_preempt > 1) {
		if (self->ul_spin_lock_spin2 != UINT_MAX)
			self->ul_spin_lock_spin2++;
		if (mutex_queuelock_adaptive(mp) == 0 ||
		    set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			return;
		}
	}
	/*
	 * Give up and block in the kernel for the mutex.
	 */
	if (self->ul_spin_lock_sleep != UINT_MAX)
		self->ul_spin_lock_sleep++;
	(void) ___lwp_mutex_timedlock(mp, NULL);
	mp->mutex_owner = (uintptr_t)self;
}
288*0Sstevel@tonic-gate 
void
spin_lock_clear(mutex_t *mp)
{
	ulwp_t *self = curthread;

	/* Drop ownership before atomically releasing the lock word. */
	mp->mutex_owner = 0;
	/*
	 * Clear the entire lock word in one atomic swap; if any waiter
	 * bits were set, wake a sleeper in the kernel.
	 */
	if (swap32(&mp->mutex_lockword, 0) & WAITERMASK) {
		(void) ___lwp_mutex_wakeup(mp);
		if (self->ul_spin_lock_wakeup != UINT_MAX)
			self->ul_spin_lock_wakeup++;
	}
	/* Undo the no_preempt() performed in spin_lock_set(). */
	preempt(self);
}
302*0Sstevel@tonic-gate 
303*0Sstevel@tonic-gate /*
304*0Sstevel@tonic-gate  * Allocate the sleep queue hash table.
305*0Sstevel@tonic-gate  */
306*0Sstevel@tonic-gate void
307*0Sstevel@tonic-gate queue_alloc(void)
308*0Sstevel@tonic-gate {
309*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
310*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
311*0Sstevel@tonic-gate 	void *data;
312*0Sstevel@tonic-gate 	int i;
313*0Sstevel@tonic-gate 
314*0Sstevel@tonic-gate 	/*
315*0Sstevel@tonic-gate 	 * No locks are needed; we call here only when single-threaded.
316*0Sstevel@tonic-gate 	 */
317*0Sstevel@tonic-gate 	ASSERT(self == udp->ulwp_one);
318*0Sstevel@tonic-gate 	ASSERT(!udp->uberflags.uf_mt);
319*0Sstevel@tonic-gate 	if ((data = _private_mmap(NULL, 2 * QHASHSIZE * sizeof (queue_head_t),
320*0Sstevel@tonic-gate 	    PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, (off_t)0))
321*0Sstevel@tonic-gate 	    == MAP_FAILED)
322*0Sstevel@tonic-gate 		thr_panic("cannot allocate thread queue_head table");
323*0Sstevel@tonic-gate 	udp->queue_head = (queue_head_t *)data;
324*0Sstevel@tonic-gate 	for (i = 0; i < 2 * QHASHSIZE; i++)
325*0Sstevel@tonic-gate 		udp->queue_head[i].qh_lock.mutex_magic = MUTEX_MAGIC;
326*0Sstevel@tonic-gate }
327*0Sstevel@tonic-gate 
328*0Sstevel@tonic-gate #if defined(THREAD_DEBUG)
329*0Sstevel@tonic-gate 
330*0Sstevel@tonic-gate /*
331*0Sstevel@tonic-gate  * Debugging: verify correctness of a sleep queue.
332*0Sstevel@tonic-gate  */
void
QVERIFY(queue_head_t *qp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	ulwp_t *ulwp;
	ulwp_t *prev;
	uint_t index;
	uint32_t cnt = 0;
	char qtype;
	void *wchan;

	/* cheap invariants, always checked: qp is a valid, owned bucket */
	ASSERT(qp >= udp->queue_head && (qp - udp->queue_head) < 2 * QHASHSIZE);
	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));
	/* head and tail are either both set or both NULL */
	ASSERT((qp->qh_head != NULL && qp->qh_tail != NULL) ||
		(qp->qh_head == NULL && qp->qh_tail == NULL));
	if (!thread_queue_verify)
		return;
	/* real expensive stuff, only for _THREAD_QUEUE_VERIFY */
	/* first QHASHSIZE buckets hold mutex waiters, the rest condvar */
	qtype = ((qp - udp->queue_head) < QHASHSIZE)? MX : CV;
	for (prev = NULL, ulwp = qp->qh_head; ulwp != NULL;
	    prev = ulwp, ulwp = ulwp->ul_link, cnt++) {
		ASSERT(ulwp->ul_qtype == qtype);
		ASSERT(ulwp->ul_wchan != NULL);
		ASSERT(ulwp->ul_sleepq == qp);
		/* each waiter must hash to this very bucket */
		wchan = ulwp->ul_wchan;
		index = QUEUE_HASH(wchan, qtype);
		ASSERT(&udp->queue_head[index] == qp);
	}
	ASSERT(qp->qh_tail == prev);
	ASSERT(qp->qh_qlen == cnt);
}
365*0Sstevel@tonic-gate 
366*0Sstevel@tonic-gate #else	/* THREAD_DEBUG */
367*0Sstevel@tonic-gate 
368*0Sstevel@tonic-gate #define	QVERIFY(qp)
369*0Sstevel@tonic-gate 
370*0Sstevel@tonic-gate #endif	/* THREAD_DEBUG */
371*0Sstevel@tonic-gate 
372*0Sstevel@tonic-gate /*
373*0Sstevel@tonic-gate  * Acquire a queue head.
374*0Sstevel@tonic-gate  */
375*0Sstevel@tonic-gate queue_head_t *
376*0Sstevel@tonic-gate queue_lock(void *wchan, int qtype)
377*0Sstevel@tonic-gate {
378*0Sstevel@tonic-gate 	uberdata_t *udp = curthread->ul_uberdata;
379*0Sstevel@tonic-gate 	queue_head_t *qp;
380*0Sstevel@tonic-gate 
381*0Sstevel@tonic-gate 	ASSERT(qtype == MX || qtype == CV);
382*0Sstevel@tonic-gate 
383*0Sstevel@tonic-gate 	/*
384*0Sstevel@tonic-gate 	 * It is possible that we could be called while still single-threaded.
385*0Sstevel@tonic-gate 	 * If so, we call queue_alloc() to allocate the queue_head[] array.
386*0Sstevel@tonic-gate 	 */
387*0Sstevel@tonic-gate 	if ((qp = udp->queue_head) == NULL) {
388*0Sstevel@tonic-gate 		queue_alloc();
389*0Sstevel@tonic-gate 		qp = udp->queue_head;
390*0Sstevel@tonic-gate 	}
391*0Sstevel@tonic-gate 	qp += QUEUE_HASH(wchan, qtype);
392*0Sstevel@tonic-gate 	spin_lock_set(&qp->qh_lock);
393*0Sstevel@tonic-gate 	/*
394*0Sstevel@tonic-gate 	 * At once per nanosecond, qh_lockcount will wrap after 512 years.
395*0Sstevel@tonic-gate 	 * Were we to care about this, we could peg the value at UINT64_MAX.
396*0Sstevel@tonic-gate 	 */
397*0Sstevel@tonic-gate 	qp->qh_lockcount++;
398*0Sstevel@tonic-gate 	QVERIFY(qp);
399*0Sstevel@tonic-gate 	return (qp);
400*0Sstevel@tonic-gate }
401*0Sstevel@tonic-gate 
402*0Sstevel@tonic-gate /*
403*0Sstevel@tonic-gate  * Release a queue head.
404*0Sstevel@tonic-gate  */
void
queue_unlock(queue_head_t *qp)
{
	/* verify queue invariants while we still own the bucket lock */
	QVERIFY(qp);
	spin_lock_clear(&qp->qh_lock);
}
411*0Sstevel@tonic-gate 
412*0Sstevel@tonic-gate /*
413*0Sstevel@tonic-gate  * For rwlock queueing, we must queue writers ahead of readers of the
414*0Sstevel@tonic-gate  * same priority.  We do this by making writers appear to have a half
415*0Sstevel@tonic-gate  * point higher priority for purposes of priority comparisons below.
416*0Sstevel@tonic-gate  */
417*0Sstevel@tonic-gate #define	CMP_PRIO(ulwp)	((real_priority(ulwp) << 1) + (ulwp)->ul_writer)
418*0Sstevel@tonic-gate 
void
enqueue(queue_head_t *qp, ulwp_t *ulwp, void *wchan, int qtype)
{
	ulwp_t **ulwpp;
	ulwp_t *next;
	int pri = CMP_PRIO(ulwp);
	int force_fifo = (qtype & FIFOQ);
	int do_fifo;

	qtype &= ~FIFOQ;
	ASSERT(qtype == MX || qtype == CV);
	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));
	ASSERT(ulwp->ul_sleepq != qp);

	/*
	 * LIFO queue ordering is unfair and can lead to starvation,
	 * but it gives better performance for heavily contended locks.
	 * We use thread_queue_fifo (range is 0..8) to determine
	 * the frequency of FIFO vs LIFO queuing:
	 *	0 : every 256th time	(almost always LIFO)
	 *	1 : every 128th time
	 *	2 : every 64th  time
	 *	3 : every 32nd  time
	 *	4 : every 16th  time	(the default value, mostly LIFO)
	 *	5 : every 8th   time
	 *	6 : every 4th   time
	 *	7 : every 2nd   time
	 *	8 : every time		(never LIFO, always FIFO)
	 * Note that there is always some degree of FIFO ordering.
	 * This breaks live lock conditions that occur in applications
	 * that are written assuming (incorrectly) that threads acquire
	 * locks fairly, that is, in roughly round-robin order.
	 * In any event, the queue is maintained in priority order.
	 *
	 * If we are given the FIFOQ flag in qtype, fifo queueing is forced.
	 * SUSV3 requires this for semaphores.
	 */
	do_fifo = (force_fifo ||
		((++qp->qh_qcnt << curthread->ul_queue_fifo) & 0xff) == 0);

	if (qp->qh_head == NULL) {
		/*
		 * The queue is empty.  LIFO/FIFO doesn't matter.
		 */
		ASSERT(qp->qh_tail == NULL);
		ulwpp = &qp->qh_head;
	} else if (do_fifo) {
		/*
		 * Enqueue after the last thread whose priority is greater
		 * than or equal to the priority of the thread being queued.
		 * Attempt first to go directly onto the tail of the queue.
		 */
		if (pri <= CMP_PRIO(qp->qh_tail))
			ulwpp = &qp->qh_tail->ul_link;
		else {
			for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
			    ulwpp = &next->ul_link)
				if (pri > CMP_PRIO(next))
					break;
		}
	} else {
		/*
		 * Enqueue before the first thread whose priority is less
		 * than or equal to the priority of the thread being queued.
		 * Hopefully we can go directly onto the head of the queue.
		 */
		for (ulwpp = &qp->qh_head; (next = *ulwpp) != NULL;
		    ulwpp = &next->ul_link)
			if (pri >= CMP_PRIO(next))
				break;
	}
	/* splice ulwp in at the chosen slot; update tail if it is last */
	if ((ulwp->ul_link = *ulwpp) == NULL)
		qp->qh_tail = ulwp;
	*ulwpp = ulwp;

	/* record where (and why) this thread is sleeping */
	ulwp->ul_sleepq = qp;
	ulwp->ul_wchan = wchan;
	ulwp->ul_qtype = qtype;
	/* track the high-water mark of the queue length (statistics) */
	if (qp->qh_qmax < ++qp->qh_qlen)
		qp->qh_qmax = qp->qh_qlen;
}
500*0Sstevel@tonic-gate 
501*0Sstevel@tonic-gate /*
502*0Sstevel@tonic-gate  * Return a pointer to the queue slot of the
503*0Sstevel@tonic-gate  * highest priority thread on the queue.
504*0Sstevel@tonic-gate  * On return, prevp, if not NULL, will contain a pointer
505*0Sstevel@tonic-gate  * to the thread's predecessor on the queue
506*0Sstevel@tonic-gate  */
static ulwp_t **
queue_slot(queue_head_t *qp, void *wchan, int *more, ulwp_t **prevp)
{
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev = NULL;
	ulwp_t **suspp = NULL;	/* first suspended waiter seen, if any */
	ulwp_t *susprev;	/* its predecessor on the queue */

	ASSERT(MUTEX_OWNED(&qp->qh_lock, curthread));

	/*
	 * Find a waiter on the sleep queue.
	 * Prefer the first non-suspended waiter for wchan, but remember
	 * the first suspended one in case no better candidate exists.
	 */
	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_link) {
		if (ulwp->ul_wchan == wchan) {
			if (!ulwp->ul_stop)
				break;
			/*
			 * Try not to return a suspended thread.
			 * This mimics the old libthread's behavior.
			 */
			if (suspp == NULL) {
				suspp = ulwpp;
				susprev = prev;
			}
		}
	}

	/* no runnable waiter found; fall back to the suspended one */
	if (ulwp == NULL && suspp != NULL) {
		ulwp = *(ulwpp = suspp);
		prev = susprev;
		suspp = NULL;
	}
	if (ulwp == NULL) {
		if (more != NULL)
			*more = 0;
		return (NULL);
	}

	if (prevp != NULL)
		*prevp = prev;
	if (more == NULL)
		return (ulwpp);

	/*
	 * Scan the remainder of the queue for another waiter.
	 * If we chose a non-suspended waiter but had also seen a
	 * suspended one, that one is "another waiter" by itself.
	 */
	if (suspp != NULL) {
		*more = 1;
		return (ulwpp);
	}
	for (ulwp = ulwp->ul_link; ulwp != NULL; ulwp = ulwp->ul_link) {
		if (ulwp->ul_wchan == wchan) {
			*more = 1;
			return (ulwpp);
		}
	}

	*more = 0;
	return (ulwpp);
}
570*0Sstevel@tonic-gate 
571*0Sstevel@tonic-gate ulwp_t *
572*0Sstevel@tonic-gate dequeue(queue_head_t *qp, void *wchan, int *more)
573*0Sstevel@tonic-gate {
574*0Sstevel@tonic-gate 	ulwp_t **ulwpp;
575*0Sstevel@tonic-gate 	ulwp_t *ulwp;
576*0Sstevel@tonic-gate 	ulwp_t *prev;
577*0Sstevel@tonic-gate 
578*0Sstevel@tonic-gate 	if ((ulwpp = queue_slot(qp, wchan, more, &prev)) == NULL)
579*0Sstevel@tonic-gate 		return (NULL);
580*0Sstevel@tonic-gate 
581*0Sstevel@tonic-gate 	/*
582*0Sstevel@tonic-gate 	 * Dequeue the waiter.
583*0Sstevel@tonic-gate 	 */
584*0Sstevel@tonic-gate 	ulwp = *ulwpp;
585*0Sstevel@tonic-gate 	*ulwpp = ulwp->ul_link;
586*0Sstevel@tonic-gate 	ulwp->ul_link = NULL;
587*0Sstevel@tonic-gate 	if (qp->qh_tail == ulwp)
588*0Sstevel@tonic-gate 		qp->qh_tail = prev;
589*0Sstevel@tonic-gate 	qp->qh_qlen--;
590*0Sstevel@tonic-gate 	ulwp->ul_sleepq = NULL;
591*0Sstevel@tonic-gate 	ulwp->ul_wchan = NULL;
592*0Sstevel@tonic-gate 
593*0Sstevel@tonic-gate 	return (ulwp);
594*0Sstevel@tonic-gate }
595*0Sstevel@tonic-gate 
596*0Sstevel@tonic-gate /*
597*0Sstevel@tonic-gate  * Return a pointer to the highest priority thread sleeping on wchan.
598*0Sstevel@tonic-gate  */
599*0Sstevel@tonic-gate ulwp_t *
600*0Sstevel@tonic-gate queue_waiter(queue_head_t *qp, void *wchan)
601*0Sstevel@tonic-gate {
602*0Sstevel@tonic-gate 	ulwp_t **ulwpp;
603*0Sstevel@tonic-gate 
604*0Sstevel@tonic-gate 	if ((ulwpp = queue_slot(qp, wchan, NULL, NULL)) == NULL)
605*0Sstevel@tonic-gate 		return (NULL);
606*0Sstevel@tonic-gate 	return (*ulwpp);
607*0Sstevel@tonic-gate }
608*0Sstevel@tonic-gate 
/*
 * Remove the calling thread from its sleep queue.
 * Return 1 if at least one other thread remains waiting on wchan,
 * 0 otherwise.  Panics if curthread is not found on the queue.
 */
uint8_t
dequeue_self(queue_head_t *qp, void *wchan)
{
	ulwp_t *self = curthread;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev = NULL;
	int found = 0;
	int more = 0;

	ASSERT(MUTEX_OWNED(&qp->qh_lock, self));

	/* find self on the sleep queue */
	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
	    prev = ulwp, ulwpp = &ulwp->ul_link) {
		if (ulwp == self) {
			/* dequeue ourself */
			*ulwpp = self->ul_link;
			if (qp->qh_tail == self)
				qp->qh_tail = prev;
			qp->qh_qlen--;
			ASSERT(self->ul_wchan == wchan);
			/* clear all of our sleep-queue state */
			self->ul_cvmutex = NULL;
			self->ul_sleepq = NULL;
			self->ul_wchan = NULL;
			self->ul_cv_wake = 0;
			self->ul_link = NULL;
			found = 1;
			break;
		}
		/* note any other waiter on wchan seen before ourself */
		if (ulwp->ul_wchan == wchan)
			more = 1;
	}

	if (!found)
		thr_panic("dequeue_self(): curthread not found on queue");

	if (more)
		return (1);

	/* scan the remainder of the queue for another waiter */
	for (ulwp = *ulwpp; ulwp != NULL; ulwp = ulwp->ul_link) {
		if (ulwp->ul_wchan == wchan)
			return (1);
	}

	return (0);
}
657*0Sstevel@tonic-gate 
/*
 * Called from call_user_handler() and _thrp_suspend() to take
 * ourself off of our sleep queue so we can grab locks.
 *
 * On return, self->ul_sleepq is NULL and we hold no sleep-queue lock.
 */
void
unsleep_self(void)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;

	/*
	 * Calling enter_critical()/exit_critical() here would lead
	 * to recursion.  Just manipulate self->ul_critical directly.
	 */
	self->ul_critical++;
	/* NOTE(review): clears our write-waiter flag; confirm vs rwlock code */
	self->ul_writer = 0;
	while (self->ul_sleepq != NULL) {
		qp = queue_lock(self->ul_wchan, self->ul_qtype);
		/*
		 * We may have been moved from a CV queue to a
		 * mutex queue while we were attempting queue_lock().
		 * If so, just loop around and try again.
		 * dequeue_self() clears self->ul_sleepq.
		 */
		if (qp == self->ul_sleepq)
			(void) dequeue_self(qp, self->ul_wchan);
		queue_unlock(qp);
	}
	self->ul_critical--;
}
688*0Sstevel@tonic-gate 
/*
 * Common code for calling the ___lwp_mutex_timedlock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 *
 * mp	the mutex to acquire
 * tsp	absolute timeout, or NULL for no timeout
 * msp	per-mutex statistics block, or NULL if statistics are disabled
 *
 * Returns 0 on success, else the error from ___lwp_mutex_timedlock().
 */
int
mutex_lock_kernel(mutex_t *mp, timespec_t *tsp, tdb_mutex_stats_t *msp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	hrtime_t begin_sleep;
	int error;

	self->ul_sp = stkptr();
	self->ul_wchan = mp;
	/* report the impending sleep to the thread-debug interface */
	if (__td_event_report(self, TD_SLEEP, udp)) {
		self->ul_td_evbuf.eventnum = TD_SLEEP;
		self->ul_td_evbuf.eventdata = mp;
		tdb_event(TD_SLEEP, udp);
	}
	if (msp) {
		tdb_incr(msp->mutex_sleep);
		begin_sleep = gethrtime();
	}

	DTRACE_PROBE1(plockstat, mutex__block, mp);

	for (;;) {
		if ((error = ___lwp_mutex_timedlock(mp, tsp)) != 0) {
			DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
			break;
		}

		if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
			 */
			enter_critical(self);
			if (mp->mutex_ownerpid == udp->pid) {
				mp->mutex_owner = (uintptr_t)self;
				exit_critical(self);
				DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
				    0, 0);
				break;
			}
			/* not our pid: go around and call the kernel again */
			exit_critical(self);
		} else {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			break;
		}
	}
	/* begin_sleep was initialized above iff msp is non-NULL */
	if (msp)
		msp->mutex_sleep_time += gethrtime() - begin_sleep;
	self->ul_wchan = NULL;
	self->ul_sp = 0;

	return (error);
}
751*0Sstevel@tonic-gate 
/*
 * Common code for calling the ___lwp_mutex_trylock() system call.
 * Returns with mutex_owner and mutex_ownerpid set correctly.
 *
 * Returns 0 with ownership established, else the error from
 * ___lwp_mutex_trylock() (EBUSY when the lock is held elsewhere).
 */
int
mutex_trylock_kernel(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int error;

	for (;;) {
		if ((error = ___lwp_mutex_trylock(mp)) != 0) {
			/* EBUSY is the expected failure; no error probe */
			if (error != EBUSY) {
				DTRACE_PROBE2(plockstat, mutex__error, mp,
				    error);
			}
			break;
		}

		if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) {
			/*
			 * Defend against forkall().  We may be the child,
			 * in which case we don't actually own the mutex.
			 */
			enter_critical(self);
			if (mp->mutex_ownerpid == udp->pid) {
				mp->mutex_owner = (uintptr_t)self;
				exit_critical(self);
				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
				    0, 0);
				break;
			}
			/* not our pid: retry the trylock from the top */
			exit_critical(self);
		} else {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
			break;
		}
	}

	return (error);
}
795*0Sstevel@tonic-gate 
/*
 * Lazily establish the calling thread's schedctl shared-memory page.
 * Returns the thread's sc_shared_t pointer, or NULL if none has been
 * (and cannot now be) set up.
 */
volatile sc_shared_t *
setup_schedctl(void)
{
	ulwp_t *self = curthread;
	volatile sc_shared_t *scp;
	sc_shared_t *tmp;

	if ((scp = self->ul_schedctl) == NULL && /* no shared state yet */
	    !self->ul_vfork &&			/* not a child of vfork() */
	    !self->ul_schedctl_called) {	/* haven't been called before */
		enter_critical(self);
		/* mark ourselves "called" so a failed __schedctl() isn't retried */
		self->ul_schedctl_called = &self->ul_uberdata->uberflags;
		if ((tmp = __schedctl()) != (sc_shared_t *)(-1))
			self->ul_schedctl = scp = tmp;
		exit_critical(self);
	}
	/*
	 * Unless the call to setup_schedctl() is surrounded
	 * by enter_critical()/exit_critical(), the address
	 * we are returning could be invalid due to a forkall()
	 * having occurred in another thread.
	 */
	return (scp);
}
820*0Sstevel@tonic-gate 
821*0Sstevel@tonic-gate /*
822*0Sstevel@tonic-gate  * Interfaces from libsched, incorporated into libc.
823*0Sstevel@tonic-gate  * libsched.so.1 is now a filter library onto libc.
824*0Sstevel@tonic-gate  */
825*0Sstevel@tonic-gate #pragma weak schedctl_lookup = _schedctl_init
826*0Sstevel@tonic-gate #pragma weak _schedctl_lookup = _schedctl_init
827*0Sstevel@tonic-gate #pragma weak schedctl_init = _schedctl_init
828*0Sstevel@tonic-gate schedctl_t *
829*0Sstevel@tonic-gate _schedctl_init(void)
830*0Sstevel@tonic-gate {
831*0Sstevel@tonic-gate 	volatile sc_shared_t *scp = setup_schedctl();
832*0Sstevel@tonic-gate 	return ((scp == NULL)? NULL : (schedctl_t *)&scp->sc_preemptctl);
833*0Sstevel@tonic-gate }
834*0Sstevel@tonic-gate 
#pragma weak schedctl_exit = _schedctl_exit
/*
 * Historical libsched interface.  There is nothing to tear down:
 * schedctl shared memory is never deallocated once created via
 * __schedctl() (see the comment in mutex_trylock_adaptive()).
 */
void
_schedctl_exit(void)
{
}
840*0Sstevel@tonic-gate 
841*0Sstevel@tonic-gate /*
842*0Sstevel@tonic-gate  * Contract private interface for java.
843*0Sstevel@tonic-gate  * Set up the schedctl data if it doesn't exist yet.
844*0Sstevel@tonic-gate  * Return a pointer to the pointer to the schedctl data.
845*0Sstevel@tonic-gate  */
846*0Sstevel@tonic-gate volatile sc_shared_t *volatile *
847*0Sstevel@tonic-gate _thr_schedctl(void)
848*0Sstevel@tonic-gate {
849*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
850*0Sstevel@tonic-gate 	volatile sc_shared_t *volatile *ptr;
851*0Sstevel@tonic-gate 
852*0Sstevel@tonic-gate 	if (self->ul_vfork)
853*0Sstevel@tonic-gate 		return (NULL);
854*0Sstevel@tonic-gate 	if (*(ptr = &self->ul_schedctl) == NULL)
855*0Sstevel@tonic-gate 		(void) setup_schedctl();
856*0Sstevel@tonic-gate 	return (ptr);
857*0Sstevel@tonic-gate }
858*0Sstevel@tonic-gate 
/*
 * Block signals and attempt to block preemption.
 * no_preempt()/preempt() must be used in pairs but can be nested.
 */
void
no_preempt(ulwp_t *self)
{
	volatile sc_shared_t *scp;

	if (self->ul_preempt++ == 0) {
		/* outermost call: the matching exit_critical() is in preempt() */
		enter_critical(self);
		if ((scp = self->ul_schedctl) != NULL ||
		    (scp = setup_schedctl()) != NULL) {
			/*
			 * Save the pre-existing preempt value.
			 */
			self->ul_savpreempt = scp->sc_preemptctl.sc_nopreempt;
			scp->sc_preemptctl.sc_nopreempt = 1;
		}
	}
}
880*0Sstevel@tonic-gate 
/*
 * Undo the effects of no_preempt().
 * On the outermost call, restore the saved preempt value, honor any
 * pending yield request, and leave the critical section entered by
 * no_preempt().
 */
void
preempt(ulwp_t *self)
{
	volatile sc_shared_t *scp;

	ASSERT(self->ul_preempt > 0);
	if (--self->ul_preempt == 0) {
		if ((scp = self->ul_schedctl) != NULL) {
			/*
			 * Restore the pre-existing preempt value.
			 */
			scp->sc_preemptctl.sc_nopreempt = self->ul_savpreempt;
			/* honor a pending yield now that preemption is allowed */
			if (scp->sc_preemptctl.sc_yield &&
			    scp->sc_preemptctl.sc_nopreempt == 0) {
				lwp_yield();
				if (scp->sc_preemptctl.sc_yield) {
					/*
					 * Shouldn't happen.  This is either
					 * a race condition or the thread
					 * just entered the real-time class.
					 */
					lwp_yield();
					scp->sc_preemptctl.sc_yield = 0;
				}
			}
		}
		/* balances the enter_critical() in no_preempt() */
		exit_critical(self);
	}
}
913*0Sstevel@tonic-gate 
/*
 * If a call to preempt() would cause the current thread to yield or to
 * take deferred actions in exit_critical(), then unpark the specified
 * lwp so it can run while we delay.  Return the original lwpid if the
 * unpark was not performed, else return zero.  The tests are a repeat
 * of some of the tests in preempt(), above.  This is a statistical
 * optimization solely for cond_sleep_queue(), below.
 */
static lwpid_t
preempt_unpark(ulwp_t *self, lwpid_t lwpid)
{
	volatile sc_shared_t *scp = self->ul_schedctl;

	ASSERT(self->ul_preempt == 1 && self->ul_critical > 0);
	/* would preempt() yield, or exit_critical() take a deferred action? */
	if ((scp != NULL && scp->sc_preemptctl.sc_yield) ||
	    (self->ul_curplease && self->ul_critical == 1)) {
		(void) __lwp_unpark(lwpid);
		lwpid = 0;	/* tell the caller the unpark was done */
	}
	return (lwpid);
}
935*0Sstevel@tonic-gate 
/*
 * Spin for a while, trying to grab the lock.  We know that we
 * failed set_lock_byte(&mp->mutex_lockw) once before coming here.
 * If this fails, return EBUSY and let the caller deal with it.
 * If this succeeds, return 0 with mutex_owner set to curthread.
 */
int
mutex_trylock_adaptive(mutex_t *mp)
{
	ulwp_t *self = curthread;
	ulwp_t *ulwp;
	volatile sc_shared_t *scp;
	volatile uint8_t *lockp;
	volatile uint64_t *ownerp;
	int count, max = self->ul_adaptive_spin;

	ASSERT(!(mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)));

	/* spinning disabled, or enough threads are spinning already */
	if (max == 0 || (mp->mutex_spinners >= self->ul_max_spinners))
		return (EBUSY);

	lockp = (volatile uint8_t *)&mp->mutex_lockw;
	ownerp = (volatile uint64_t *)&mp->mutex_owner;

	DTRACE_PROBE1(plockstat, mutex__spin, mp);

	/*
	 * This spin loop is unfair to lwps that have already dropped into
	 * the kernel to sleep.  They will starve on a highly-contended mutex.
	 * This is just too bad.  The adaptive spin algorithm is intended
	 * to allow programs with highly-contended locks (that is, broken
	 * programs) to execute with reasonable speed despite their contention.
	 * Being fair would reduce the speed of such programs and well-written
	 * programs will not suffer in any case.
	 */
	enter_critical(self);		/* protects ul_schedctl */
	incr32(&mp->mutex_spinners);
	for (count = 0; count < max; count++) {
		if (*lockp == 0 && set_lock_byte(lockp) == 0) {
			/* we got the lock */
			*ownerp = (uintptr_t)self;
			decr32(&mp->mutex_spinners);
			exit_critical(self);
			DTRACE_PROBE2(plockstat, mutex__spun, 1, count);
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, count);
			return (0);
		}
		SMT_PAUSE();
		/*
		 * Stop spinning if the mutex owner is not running on
		 * a processor; it will not drop the lock any time soon
		 * and we would just be wasting time to keep spinning.
		 *
		 * Note that we are looking at another thread (ulwp_t)
		 * without ensuring that the other thread does not exit.
		 * The scheme relies on ulwp_t structures never being
		 * deallocated by the library (the library employs a free
		 * list of ulwp_t structs that are reused when new threads
		 * are created) and on schedctl shared memory never being
		 * deallocated once created via __schedctl().
		 *
		 * Thus, the worst that can happen when the spinning thread
		 * looks at the owner's schedctl data is that it is looking
		 * at some other thread's schedctl data.  This almost never
		 * happens and is benign when it does.
		 */
		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
		    ((scp = ulwp->ul_schedctl) == NULL ||
		    scp->sc_state != SC_ONPROC))
			break;
	}
	decr32(&mp->mutex_spinners);
	exit_critical(self);

	DTRACE_PROBE2(plockstat, mutex__spun, 0, count);

	return (EBUSY);
}
1013*0Sstevel@tonic-gate 
1014*0Sstevel@tonic-gate /*
1015*0Sstevel@tonic-gate  * Same as mutex_trylock_adaptive(), except specifically for queue locks.
1016*0Sstevel@tonic-gate  * The owner field is not set here; the caller (spin_lock_set()) sets it.
1017*0Sstevel@tonic-gate  */
1018*0Sstevel@tonic-gate int
1019*0Sstevel@tonic-gate mutex_queuelock_adaptive(mutex_t *mp)
1020*0Sstevel@tonic-gate {
1021*0Sstevel@tonic-gate 	ulwp_t *ulwp;
1022*0Sstevel@tonic-gate 	volatile sc_shared_t *scp;
1023*0Sstevel@tonic-gate 	volatile uint8_t *lockp;
1024*0Sstevel@tonic-gate 	volatile uint64_t *ownerp;
1025*0Sstevel@tonic-gate 	int count = curthread->ul_queue_spin;
1026*0Sstevel@tonic-gate 
1027*0Sstevel@tonic-gate 	ASSERT(mp->mutex_type == USYNC_THREAD);
1028*0Sstevel@tonic-gate 
1029*0Sstevel@tonic-gate 	if (count == 0)
1030*0Sstevel@tonic-gate 		return (EBUSY);
1031*0Sstevel@tonic-gate 
1032*0Sstevel@tonic-gate 	lockp = (volatile uint8_t *)&mp->mutex_lockw;
1033*0Sstevel@tonic-gate 	ownerp = (volatile uint64_t *)&mp->mutex_owner;
1034*0Sstevel@tonic-gate 	while (--count >= 0) {
1035*0Sstevel@tonic-gate 		if (*lockp == 0 && set_lock_byte(lockp) == 0)
1036*0Sstevel@tonic-gate 			return (0);
1037*0Sstevel@tonic-gate 		SMT_PAUSE();
1038*0Sstevel@tonic-gate 		if ((ulwp = (ulwp_t *)(uintptr_t)*ownerp) != NULL &&
1039*0Sstevel@tonic-gate 		    ((scp = ulwp->ul_schedctl) == NULL ||
1040*0Sstevel@tonic-gate 		    scp->sc_state != SC_ONPROC))
1041*0Sstevel@tonic-gate 			break;
1042*0Sstevel@tonic-gate 	}
1043*0Sstevel@tonic-gate 
1044*0Sstevel@tonic-gate 	return (EBUSY);
1045*0Sstevel@tonic-gate }
1046*0Sstevel@tonic-gate 
/*
 * Like mutex_trylock_adaptive(), but for process-shared mutexes.
 * Spin for a while, trying to grab the lock.  We know that we
 * failed set_lock_byte(&mp->mutex_lockw) once before coming here.
 * If this fails, return EBUSY and let the caller deal with it.
 * If this succeeds, return 0 with mutex_owner set to curthread
 * and mutex_ownerpid set to the current pid.
 */
int
mutex_trylock_process(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int count;
	volatile uint8_t *lockp;
	volatile uint64_t *ownerp;
	volatile int32_t *pidp;
	pid_t pid, newpid;
	uint64_t owner, newowner;

	/* cache the cpu count; don't spin at all on a uniprocessor */
	if ((count = ncpus) == 0)
		count = ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	count = (count > 1)? self->ul_adaptive_spin : 0;

	ASSERT((mp->mutex_type & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) ==
		USYNC_PROCESS);

	if (count == 0)
		return (EBUSY);

	lockp = (volatile uint8_t *)&mp->mutex_lockw;
	ownerp = (volatile uint64_t *)&mp->mutex_owner;
	pidp = (volatile int32_t *)&mp->mutex_ownerpid;
	owner = *ownerp;
	pid = *pidp;
	/*
	 * This is a process-shared mutex.
	 * We cannot know if the owner is running on a processor.
	 * We just spin and hope that it is on a processor.
	 */
	while (--count >= 0) {
		if (*lockp == 0) {
			/* the forkall() defense: see mutex_lock_kernel() */
			enter_critical(self);
			if (set_lock_byte(lockp) == 0) {
				*ownerp = (uintptr_t)self;
				*pidp = udp->pid;
				exit_critical(self);
				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
				    0, 0);
				return (0);
			}
			exit_critical(self);
		} else if ((newowner = *ownerp) == owner &&
		    (newpid = *pidp) == pid) {
			/* same owner as before; just keep spinning */
			SMT_PAUSE();
			continue;
		}
		/*
		 * The owner of the lock changed; start the count over again.
		 * This may be too aggressive; it needs testing.
		 */
		owner = newowner;
		pid = newpid;
		count = self->ul_adaptive_spin;
	}

	return (EBUSY);
}
1115*0Sstevel@tonic-gate 
/*
 * Mutex wakeup code for releasing a USYNC_THREAD mutex.
 * Returns the lwpid of the thread that was dequeued, if any.
 * The caller of mutex_wakeup() must call __lwp_unpark(lwpid)
 * to wake up the specified lwp.
 */
lwpid_t
mutex_wakeup(mutex_t *mp)
{
	lwpid_t lwpid = 0;	/* 0 means no waiter was found */
	queue_head_t *qp;
	ulwp_t *ulwp;
	int more;

	/*
	 * Dequeue a waiter from the sleep queue.  Don't touch the mutex
	 * waiters bit if no one was found on the queue because the mutex
	 * might have been deallocated or reallocated for another purpose.
	 */
	qp = queue_lock(mp, MX);
	if ((ulwp = dequeue(qp, mp, &more)) != NULL) {
		lwpid = ulwp->ul_lwpid;
		/* keep the waiters bit set iff more waiters remain queued */
		mp->mutex_waiters = (more? 1 : 0);
	}
	queue_unlock(qp);
	return (lwpid);
}
1143*0Sstevel@tonic-gate 
/*
 * Release a USYNC_THREAD mutex, spinning briefly to see whether an
 * adaptive spinner grabs the lock before we commit to waking a sleeper.
 * If no one grabs it, call mutex_wakeup() to release a waiter.
 * Returns the lwpid of the dequeued waiter (the caller must
 * __lwp_unpark() it), or 0 if no wakeup is required.
 */
lwpid_t
mutex_unlock_queue(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uint32_t *lockw = &mp->mutex_lockword;
	lwpid_t lwpid;
	volatile uint8_t *lockp;
	volatile uint32_t *spinp;
	int count;

	/*
	 * We use the swap primitive to clear the lock, but we must
	 * atomically retain the waiters bit for the remainder of this
	 * code to work.  We first check to see if the waiters bit is
	 * set and if so clear the lock by swapping in a word containing
	 * only the waiters bit.  This could produce a false positive test
	 * for whether there are waiters that need to be waked up, but
	 * this just causes an extra call to mutex_wakeup() to do nothing.
	 * The opposite case is more delicate:  If there are no waiters,
	 * we swap in a zero lock byte and a zero waiters bit.  The result
	 * of the swap could indicate that there really was a waiter so in
	 * this case we go directly to mutex_wakeup() without performing
	 * any of the adaptive code because the waiter bit has been cleared
	 * and the adaptive code is unreliable in this case.
	 */
	if (!(*lockw & WAITERMASK)) {	/* no waiter exists right now */
		mp->mutex_owner = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		if (!(swap32(lockw, 0) & WAITERMASK))	/* still no waiters */
			return (0);
		no_preempt(self);	/* ensure a prompt wakeup */
		lwpid = mutex_wakeup(mp);
	} else {
		no_preempt(self);	/* ensure a prompt wakeup */
		lockp = (volatile uint8_t *)&mp->mutex_lockw;
		spinp = (volatile uint32_t *)&mp->mutex_spinners;
		mp->mutex_owner = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		(void) swap32(lockw, WAITER);	/* clear lock, retain waiter */

		/*
		 * We spin here fewer times than mutex_trylock_adaptive().
		 * We are trying to balance two conflicting goals:
		 * 1. Avoid waking up anyone if a spinning thread
		 *    grabs the lock.
		 * 2. Wake up a sleeping thread promptly to get on
		 *    with useful work.
		 * We don't spin at all if there is no acquiring spinner;
		 * (mp->mutex_spinners is non-zero if there are spinners).
		 */
		for (count = self->ul_release_spin;
		    *spinp && count > 0; count--) {
			/*
			 * There is a waiter that we will have to wake
			 * up unless someone else grabs the lock while
			 * we are busy spinning.  Like the spin loop in
			 * mutex_trylock_adaptive(), this spin loop is
			 * unfair to lwps that have already dropped into
			 * the kernel to sleep.  They will starve on a
			 * highly-contended mutex.  Too bad.
			 */
			if (*lockp != 0) {	/* somebody grabbed the lock */
				preempt(self);
				return (0);
			}
			SMT_PAUSE();
		}

		/*
		 * No one grabbed the lock.
		 * Wake up some lwp that is waiting for it.
		 */
		mp->mutex_waiters = 0;
		lwpid = mutex_wakeup(mp);
	}

	/* if we are not handing off to a waiter, allow preemption again */
	if (lwpid == 0)
		preempt(self);
	return (lwpid);
}
1228*0Sstevel@tonic-gate 
/*
 * Like mutex_unlock_queue(), but for process-shared mutexes.
 * We tested the waiters field before calling here and it was non-zero.
 * The kernel (___lwp_mutex_wakeup()) does the actual waiter wakeup.
 */
void
mutex_unlock_process(mutex_t *mp)
{
	ulwp_t *self = curthread;
	int count;
	volatile uint8_t *lockp;

	/*
	 * See the comments in mutex_unlock_queue(), above.
	 */
	if ((count = ncpus) == 0)
		count = ncpus = (int)_sysconf(_SC_NPROCESSORS_ONLN);
	/* spinning is pointless on a uniprocessor */
	count = (count > 1)? self->ul_release_spin : 0;
	no_preempt(self);
	mp->mutex_owner = 0;
	mp->mutex_ownerpid = 0;
	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	if (count == 0) {
		/* clear lock, test waiter */
		if (!(swap32(&mp->mutex_lockword, 0) & WAITERMASK)) {
			/* no waiters now */
			preempt(self);
			return;
		}
	} else {
		/* clear lock, retain waiter */
		(void) swap32(&mp->mutex_lockword, WAITER);
		lockp = (volatile uint8_t *)&mp->mutex_lockw;
		while (--count >= 0) {
			if (*lockp != 0) {
				/* somebody grabbed the lock */
				preempt(self);
				return;
			}
			SMT_PAUSE();
		}
		/*
		 * We must clear the waiters field before going
		 * to the kernel, else it could remain set forever.
		 */
		mp->mutex_waiters = 0;
	}
	(void) ___lwp_mutex_wakeup(mp);
	preempt(self);
}
1278*0Sstevel@tonic-gate 
1279*0Sstevel@tonic-gate /*
1280*0Sstevel@tonic-gate  * Return the real priority of a thread.
1281*0Sstevel@tonic-gate  */
1282*0Sstevel@tonic-gate int
1283*0Sstevel@tonic-gate real_priority(ulwp_t *ulwp)
1284*0Sstevel@tonic-gate {
1285*0Sstevel@tonic-gate 	if (ulwp->ul_epri == 0)
1286*0Sstevel@tonic-gate 		return (ulwp->ul_mappedpri? ulwp->ul_mappedpri : ulwp->ul_pri);
1287*0Sstevel@tonic-gate 	return (ulwp->ul_emappedpri? ulwp->ul_emappedpri : ulwp->ul_epri);
1288*0Sstevel@tonic-gate }
1289*0Sstevel@tonic-gate 
1290*0Sstevel@tonic-gate void
1291*0Sstevel@tonic-gate stall(void)
1292*0Sstevel@tonic-gate {
1293*0Sstevel@tonic-gate 	for (;;)
1294*0Sstevel@tonic-gate 		(void) mutex_lock_kernel(&stall_mutex, NULL, NULL);
1295*0Sstevel@tonic-gate }
1296*0Sstevel@tonic-gate 
1297*0Sstevel@tonic-gate /*
1298*0Sstevel@tonic-gate  * Acquire a USYNC_THREAD mutex via user-level sleep queues.
1299*0Sstevel@tonic-gate  * We failed set_lock_byte(&mp->mutex_lockw) before coming here.
1300*0Sstevel@tonic-gate  * Returns with mutex_owner set correctly.
1301*0Sstevel@tonic-gate  */
1302*0Sstevel@tonic-gate int
1303*0Sstevel@tonic-gate mutex_lock_queue(ulwp_t *self, tdb_mutex_stats_t *msp, mutex_t *mp,
1304*0Sstevel@tonic-gate 	timespec_t *tsp)
1305*0Sstevel@tonic-gate {
1306*0Sstevel@tonic-gate 	uberdata_t *udp = curthread->ul_uberdata;
1307*0Sstevel@tonic-gate 	queue_head_t *qp;
1308*0Sstevel@tonic-gate 	hrtime_t begin_sleep;
1309*0Sstevel@tonic-gate 	int error = 0;
1310*0Sstevel@tonic-gate 
1311*0Sstevel@tonic-gate 	self->ul_sp = stkptr();
1312*0Sstevel@tonic-gate 	if (__td_event_report(self, TD_SLEEP, udp)) {
1313*0Sstevel@tonic-gate 		self->ul_wchan = mp;
1314*0Sstevel@tonic-gate 		self->ul_td_evbuf.eventnum = TD_SLEEP;
1315*0Sstevel@tonic-gate 		self->ul_td_evbuf.eventdata = mp;
1316*0Sstevel@tonic-gate 		tdb_event(TD_SLEEP, udp);
1317*0Sstevel@tonic-gate 	}
1318*0Sstevel@tonic-gate 	if (msp) {
1319*0Sstevel@tonic-gate 		tdb_incr(msp->mutex_sleep);
1320*0Sstevel@tonic-gate 		begin_sleep = gethrtime();
1321*0Sstevel@tonic-gate 	}
1322*0Sstevel@tonic-gate 
1323*0Sstevel@tonic-gate 	DTRACE_PROBE1(plockstat, mutex__block, mp);
1324*0Sstevel@tonic-gate 
1325*0Sstevel@tonic-gate 	/*
1326*0Sstevel@tonic-gate 	 * Put ourself on the sleep queue, and while we are
1327*0Sstevel@tonic-gate 	 * unable to grab the lock, go park in the kernel.
1328*0Sstevel@tonic-gate 	 * Take ourself off the sleep queue after we acquire the lock.
1329*0Sstevel@tonic-gate 	 * The waiter bit can be set/cleared only while holding the queue lock.
1330*0Sstevel@tonic-gate 	 */
1331*0Sstevel@tonic-gate 	qp = queue_lock(mp, MX);
1332*0Sstevel@tonic-gate 	enqueue(qp, self, mp, MX);
1333*0Sstevel@tonic-gate 	mp->mutex_waiters = 1;
1334*0Sstevel@tonic-gate 	for (;;) {
1335*0Sstevel@tonic-gate 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1336*0Sstevel@tonic-gate 			mp->mutex_owner = (uintptr_t)self;
1337*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
1338*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1339*0Sstevel@tonic-gate 			mp->mutex_waiters = dequeue_self(qp, mp);
1340*0Sstevel@tonic-gate 			break;
1341*0Sstevel@tonic-gate 		}
1342*0Sstevel@tonic-gate 		set_parking_flag(self, 1);
1343*0Sstevel@tonic-gate 		queue_unlock(qp);
1344*0Sstevel@tonic-gate 		/*
1345*0Sstevel@tonic-gate 		 * __lwp_park() will return the residual time in tsp
1346*0Sstevel@tonic-gate 		 * if we are unparked before the timeout expires.
1347*0Sstevel@tonic-gate 		 */
1348*0Sstevel@tonic-gate 		if ((error = __lwp_park(tsp, 0)) == EINTR)
1349*0Sstevel@tonic-gate 			error = 0;
1350*0Sstevel@tonic-gate 		set_parking_flag(self, 0);
1351*0Sstevel@tonic-gate 		/*
1352*0Sstevel@tonic-gate 		 * We could have taken a signal or suspended ourself.
1353*0Sstevel@tonic-gate 		 * If we did, then we removed ourself from the queue.
1354*0Sstevel@tonic-gate 		 * Someone else may have removed us from the queue
1355*0Sstevel@tonic-gate 		 * as a consequence of mutex_unlock().  We may have
1356*0Sstevel@tonic-gate 		 * gotten a timeout from __lwp_park().  Or we may still
1357*0Sstevel@tonic-gate 		 * be on the queue and this is just a spurious wakeup.
1358*0Sstevel@tonic-gate 		 */
1359*0Sstevel@tonic-gate 		qp = queue_lock(mp, MX);
1360*0Sstevel@tonic-gate 		if (self->ul_sleepq == NULL) {
1361*0Sstevel@tonic-gate 			if (error) {
1362*0Sstevel@tonic-gate 				DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
1363*0Sstevel@tonic-gate 				DTRACE_PROBE2(plockstat, mutex__error, mp,
1364*0Sstevel@tonic-gate 				    error);
1365*0Sstevel@tonic-gate 				break;
1366*0Sstevel@tonic-gate 			}
1367*0Sstevel@tonic-gate 			if (set_lock_byte(&mp->mutex_lockw) == 0) {
1368*0Sstevel@tonic-gate 				mp->mutex_owner = (uintptr_t)self;
1369*0Sstevel@tonic-gate 				DTRACE_PROBE2(plockstat, mutex__blocked, mp, 1);
1370*0Sstevel@tonic-gate 				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
1371*0Sstevel@tonic-gate 				    0, 0);
1372*0Sstevel@tonic-gate 				break;
1373*0Sstevel@tonic-gate 			}
1374*0Sstevel@tonic-gate 			enqueue(qp, self, mp, MX);
1375*0Sstevel@tonic-gate 			mp->mutex_waiters = 1;
1376*0Sstevel@tonic-gate 		}
1377*0Sstevel@tonic-gate 		ASSERT(self->ul_sleepq == qp &&
1378*0Sstevel@tonic-gate 		    self->ul_qtype == MX &&
1379*0Sstevel@tonic-gate 		    self->ul_wchan == mp);
1380*0Sstevel@tonic-gate 		if (error) {
1381*0Sstevel@tonic-gate 			mp->mutex_waiters = dequeue_self(qp, mp);
1382*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__blocked, mp, 0);
1383*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__error, mp, error);
1384*0Sstevel@tonic-gate 			break;
1385*0Sstevel@tonic-gate 		}
1386*0Sstevel@tonic-gate 	}
1387*0Sstevel@tonic-gate 
1388*0Sstevel@tonic-gate 	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
1389*0Sstevel@tonic-gate 	    self->ul_wchan == NULL);
1390*0Sstevel@tonic-gate 	self->ul_sp = 0;
1391*0Sstevel@tonic-gate 
1392*0Sstevel@tonic-gate 	queue_unlock(qp);
1393*0Sstevel@tonic-gate 	if (msp)
1394*0Sstevel@tonic-gate 		msp->mutex_sleep_time += gethrtime() - begin_sleep;
1395*0Sstevel@tonic-gate 
1396*0Sstevel@tonic-gate 	ASSERT(error == 0 || error == EINVAL || error == ETIME);
1397*0Sstevel@tonic-gate 	return (error);
1398*0Sstevel@tonic-gate }
1399*0Sstevel@tonic-gate 
1400*0Sstevel@tonic-gate /*
1401*0Sstevel@tonic-gate  * Returns with mutex_owner set correctly.
1402*0Sstevel@tonic-gate  */
1403*0Sstevel@tonic-gate int
1404*0Sstevel@tonic-gate mutex_lock_internal(mutex_t *mp, timespec_t *tsp, int try)
1405*0Sstevel@tonic-gate {
1406*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
1407*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
1408*0Sstevel@tonic-gate 	int mtype = mp->mutex_type;
1409*0Sstevel@tonic-gate 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
1410*0Sstevel@tonic-gate 	int error = 0;
1411*0Sstevel@tonic-gate 
1412*0Sstevel@tonic-gate 	ASSERT(try == MUTEX_TRY || try == MUTEX_LOCK);
1413*0Sstevel@tonic-gate 
1414*0Sstevel@tonic-gate 	if (!self->ul_schedctl_called)
1415*0Sstevel@tonic-gate 		(void) setup_schedctl();
1416*0Sstevel@tonic-gate 
1417*0Sstevel@tonic-gate 	if (msp && try == MUTEX_TRY)
1418*0Sstevel@tonic-gate 		tdb_incr(msp->mutex_try);
1419*0Sstevel@tonic-gate 
1420*0Sstevel@tonic-gate 	if ((mtype & (LOCK_RECURSIVE|LOCK_ERRORCHECK)) && mutex_is_held(mp)) {
1421*0Sstevel@tonic-gate 		if (mtype & LOCK_RECURSIVE) {
1422*0Sstevel@tonic-gate 			if (mp->mutex_rcount == RECURSION_MAX) {
1423*0Sstevel@tonic-gate 				error = EAGAIN;
1424*0Sstevel@tonic-gate 			} else {
1425*0Sstevel@tonic-gate 				mp->mutex_rcount++;
1426*0Sstevel@tonic-gate 				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
1427*0Sstevel@tonic-gate 				    1, 0);
1428*0Sstevel@tonic-gate 				return (0);
1429*0Sstevel@tonic-gate 			}
1430*0Sstevel@tonic-gate 		} else if (try == MUTEX_TRY) {
1431*0Sstevel@tonic-gate 			return (EBUSY);
1432*0Sstevel@tonic-gate 		} else {
1433*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1434*0Sstevel@tonic-gate 			return (EDEADLK);
1435*0Sstevel@tonic-gate 		}
1436*0Sstevel@tonic-gate 	}
1437*0Sstevel@tonic-gate 
1438*0Sstevel@tonic-gate 	if (self->ul_error_detection && try == MUTEX_LOCK &&
1439*0Sstevel@tonic-gate 	    tsp == NULL && mutex_is_held(mp))
1440*0Sstevel@tonic-gate 		lock_error(mp, "mutex_lock", NULL, NULL);
1441*0Sstevel@tonic-gate 
1442*0Sstevel@tonic-gate 	if (mtype &
1443*0Sstevel@tonic-gate 	    (USYNC_PROCESS_ROBUST|PTHREAD_PRIO_INHERIT|PTHREAD_PRIO_PROTECT)) {
1444*0Sstevel@tonic-gate 		uint8_t ceil;
1445*0Sstevel@tonic-gate 		int myprio;
1446*0Sstevel@tonic-gate 
1447*0Sstevel@tonic-gate 		if (mtype & PTHREAD_PRIO_PROTECT) {
1448*0Sstevel@tonic-gate 			ceil = mp->mutex_ceiling;
1449*0Sstevel@tonic-gate 			ASSERT(_validate_rt_prio(SCHED_FIFO, ceil) == 0);
1450*0Sstevel@tonic-gate 			myprio = real_priority(self);
1451*0Sstevel@tonic-gate 			if (myprio > ceil) {
1452*0Sstevel@tonic-gate 				DTRACE_PROBE2(plockstat, mutex__error, mp,
1453*0Sstevel@tonic-gate 				    EINVAL);
1454*0Sstevel@tonic-gate 				return (EINVAL);
1455*0Sstevel@tonic-gate 			}
1456*0Sstevel@tonic-gate 			if ((error = _ceil_mylist_add(mp)) != 0) {
1457*0Sstevel@tonic-gate 				DTRACE_PROBE2(plockstat, mutex__error, mp,
1458*0Sstevel@tonic-gate 				    error);
1459*0Sstevel@tonic-gate 				return (error);
1460*0Sstevel@tonic-gate 			}
1461*0Sstevel@tonic-gate 			if (myprio < ceil)
1462*0Sstevel@tonic-gate 				_ceil_prio_inherit(ceil);
1463*0Sstevel@tonic-gate 		}
1464*0Sstevel@tonic-gate 
1465*0Sstevel@tonic-gate 		if (mtype & PTHREAD_PRIO_INHERIT) {
1466*0Sstevel@tonic-gate 			/* go straight to the kernel */
1467*0Sstevel@tonic-gate 			if (try == MUTEX_TRY)
1468*0Sstevel@tonic-gate 				error = mutex_trylock_kernel(mp);
1469*0Sstevel@tonic-gate 			else	/* MUTEX_LOCK */
1470*0Sstevel@tonic-gate 				error = mutex_lock_kernel(mp, tsp, msp);
1471*0Sstevel@tonic-gate 			/*
1472*0Sstevel@tonic-gate 			 * The kernel never sets or clears the lock byte
1473*0Sstevel@tonic-gate 			 * for PTHREAD_PRIO_INHERIT mutexes.
1474*0Sstevel@tonic-gate 			 * Set it here for debugging consistency.
1475*0Sstevel@tonic-gate 			 */
1476*0Sstevel@tonic-gate 			switch (error) {
1477*0Sstevel@tonic-gate 			case 0:
1478*0Sstevel@tonic-gate 			case EOWNERDEAD:
1479*0Sstevel@tonic-gate 				mp->mutex_lockw = LOCKSET;
1480*0Sstevel@tonic-gate 				break;
1481*0Sstevel@tonic-gate 			}
1482*0Sstevel@tonic-gate 		} else if (mtype & USYNC_PROCESS_ROBUST) {
1483*0Sstevel@tonic-gate 			/* go straight to the kernel */
1484*0Sstevel@tonic-gate 			if (try == MUTEX_TRY)
1485*0Sstevel@tonic-gate 				error = mutex_trylock_kernel(mp);
1486*0Sstevel@tonic-gate 			else	/* MUTEX_LOCK */
1487*0Sstevel@tonic-gate 				error = mutex_lock_kernel(mp, tsp, msp);
1488*0Sstevel@tonic-gate 		} else {	/* PTHREAD_PRIO_PROTECT */
1489*0Sstevel@tonic-gate 			/*
1490*0Sstevel@tonic-gate 			 * Try once at user level before going to the kernel.
1491*0Sstevel@tonic-gate 			 * If this is a process shared mutex then protect
1492*0Sstevel@tonic-gate 			 * against forkall() while setting mp->mutex_ownerpid.
1493*0Sstevel@tonic-gate 			 */
1494*0Sstevel@tonic-gate 			if (mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)) {
1495*0Sstevel@tonic-gate 				enter_critical(self);
1496*0Sstevel@tonic-gate 				if (set_lock_byte(&mp->mutex_lockw) == 0) {
1497*0Sstevel@tonic-gate 					mp->mutex_owner = (uintptr_t)self;
1498*0Sstevel@tonic-gate 					mp->mutex_ownerpid = udp->pid;
1499*0Sstevel@tonic-gate 					exit_critical(self);
1500*0Sstevel@tonic-gate 					DTRACE_PROBE3(plockstat,
1501*0Sstevel@tonic-gate 					    mutex__acquire, mp, 0, 0);
1502*0Sstevel@tonic-gate 				} else {
1503*0Sstevel@tonic-gate 					exit_critical(self);
1504*0Sstevel@tonic-gate 					error = EBUSY;
1505*0Sstevel@tonic-gate 				}
1506*0Sstevel@tonic-gate 			} else {
1507*0Sstevel@tonic-gate 				if (set_lock_byte(&mp->mutex_lockw) == 0) {
1508*0Sstevel@tonic-gate 					mp->mutex_owner = (uintptr_t)self;
1509*0Sstevel@tonic-gate 					DTRACE_PROBE3(plockstat,
1510*0Sstevel@tonic-gate 					    mutex__acquire, mp, 0, 0);
1511*0Sstevel@tonic-gate 				} else {
1512*0Sstevel@tonic-gate 					error = EBUSY;
1513*0Sstevel@tonic-gate 				}
1514*0Sstevel@tonic-gate 			}
1515*0Sstevel@tonic-gate 			if (error && try == MUTEX_LOCK)
1516*0Sstevel@tonic-gate 				error = mutex_lock_kernel(mp, tsp, msp);
1517*0Sstevel@tonic-gate 		}
1518*0Sstevel@tonic-gate 
1519*0Sstevel@tonic-gate 		if (error) {
1520*0Sstevel@tonic-gate 			if (mtype & PTHREAD_PRIO_INHERIT) {
1521*0Sstevel@tonic-gate 				switch (error) {
1522*0Sstevel@tonic-gate 				case EOWNERDEAD:
1523*0Sstevel@tonic-gate 				case ENOTRECOVERABLE:
1524*0Sstevel@tonic-gate 					if (mtype & PTHREAD_MUTEX_ROBUST_NP)
1525*0Sstevel@tonic-gate 						break;
1526*0Sstevel@tonic-gate 					if (error == EOWNERDEAD) {
1527*0Sstevel@tonic-gate 						/*
1528*0Sstevel@tonic-gate 						 * We own the mutex; unlock it.
1529*0Sstevel@tonic-gate 						 * It becomes ENOTRECOVERABLE.
1530*0Sstevel@tonic-gate 						 * All waiters are waked up.
1531*0Sstevel@tonic-gate 						 */
1532*0Sstevel@tonic-gate 						mp->mutex_owner = 0;
1533*0Sstevel@tonic-gate 						mp->mutex_ownerpid = 0;
1534*0Sstevel@tonic-gate 						DTRACE_PROBE2(plockstat,
1535*0Sstevel@tonic-gate 						    mutex__release, mp, 0);
1536*0Sstevel@tonic-gate 						mp->mutex_lockw = LOCKCLEAR;
1537*0Sstevel@tonic-gate 						(void) ___lwp_mutex_unlock(mp);
1538*0Sstevel@tonic-gate 					}
1539*0Sstevel@tonic-gate 					/* FALLTHROUGH */
1540*0Sstevel@tonic-gate 				case EDEADLK:
1541*0Sstevel@tonic-gate 					if (try == MUTEX_LOCK)
1542*0Sstevel@tonic-gate 						stall();
1543*0Sstevel@tonic-gate 					error = EBUSY;
1544*0Sstevel@tonic-gate 					break;
1545*0Sstevel@tonic-gate 				}
1546*0Sstevel@tonic-gate 			}
1547*0Sstevel@tonic-gate 			if ((mtype & PTHREAD_PRIO_PROTECT) &&
1548*0Sstevel@tonic-gate 			    error != EOWNERDEAD) {
1549*0Sstevel@tonic-gate 				(void) _ceil_mylist_del(mp);
1550*0Sstevel@tonic-gate 				if (myprio < ceil)
1551*0Sstevel@tonic-gate 					_ceil_prio_waive();
1552*0Sstevel@tonic-gate 			}
1553*0Sstevel@tonic-gate 		}
1554*0Sstevel@tonic-gate 	} else if (mtype & USYNC_PROCESS) {
1555*0Sstevel@tonic-gate 		/*
1556*0Sstevel@tonic-gate 		 * This is a process shared mutex.  Protect against
1557*0Sstevel@tonic-gate 		 * forkall() while setting mp->mutex_ownerpid.
1558*0Sstevel@tonic-gate 		 */
1559*0Sstevel@tonic-gate 		enter_critical(self);
1560*0Sstevel@tonic-gate 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1561*0Sstevel@tonic-gate 			mp->mutex_owner = (uintptr_t)self;
1562*0Sstevel@tonic-gate 			mp->mutex_ownerpid = udp->pid;
1563*0Sstevel@tonic-gate 			exit_critical(self);
1564*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1565*0Sstevel@tonic-gate 		} else {
1566*0Sstevel@tonic-gate 			/* try a little harder */
1567*0Sstevel@tonic-gate 			exit_critical(self);
1568*0Sstevel@tonic-gate 			error = mutex_trylock_process(mp);
1569*0Sstevel@tonic-gate 		}
1570*0Sstevel@tonic-gate 		if (error && try == MUTEX_LOCK)
1571*0Sstevel@tonic-gate 			error = mutex_lock_kernel(mp, tsp, msp);
1572*0Sstevel@tonic-gate 	} else  {	/* USYNC_THREAD */
1573*0Sstevel@tonic-gate 		/* try once */
1574*0Sstevel@tonic-gate 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1575*0Sstevel@tonic-gate 			mp->mutex_owner = (uintptr_t)self;
1576*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1577*0Sstevel@tonic-gate 		} else {
1578*0Sstevel@tonic-gate 			/* try a little harder if we don't own the mutex */
1579*0Sstevel@tonic-gate 			error = EBUSY;
1580*0Sstevel@tonic-gate 			if (MUTEX_OWNER(mp) != self)
1581*0Sstevel@tonic-gate 				error = mutex_trylock_adaptive(mp);
1582*0Sstevel@tonic-gate 			if (error && try == MUTEX_LOCK)		/* go park */
1583*0Sstevel@tonic-gate 				error = mutex_lock_queue(self, msp, mp, tsp);
1584*0Sstevel@tonic-gate 		}
1585*0Sstevel@tonic-gate 	}
1586*0Sstevel@tonic-gate 
1587*0Sstevel@tonic-gate 	switch (error) {
1588*0Sstevel@tonic-gate 	case EOWNERDEAD:
1589*0Sstevel@tonic-gate 	case ELOCKUNMAPPED:
1590*0Sstevel@tonic-gate 		mp->mutex_owner = (uintptr_t)self;
1591*0Sstevel@tonic-gate 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1592*0Sstevel@tonic-gate 		/* FALLTHROUGH */
1593*0Sstevel@tonic-gate 	case 0:
1594*0Sstevel@tonic-gate 		if (msp)
1595*0Sstevel@tonic-gate 			record_begin_hold(msp);
1596*0Sstevel@tonic-gate 		break;
1597*0Sstevel@tonic-gate 	default:
1598*0Sstevel@tonic-gate 		if (try == MUTEX_TRY) {
1599*0Sstevel@tonic-gate 			if (msp)
1600*0Sstevel@tonic-gate 				tdb_incr(msp->mutex_try_fail);
1601*0Sstevel@tonic-gate 			if (__td_event_report(self, TD_LOCK_TRY, udp)) {
1602*0Sstevel@tonic-gate 				self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
1603*0Sstevel@tonic-gate 				tdb_event(TD_LOCK_TRY, udp);
1604*0Sstevel@tonic-gate 			}
1605*0Sstevel@tonic-gate 		}
1606*0Sstevel@tonic-gate 		break;
1607*0Sstevel@tonic-gate 	}
1608*0Sstevel@tonic-gate 
1609*0Sstevel@tonic-gate 	return (error);
1610*0Sstevel@tonic-gate }
1611*0Sstevel@tonic-gate 
1612*0Sstevel@tonic-gate int
1613*0Sstevel@tonic-gate fast_process_lock(mutex_t *mp, timespec_t *tsp, int mtype, int try)
1614*0Sstevel@tonic-gate {
1615*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
1616*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
1617*0Sstevel@tonic-gate 
1618*0Sstevel@tonic-gate 	/*
1619*0Sstevel@tonic-gate 	 * We know that USYNC_PROCESS is set in mtype and that
1620*0Sstevel@tonic-gate 	 * zero, one, or both of the flags LOCK_RECURSIVE and
1621*0Sstevel@tonic-gate 	 * LOCK_ERRORCHECK are set, and that no other flags are set.
1622*0Sstevel@tonic-gate 	 */
1623*0Sstevel@tonic-gate 	enter_critical(self);
1624*0Sstevel@tonic-gate 	if (set_lock_byte(&mp->mutex_lockw) == 0) {
1625*0Sstevel@tonic-gate 		mp->mutex_owner = (uintptr_t)self;
1626*0Sstevel@tonic-gate 		mp->mutex_ownerpid = udp->pid;
1627*0Sstevel@tonic-gate 		exit_critical(self);
1628*0Sstevel@tonic-gate 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1629*0Sstevel@tonic-gate 		return (0);
1630*0Sstevel@tonic-gate 	}
1631*0Sstevel@tonic-gate 	exit_critical(self);
1632*0Sstevel@tonic-gate 
1633*0Sstevel@tonic-gate 	if ((mtype & ~USYNC_PROCESS) && shared_mutex_held(mp)) {
1634*0Sstevel@tonic-gate 		if (mtype & LOCK_RECURSIVE) {
1635*0Sstevel@tonic-gate 			if (mp->mutex_rcount == RECURSION_MAX)
1636*0Sstevel@tonic-gate 				return (EAGAIN);
1637*0Sstevel@tonic-gate 			mp->mutex_rcount++;
1638*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 1, 0);
1639*0Sstevel@tonic-gate 			return (0);
1640*0Sstevel@tonic-gate 		}
1641*0Sstevel@tonic-gate 		if (try == MUTEX_LOCK) {
1642*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1643*0Sstevel@tonic-gate 			return (EDEADLK);
1644*0Sstevel@tonic-gate 		}
1645*0Sstevel@tonic-gate 		return (EBUSY);
1646*0Sstevel@tonic-gate 	}
1647*0Sstevel@tonic-gate 
1648*0Sstevel@tonic-gate 	/* try a little harder if we don't own the mutex */
1649*0Sstevel@tonic-gate 	if (!shared_mutex_held(mp) && mutex_trylock_process(mp) == 0)
1650*0Sstevel@tonic-gate 		return (0);
1651*0Sstevel@tonic-gate 
1652*0Sstevel@tonic-gate 	if (try == MUTEX_LOCK)
1653*0Sstevel@tonic-gate 		return (mutex_lock_kernel(mp, tsp, NULL));
1654*0Sstevel@tonic-gate 
1655*0Sstevel@tonic-gate 	if (__td_event_report(self, TD_LOCK_TRY, udp)) {
1656*0Sstevel@tonic-gate 		self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
1657*0Sstevel@tonic-gate 		tdb_event(TD_LOCK_TRY, udp);
1658*0Sstevel@tonic-gate 	}
1659*0Sstevel@tonic-gate 	return (EBUSY);
1660*0Sstevel@tonic-gate }
1661*0Sstevel@tonic-gate 
1662*0Sstevel@tonic-gate static int
1663*0Sstevel@tonic-gate slow_lock(ulwp_t *self, mutex_t *mp, timespec_t *tsp)
1664*0Sstevel@tonic-gate {
1665*0Sstevel@tonic-gate 	int error = 0;
1666*0Sstevel@tonic-gate 
1667*0Sstevel@tonic-gate 	if (MUTEX_OWNER(mp) == self || mutex_trylock_adaptive(mp) != 0)
1668*0Sstevel@tonic-gate 		error = mutex_lock_queue(self, NULL, mp, tsp);
1669*0Sstevel@tonic-gate 	return (error);
1670*0Sstevel@tonic-gate }
1671*0Sstevel@tonic-gate 
1672*0Sstevel@tonic-gate int
1673*0Sstevel@tonic-gate mutex_lock_impl(mutex_t *mp, timespec_t *tsp)
1674*0Sstevel@tonic-gate {
1675*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
1676*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
1677*0Sstevel@tonic-gate 	uberflags_t *gflags;
1678*0Sstevel@tonic-gate 	int mtype;
1679*0Sstevel@tonic-gate 
1680*0Sstevel@tonic-gate 	/*
1681*0Sstevel@tonic-gate 	 * Optimize the case of USYNC_THREAD, including
1682*0Sstevel@tonic-gate 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
1683*0Sstevel@tonic-gate 	 * no error detection, no lock statistics,
1684*0Sstevel@tonic-gate 	 * and the process has only a single thread.
1685*0Sstevel@tonic-gate 	 * (Most likely a traditional single-threaded application.)
1686*0Sstevel@tonic-gate 	 */
1687*0Sstevel@tonic-gate 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
1688*0Sstevel@tonic-gate 	    udp->uberflags.uf_all) == 0) {
1689*0Sstevel@tonic-gate 		/*
1690*0Sstevel@tonic-gate 		 * Only one thread exists so we don't need an atomic operation.
1691*0Sstevel@tonic-gate 		 */
1692*0Sstevel@tonic-gate 		if (mp->mutex_lockw == 0) {
1693*0Sstevel@tonic-gate 			mp->mutex_lockw = LOCKSET;
1694*0Sstevel@tonic-gate 			mp->mutex_owner = (uintptr_t)self;
1695*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1696*0Sstevel@tonic-gate 			return (0);
1697*0Sstevel@tonic-gate 		}
1698*0Sstevel@tonic-gate 		if (mtype && MUTEX_OWNER(mp) == self) {
1699*0Sstevel@tonic-gate 			/*
1700*0Sstevel@tonic-gate 			 * LOCK_RECURSIVE, LOCK_ERRORCHECK, or both.
1701*0Sstevel@tonic-gate 			 */
1702*0Sstevel@tonic-gate 			if (mtype & LOCK_RECURSIVE) {
1703*0Sstevel@tonic-gate 				if (mp->mutex_rcount == RECURSION_MAX)
1704*0Sstevel@tonic-gate 					return (EAGAIN);
1705*0Sstevel@tonic-gate 				mp->mutex_rcount++;
1706*0Sstevel@tonic-gate 				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
1707*0Sstevel@tonic-gate 				    1, 0);
1708*0Sstevel@tonic-gate 				return (0);
1709*0Sstevel@tonic-gate 			}
1710*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1711*0Sstevel@tonic-gate 			return (EDEADLK);	/* LOCK_ERRORCHECK */
1712*0Sstevel@tonic-gate 		}
1713*0Sstevel@tonic-gate 		/*
1714*0Sstevel@tonic-gate 		 * We have reached a deadlock, probably because the
1715*0Sstevel@tonic-gate 		 * process is executing non-async-signal-safe code in
1716*0Sstevel@tonic-gate 		 * a signal handler and is attempting to acquire a lock
1717*0Sstevel@tonic-gate 		 * that it already owns.  This is not surprising, given
1718*0Sstevel@tonic-gate 		 * bad programming practices over the years that has
1719*0Sstevel@tonic-gate 		 * resulted in applications calling printf() and such
1720*0Sstevel@tonic-gate 		 * in their signal handlers.  Unless the user has told
1721*0Sstevel@tonic-gate 		 * us that the signal handlers are safe by setting:
1722*0Sstevel@tonic-gate 		 *	export _THREAD_ASYNC_SAFE=1
1723*0Sstevel@tonic-gate 		 * we return EDEADLK rather than actually deadlocking.
1724*0Sstevel@tonic-gate 		 */
1725*0Sstevel@tonic-gate 		if (tsp == NULL &&
1726*0Sstevel@tonic-gate 		    MUTEX_OWNER(mp) == self && !self->ul_async_safe) {
1727*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1728*0Sstevel@tonic-gate 			return (EDEADLK);
1729*0Sstevel@tonic-gate 		}
1730*0Sstevel@tonic-gate 	}
1731*0Sstevel@tonic-gate 
1732*0Sstevel@tonic-gate 	/*
1733*0Sstevel@tonic-gate 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
1734*0Sstevel@tonic-gate 	 * no error detection, and no lock statistics.
1735*0Sstevel@tonic-gate 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
1736*0Sstevel@tonic-gate 	 */
1737*0Sstevel@tonic-gate 	if ((gflags = self->ul_schedctl_called) != NULL &&
1738*0Sstevel@tonic-gate 	    (gflags->uf_trs_ted |
1739*0Sstevel@tonic-gate 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
1740*0Sstevel@tonic-gate 
1741*0Sstevel@tonic-gate 		if (mtype & USYNC_PROCESS)
1742*0Sstevel@tonic-gate 			return (fast_process_lock(mp, tsp, mtype, MUTEX_LOCK));
1743*0Sstevel@tonic-gate 
1744*0Sstevel@tonic-gate 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1745*0Sstevel@tonic-gate 			mp->mutex_owner = (uintptr_t)self;
1746*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1747*0Sstevel@tonic-gate 			return (0);
1748*0Sstevel@tonic-gate 		}
1749*0Sstevel@tonic-gate 
1750*0Sstevel@tonic-gate 		if (mtype && MUTEX_OWNER(mp) == self) {
1751*0Sstevel@tonic-gate 			if (mtype & LOCK_RECURSIVE) {
1752*0Sstevel@tonic-gate 				if (mp->mutex_rcount == RECURSION_MAX)
1753*0Sstevel@tonic-gate 					return (EAGAIN);
1754*0Sstevel@tonic-gate 				mp->mutex_rcount++;
1755*0Sstevel@tonic-gate 				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
1756*0Sstevel@tonic-gate 				    1, 0);
1757*0Sstevel@tonic-gate 				return (0);
1758*0Sstevel@tonic-gate 			}
1759*0Sstevel@tonic-gate 			DTRACE_PROBE2(plockstat, mutex__error, mp, EDEADLK);
1760*0Sstevel@tonic-gate 			return (EDEADLK);	/* LOCK_ERRORCHECK */
1761*0Sstevel@tonic-gate 		}
1762*0Sstevel@tonic-gate 
1763*0Sstevel@tonic-gate 		return (slow_lock(self, mp, tsp));
1764*0Sstevel@tonic-gate 	}
1765*0Sstevel@tonic-gate 
1766*0Sstevel@tonic-gate 	/* else do it the long way */
1767*0Sstevel@tonic-gate 	return (mutex_lock_internal(mp, tsp, MUTEX_LOCK));
1768*0Sstevel@tonic-gate }
1769*0Sstevel@tonic-gate 
1770*0Sstevel@tonic-gate #pragma weak _private_mutex_lock = __mutex_lock
1771*0Sstevel@tonic-gate #pragma weak mutex_lock = __mutex_lock
1772*0Sstevel@tonic-gate #pragma weak _mutex_lock = __mutex_lock
1773*0Sstevel@tonic-gate #pragma weak pthread_mutex_lock = __mutex_lock
1774*0Sstevel@tonic-gate #pragma weak _pthread_mutex_lock = __mutex_lock
1775*0Sstevel@tonic-gate int
1776*0Sstevel@tonic-gate __mutex_lock(mutex_t *mp)
1777*0Sstevel@tonic-gate {
1778*0Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
1779*0Sstevel@tonic-gate 	return (mutex_lock_impl(mp, NULL));
1780*0Sstevel@tonic-gate }
1781*0Sstevel@tonic-gate 
1782*0Sstevel@tonic-gate #pragma weak pthread_mutex_timedlock = _pthread_mutex_timedlock
1783*0Sstevel@tonic-gate int
1784*0Sstevel@tonic-gate _pthread_mutex_timedlock(mutex_t *mp, const timespec_t *abstime)
1785*0Sstevel@tonic-gate {
1786*0Sstevel@tonic-gate 	timespec_t tslocal;
1787*0Sstevel@tonic-gate 	int error;
1788*0Sstevel@tonic-gate 
1789*0Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
1790*0Sstevel@tonic-gate 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
1791*0Sstevel@tonic-gate 	error = mutex_lock_impl(mp, &tslocal);
1792*0Sstevel@tonic-gate 	if (error == ETIME)
1793*0Sstevel@tonic-gate 		error = ETIMEDOUT;
1794*0Sstevel@tonic-gate 	return (error);
1795*0Sstevel@tonic-gate }
1796*0Sstevel@tonic-gate 
1797*0Sstevel@tonic-gate #pragma weak pthread_mutex_reltimedlock_np = _pthread_mutex_reltimedlock_np
1798*0Sstevel@tonic-gate int
1799*0Sstevel@tonic-gate _pthread_mutex_reltimedlock_np(mutex_t *mp, const timespec_t *reltime)
1800*0Sstevel@tonic-gate {
1801*0Sstevel@tonic-gate 	timespec_t tslocal;
1802*0Sstevel@tonic-gate 	int error;
1803*0Sstevel@tonic-gate 
1804*0Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
1805*0Sstevel@tonic-gate 	tslocal = *reltime;
1806*0Sstevel@tonic-gate 	error = mutex_lock_impl(mp, &tslocal);
1807*0Sstevel@tonic-gate 	if (error == ETIME)
1808*0Sstevel@tonic-gate 		error = ETIMEDOUT;
1809*0Sstevel@tonic-gate 	return (error);
1810*0Sstevel@tonic-gate }
1811*0Sstevel@tonic-gate 
1812*0Sstevel@tonic-gate static int
1813*0Sstevel@tonic-gate slow_trylock(mutex_t *mp, ulwp_t *self)
1814*0Sstevel@tonic-gate {
1815*0Sstevel@tonic-gate 	if (MUTEX_OWNER(mp) == self ||
1816*0Sstevel@tonic-gate 	    mutex_trylock_adaptive(mp) != 0) {
1817*0Sstevel@tonic-gate 		uberdata_t *udp = self->ul_uberdata;
1818*0Sstevel@tonic-gate 
1819*0Sstevel@tonic-gate 		if (__td_event_report(self, TD_LOCK_TRY, udp)) {
1820*0Sstevel@tonic-gate 			self->ul_td_evbuf.eventnum = TD_LOCK_TRY;
1821*0Sstevel@tonic-gate 			tdb_event(TD_LOCK_TRY, udp);
1822*0Sstevel@tonic-gate 		}
1823*0Sstevel@tonic-gate 		return (EBUSY);
1824*0Sstevel@tonic-gate 	}
1825*0Sstevel@tonic-gate 	return (0);
1826*0Sstevel@tonic-gate }
1827*0Sstevel@tonic-gate 
1828*0Sstevel@tonic-gate #pragma weak _private_mutex_trylock = __mutex_trylock
1829*0Sstevel@tonic-gate #pragma weak mutex_trylock = __mutex_trylock
1830*0Sstevel@tonic-gate #pragma weak _mutex_trylock = __mutex_trylock
1831*0Sstevel@tonic-gate #pragma weak pthread_mutex_trylock = __mutex_trylock
1832*0Sstevel@tonic-gate #pragma weak _pthread_mutex_trylock = __mutex_trylock
1833*0Sstevel@tonic-gate int
1834*0Sstevel@tonic-gate __mutex_trylock(mutex_t *mp)
1835*0Sstevel@tonic-gate {
1836*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
1837*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
1838*0Sstevel@tonic-gate 	uberflags_t *gflags;
1839*0Sstevel@tonic-gate 	int mtype;
1840*0Sstevel@tonic-gate 
1841*0Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
1842*0Sstevel@tonic-gate 	/*
1843*0Sstevel@tonic-gate 	 * Optimize the case of USYNC_THREAD, including
1844*0Sstevel@tonic-gate 	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
1845*0Sstevel@tonic-gate 	 * no error detection, no lock statistics,
1846*0Sstevel@tonic-gate 	 * and the process has only a single thread.
1847*0Sstevel@tonic-gate 	 * (Most likely a traditional single-threaded application.)
1848*0Sstevel@tonic-gate 	 */
1849*0Sstevel@tonic-gate 	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
1850*0Sstevel@tonic-gate 	    udp->uberflags.uf_all) == 0) {
1851*0Sstevel@tonic-gate 		/*
1852*0Sstevel@tonic-gate 		 * Only one thread exists so we don't need an atomic operation.
1853*0Sstevel@tonic-gate 		 */
1854*0Sstevel@tonic-gate 		if (mp->mutex_lockw == 0) {
1855*0Sstevel@tonic-gate 			mp->mutex_lockw = LOCKSET;
1856*0Sstevel@tonic-gate 			mp->mutex_owner = (uintptr_t)self;
1857*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1858*0Sstevel@tonic-gate 			return (0);
1859*0Sstevel@tonic-gate 		}
1860*0Sstevel@tonic-gate 		if (mtype && MUTEX_OWNER(mp) == self) {
1861*0Sstevel@tonic-gate 			if (mtype & LOCK_RECURSIVE) {
1862*0Sstevel@tonic-gate 				if (mp->mutex_rcount == RECURSION_MAX)
1863*0Sstevel@tonic-gate 					return (EAGAIN);
1864*0Sstevel@tonic-gate 				mp->mutex_rcount++;
1865*0Sstevel@tonic-gate 				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
1866*0Sstevel@tonic-gate 				    1, 0);
1867*0Sstevel@tonic-gate 				return (0);
1868*0Sstevel@tonic-gate 			}
1869*0Sstevel@tonic-gate 			return (EDEADLK);	/* LOCK_ERRORCHECK */
1870*0Sstevel@tonic-gate 		}
1871*0Sstevel@tonic-gate 		return (EBUSY);
1872*0Sstevel@tonic-gate 	}
1873*0Sstevel@tonic-gate 
1874*0Sstevel@tonic-gate 	/*
1875*0Sstevel@tonic-gate 	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
1876*0Sstevel@tonic-gate 	 * no error detection, and no lock statistics.
1877*0Sstevel@tonic-gate 	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
1878*0Sstevel@tonic-gate 	 */
1879*0Sstevel@tonic-gate 	if ((gflags = self->ul_schedctl_called) != NULL &&
1880*0Sstevel@tonic-gate 	    (gflags->uf_trs_ted |
1881*0Sstevel@tonic-gate 	    (mtype & ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK))) == 0) {
1882*0Sstevel@tonic-gate 
1883*0Sstevel@tonic-gate 		if (mtype & USYNC_PROCESS)
1884*0Sstevel@tonic-gate 			return (fast_process_lock(mp, NULL, mtype, MUTEX_TRY));
1885*0Sstevel@tonic-gate 
1886*0Sstevel@tonic-gate 		if (set_lock_byte(&mp->mutex_lockw) == 0) {
1887*0Sstevel@tonic-gate 			mp->mutex_owner = (uintptr_t)self;
1888*0Sstevel@tonic-gate 			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
1889*0Sstevel@tonic-gate 			return (0);
1890*0Sstevel@tonic-gate 		}
1891*0Sstevel@tonic-gate 
1892*0Sstevel@tonic-gate 		if (mtype && MUTEX_OWNER(mp) == self) {
1893*0Sstevel@tonic-gate 			if (mtype & LOCK_RECURSIVE) {
1894*0Sstevel@tonic-gate 				if (mp->mutex_rcount == RECURSION_MAX)
1895*0Sstevel@tonic-gate 					return (EAGAIN);
1896*0Sstevel@tonic-gate 				mp->mutex_rcount++;
1897*0Sstevel@tonic-gate 				DTRACE_PROBE3(plockstat, mutex__acquire, mp,
1898*0Sstevel@tonic-gate 				    1, 0);
1899*0Sstevel@tonic-gate 				return (0);
1900*0Sstevel@tonic-gate 			}
1901*0Sstevel@tonic-gate 			return (EBUSY);		/* LOCK_ERRORCHECK */
1902*0Sstevel@tonic-gate 		}
1903*0Sstevel@tonic-gate 
1904*0Sstevel@tonic-gate 		return (slow_trylock(mp, self));
1905*0Sstevel@tonic-gate 	}
1906*0Sstevel@tonic-gate 
1907*0Sstevel@tonic-gate 	/* else do it the long way */
1908*0Sstevel@tonic-gate 	return (mutex_lock_internal(mp, NULL, MUTEX_TRY));
1909*0Sstevel@tonic-gate }
1910*0Sstevel@tonic-gate 
/*
 * Slow-path unlock, common to all mutex types.
 * Performs error checking (LOCK_ERRORCHECK), application-requested
 * error detection, recursion accounting (LOCK_RECURSIVE), hold-time
 * statistics, then releases the lock and wakes a waiter, dispatching
 * on the mutex type.  Returns 0 on success, EPERM for an errorcheck
 * mutex not held by the caller, or the result of ___lwp_mutex_unlock()
 * for kernel-assisted mutex types.
 */
int
mutex_unlock_internal(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	int mtype = mp->mutex_type;
	tdb_mutex_stats_t *msp;
	int error;
	lwpid_t lwpid;

	/* An error-checking mutex may only be unlocked by its owner. */
	if ((mtype & LOCK_ERRORCHECK) && !mutex_is_held(mp))
		return (EPERM);

	/* With error detection enabled, report the misuse loudly. */
	if (self->ul_error_detection && !mutex_is_held(mp))
		lock_error(mp, "mutex_unlock", NULL, NULL);

	/* A recursive mutex stays held until its count drains to zero. */
	if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
		mp->mutex_rcount--;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
		return (0);
	}

	if ((msp = MUTEX_STATS(mp, udp)) != NULL)
		(void) record_hold_time(msp);

	if (mtype &
	    (USYNC_PROCESS_ROBUST|PTHREAD_PRIO_INHERIT|PTHREAD_PRIO_PROTECT)) {
		/*
		 * Kernel-assisted mutex types: clear ownership first,
		 * then release.  Preemption is disabled across the
		 * release so we cannot be preempted while other
		 * threads' progress depends on us.
		 */
		no_preempt(self);
		mp->mutex_owner = 0;
		mp->mutex_ownerpid = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		if (mtype & PTHREAD_PRIO_INHERIT) {
			mp->mutex_lockw = LOCKCLEAR;
			error = ___lwp_mutex_unlock(mp);
		} else if (mtype & USYNC_PROCESS_ROBUST) {
			error = ___lwp_mutex_unlock(mp);
		} else {
			/*
			 * PTHREAD_PRIO_PROTECT only: release at user level,
			 * ask the kernel to wake waiters only if the
			 * atomic swap shows some were present.
			 */
			if (swap32(&mp->mutex_lockword, 0) & WAITERMASK)
				(void) ___lwp_mutex_wakeup(mp);
			error = 0;
		}
		if (mtype & PTHREAD_PRIO_PROTECT) {
			/* Drop any priority-ceiling boost we held for mp. */
			if (_ceil_mylist_del(mp))
				_ceil_prio_waive();
		}
		preempt(self);
	} else if (mtype & USYNC_PROCESS) {
		/*
		 * Process-shared mutex released at user level.  If waiters
		 * are already visible take the heavyweight path; otherwise
		 * clear the lock and re-check for a waiter that slipped in
		 * during the window (the swap32 result is authoritative).
		 */
		if (mp->mutex_lockword & WAITERMASK)
			mutex_unlock_process(mp);
		else {
			mp->mutex_owner = 0;
			mp->mutex_ownerpid = 0;
			DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
			if (swap32(&mp->mutex_lockword, 0) & WAITERMASK) {
				no_preempt(self);
				(void) ___lwp_mutex_wakeup(mp);
				preempt(self);
			}
		}
		error = 0;
	} else {	/* USYNC_THREAD */
		/* Intra-process mutex: unpark one queued waiter, if any. */
		if ((lwpid = mutex_unlock_queue(mp)) != 0) {
			(void) __lwp_unpark(lwpid);
			preempt(self);
		}
		error = 0;
	}

	return (error);
}
1981*0Sstevel@tonic-gate 
#pragma weak _private_mutex_unlock = __mutex_unlock
#pragma weak mutex_unlock = __mutex_unlock
#pragma weak _mutex_unlock = __mutex_unlock
#pragma weak pthread_mutex_unlock = __mutex_unlock
#pragma weak _pthread_mutex_unlock = __mutex_unlock
/*
 * Public mutex_unlock()/pthread_mutex_unlock() entry point.
 * Tries a series of increasingly general fast paths (single-threaded
 * process, plain USYNC_THREAD, USYNC_PROCESS) before falling back
 * to mutex_unlock_internal().  Returns 0 or an error number
 * (EPERM for an errorcheck mutex not owned by the caller).
 */
int
__mutex_unlock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	uberflags_t *gflags;
	lwpid_t lwpid;
	int mtype;
	short el;

	/*
	 * Optimize the case of USYNC_THREAD, including
	 * the LOCK_RECURSIVE and LOCK_ERRORCHECK cases,
	 * no error detection, no lock statistics,
	 * and the process has only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if ((((mtype = mp->mutex_type) & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) |
	    udp->uberflags.uf_all) == 0) {
		if (mtype) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
		}
		/*
		 * Only one thread exists so we don't need an atomic operation.
		 * Also, there can be no waiters.
		 */
		mp->mutex_owner = 0;
		mp->mutex_lockword = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		return (0);
	}

	/*
	 * Optimize the common cases of USYNC_THREAD or USYNC_PROCESS,
	 * no error detection, and no lock statistics.
	 * Include LOCK_RECURSIVE and LOCK_ERRORCHECK cases.
	 */
	if ((gflags = self->ul_schedctl_called) != NULL) {
		if (((el = gflags->uf_trs_ted) | mtype) == 0) {
			/* Plain USYNC_THREAD mutex, nothing special set. */
fast_unlock:
			if (!(mp->mutex_lockword & WAITERMASK)) {
				/* no waiter exists right now */
				mp->mutex_owner = 0;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
				if (swap32(&mp->mutex_lockword, 0) &
				    WAITERMASK) {
					/* a waiter suddenly appeared */
					no_preempt(self);
					if ((lwpid = mutex_wakeup(mp)) != 0)
						(void) __lwp_unpark(lwpid);
					preempt(self);
				}
			} else if ((lwpid = mutex_unlock_queue(mp)) != 0) {
				(void) __lwp_unpark(lwpid);
				preempt(self);
			}
			return (0);
		}
		if (el)		/* error detection or lock statistics */
			goto slow_unlock;
		if ((mtype & ~(LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that one or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !MUTEX_OWNED(mp, self))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			goto fast_unlock;
		}
		if ((mtype &
		    ~(USYNC_PROCESS|LOCK_RECURSIVE|LOCK_ERRORCHECK)) == 0) {
			/*
			 * At this point we know that zero, one, or both of the
			 * flags LOCK_RECURSIVE or LOCK_ERRORCHECK is set and
			 * that the USYNC_PROCESS flag is set.
			 */
			if ((mtype & LOCK_ERRORCHECK) && !shared_mutex_held(mp))
				return (EPERM);
			if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0) {
				mp->mutex_rcount--;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 1);
				return (0);
			}
			/* Same release pattern as mutex_unlock_internal(). */
			if (mp->mutex_lockword & WAITERMASK)
				mutex_unlock_process(mp);
			else {
				mp->mutex_owner = 0;
				mp->mutex_ownerpid = 0;
				DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
				if (swap32(&mp->mutex_lockword, 0) &
				    WAITERMASK) {
					no_preempt(self);
					(void) ___lwp_mutex_wakeup(mp);
					preempt(self);
				}
			}
			return (0);
		}
	}

	/* else do it the long way */
slow_unlock:
	return (mutex_unlock_internal(mp));
}
2106*0Sstevel@tonic-gate 
2107*0Sstevel@tonic-gate /*
2108*0Sstevel@tonic-gate  * Internally to the library, almost all mutex lock/unlock actions
2109*0Sstevel@tonic-gate  * go through these lmutex_ functions, to protect critical regions.
2110*0Sstevel@tonic-gate  * We replicate a bit of code from __mutex_lock() and __mutex_unlock()
2111*0Sstevel@tonic-gate  * to make these functions faster since we know that the mutex type
2112*0Sstevel@tonic-gate  * of all internal locks is USYNC_THREAD.  We also know that internal
2113*0Sstevel@tonic-gate  * locking can never fail, so we panic if it does.
2114*0Sstevel@tonic-gate  */
/*
 * Acquire an internal library lock (always USYNC_THREAD) and enter a
 * critical region that persists until the matching lmutex_unlock().
 * Internal locking can never fail, so there is no error return.
 */
void
lmutex_lock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	/*
	 * We enter the critical region before touching the lock so
	 * that we are not interrupted while holding an internal lock;
	 * exit_critical() is called by lmutex_unlock().
	 */
	enter_critical(self);
	/*
	 * Optimize the case of no lock statistics and only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (udp->uberflags.uf_all == 0) {
		/*
		 * Only one thread exists; the mutex must be free.
		 */
		ASSERT(mp->mutex_lockw == 0);
		mp->mutex_lockw = LOCKSET;
		mp->mutex_owner = (uintptr_t)self;
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else {
		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);

		if (!self->ul_schedctl_called)
			(void) setup_schedctl();

		/*
		 * Try the lock byte first; on failure fall back to
		 * mutex_trylock_adaptive() and finally to sleeping
		 * on the mutex queue via mutex_lock_queue().
		 */
		if (set_lock_byte(&mp->mutex_lockw) == 0) {
			mp->mutex_owner = (uintptr_t)self;
			DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
		} else if (mutex_trylock_adaptive(mp) != 0) {
			(void) mutex_lock_queue(self, msp, mp, NULL);
		}

		if (msp)
			record_begin_hold(msp);
	}
}
2153*0Sstevel@tonic-gate 
/*
 * Release an internal library lock acquired by lmutex_lock() and
 * leave the critical region entered there (exit_critical() below).
 */
void
lmutex_unlock(mutex_t *mp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	ASSERT(mp->mutex_type == USYNC_THREAD);

	/*
	 * Optimize the case of no lock statistics and only a single thread.
	 * (Most likely a traditional single-threaded application.)
	 */
	if (udp->uberflags.uf_all == 0) {
		/*
		 * Only one thread exists so there can be no waiters.
		 */
		mp->mutex_owner = 0;
		mp->mutex_lockword = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	} else {
		tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
		lwpid_t lwpid;

		if (msp)
			(void) record_hold_time(msp);
		/* Release the lock and unpark one queued waiter, if any. */
		if ((lwpid = mutex_unlock_queue(mp)) != 0) {
			(void) __lwp_unpark(lwpid);
			preempt(self);
		}
	}
	exit_critical(self);
}
2186*0Sstevel@tonic-gate 
/*
 * Return non-zero if the calling thread (in the calling process)
 * holds this process-shared mutex.  See the race discussion below
 * for why the ownership fields are tested five times.
 */
static int
shared_mutex_held(mutex_t *mparg)
{
	/*
	 * There is an inherent data race in the current ownership design.
	 * The mutex_owner and mutex_ownerpid fields cannot be set or tested
	 * atomically as a pair. The original implementation tested each
	 * field just once. This was exposed to trivial false positives in
	 * the case of multiple multithreaded processes with thread addresses
	 * in common. To close the window to an acceptable level we now use a
	 * sequence of five tests: pid-thr-pid-thr-pid. This ensures that any
	 * single interruption will still leave one uninterrupted sequence of
	 * pid-thr-pid tests intact.
	 *
	 * It is assumed that all updates are always ordered thr-pid and that
	 * we have TSO hardware.
	 */
	volatile mutex_t *mp = (volatile mutex_t *)mparg;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;

	if (mp->mutex_ownerpid != udp->pid)	/* pid test #1 */
		return (0);

	if (!MUTEX_OWNED(mp, self))		/* thr test #1 */
		return (0);

	if (mp->mutex_ownerpid != udp->pid)	/* pid test #2 */
		return (0);

	if (!MUTEX_OWNED(mp, self))		/* thr test #2 */
		return (0);

	if (mp->mutex_ownerpid != udp->pid)	/* pid test #3 */
		return (0);

	return (1);
}
2225*0Sstevel@tonic-gate 
2226*0Sstevel@tonic-gate /*
2227*0Sstevel@tonic-gate  * Some crufty old programs define their own version of _mutex_held()
2228*0Sstevel@tonic-gate  * to be simply return(1).  This breaks internal libc logic, so we
2229*0Sstevel@tonic-gate  * define a private version for exclusive use by libc, mutex_is_held(),
2230*0Sstevel@tonic-gate  * and also a new public function, __mutex_held(), to be used in new
2231*0Sstevel@tonic-gate  * code to circumvent these crufty old programs.
2232*0Sstevel@tonic-gate  */
2233*0Sstevel@tonic-gate #pragma weak mutex_held = mutex_is_held
2234*0Sstevel@tonic-gate #pragma weak _mutex_held = mutex_is_held
2235*0Sstevel@tonic-gate #pragma weak __mutex_held = mutex_is_held
2236*0Sstevel@tonic-gate int
2237*0Sstevel@tonic-gate mutex_is_held(mutex_t *mp)
2238*0Sstevel@tonic-gate {
2239*0Sstevel@tonic-gate 	if (mp->mutex_type & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))
2240*0Sstevel@tonic-gate 		return (shared_mutex_held(mp));
2241*0Sstevel@tonic-gate 	return (MUTEX_OWNED(mp, curthread));
2242*0Sstevel@tonic-gate }
2243*0Sstevel@tonic-gate 
#pragma weak _private_mutex_destroy = __mutex_destroy
#pragma weak mutex_destroy = __mutex_destroy
#pragma weak _mutex_destroy = __mutex_destroy
#pragma weak pthread_mutex_destroy = __mutex_destroy
#pragma weak _pthread_mutex_destroy = __mutex_destroy
/*
 * Invalidate the mutex: clear its magic number and its LOCK_INITED
 * flag, and remove it from the thread-debug synch-object registry.
 * Always returns 0.
 */
int
__mutex_destroy(mutex_t *mp)
{
	mp->mutex_magic = 0;
	mp->mutex_flag &= ~LOCK_INITED;
	tdb_sync_obj_deregister(mp);
	return (0);
}
2257*0Sstevel@tonic-gate 
2258*0Sstevel@tonic-gate /*
2259*0Sstevel@tonic-gate  * Spin locks are separate from ordinary mutexes,
2260*0Sstevel@tonic-gate  * but we use the same data structure for them.
2261*0Sstevel@tonic-gate  */
2262*0Sstevel@tonic-gate 
2263*0Sstevel@tonic-gate #pragma weak pthread_spin_init = _pthread_spin_init
2264*0Sstevel@tonic-gate int
2265*0Sstevel@tonic-gate _pthread_spin_init(pthread_spinlock_t *lock, int pshared)
2266*0Sstevel@tonic-gate {
2267*0Sstevel@tonic-gate 	mutex_t *mp = (mutex_t *)lock;
2268*0Sstevel@tonic-gate 
2269*0Sstevel@tonic-gate 	(void) _memset(mp, 0, sizeof (*mp));
2270*0Sstevel@tonic-gate 	if (pshared == PTHREAD_PROCESS_SHARED)
2271*0Sstevel@tonic-gate 		mp->mutex_type = USYNC_PROCESS;
2272*0Sstevel@tonic-gate 	else
2273*0Sstevel@tonic-gate 		mp->mutex_type = USYNC_THREAD;
2274*0Sstevel@tonic-gate 	mp->mutex_flag = LOCK_INITED;
2275*0Sstevel@tonic-gate 	mp->mutex_magic = MUTEX_MAGIC;
2276*0Sstevel@tonic-gate 	return (0);
2277*0Sstevel@tonic-gate }
2278*0Sstevel@tonic-gate 
2279*0Sstevel@tonic-gate #pragma weak pthread_spin_destroy = _pthread_spin_destroy
2280*0Sstevel@tonic-gate int
2281*0Sstevel@tonic-gate _pthread_spin_destroy(pthread_spinlock_t *lock)
2282*0Sstevel@tonic-gate {
2283*0Sstevel@tonic-gate 	(void) _memset(lock, 0, sizeof (*lock));
2284*0Sstevel@tonic-gate 	return (0);
2285*0Sstevel@tonic-gate }
2286*0Sstevel@tonic-gate 
2287*0Sstevel@tonic-gate #pragma weak pthread_spin_trylock = _pthread_spin_trylock
2288*0Sstevel@tonic-gate int
2289*0Sstevel@tonic-gate _pthread_spin_trylock(pthread_spinlock_t *lock)
2290*0Sstevel@tonic-gate {
2291*0Sstevel@tonic-gate 	mutex_t *mp = (mutex_t *)lock;
2292*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
2293*0Sstevel@tonic-gate 	int error = 0;
2294*0Sstevel@tonic-gate 
2295*0Sstevel@tonic-gate 	no_preempt(self);
2296*0Sstevel@tonic-gate 	if (set_lock_byte(&mp->mutex_lockw) != 0)
2297*0Sstevel@tonic-gate 		error = EBUSY;
2298*0Sstevel@tonic-gate 	else {
2299*0Sstevel@tonic-gate 		mp->mutex_owner = (uintptr_t)self;
2300*0Sstevel@tonic-gate 		if (mp->mutex_type == USYNC_PROCESS)
2301*0Sstevel@tonic-gate 			mp->mutex_ownerpid = self->ul_uberdata->pid;
2302*0Sstevel@tonic-gate 		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
2303*0Sstevel@tonic-gate 	}
2304*0Sstevel@tonic-gate 	preempt(self);
2305*0Sstevel@tonic-gate 	return (error);
2306*0Sstevel@tonic-gate }
2307*0Sstevel@tonic-gate 
2308*0Sstevel@tonic-gate #pragma weak pthread_spin_lock = _pthread_spin_lock
2309*0Sstevel@tonic-gate int
2310*0Sstevel@tonic-gate _pthread_spin_lock(pthread_spinlock_t *lock)
2311*0Sstevel@tonic-gate {
2312*0Sstevel@tonic-gate 	volatile uint8_t *lockp =
2313*0Sstevel@tonic-gate 		(volatile uint8_t *)&((mutex_t *)lock)->mutex_lockw;
2314*0Sstevel@tonic-gate 
2315*0Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
2316*0Sstevel@tonic-gate 	/*
2317*0Sstevel@tonic-gate 	 * We don't care whether the owner is running on a processor.
2318*0Sstevel@tonic-gate 	 * We just spin because that's what this interface requires.
2319*0Sstevel@tonic-gate 	 */
2320*0Sstevel@tonic-gate 	for (;;) {
2321*0Sstevel@tonic-gate 		if (*lockp == 0) {	/* lock byte appears to be clear */
2322*0Sstevel@tonic-gate 			if (_pthread_spin_trylock(lock) == 0)
2323*0Sstevel@tonic-gate 				return (0);
2324*0Sstevel@tonic-gate 		}
2325*0Sstevel@tonic-gate 		SMT_PAUSE();
2326*0Sstevel@tonic-gate 	}
2327*0Sstevel@tonic-gate }
2328*0Sstevel@tonic-gate 
#pragma weak pthread_spin_unlock = _pthread_spin_unlock
/*
 * Release a spin lock.  Always returns 0.
 */
int
_pthread_spin_unlock(pthread_spinlock_t *lock)
{
	mutex_t *mp = (mutex_t *)lock;
	ulwp_t *self = curthread;

	no_preempt(self);
	/* Clear ownership before the atomic swap that frees the lock. */
	mp->mutex_owner = 0;
	mp->mutex_ownerpid = 0;
	DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
	(void) swap32(&mp->mutex_lockword, 0);
	preempt(self);
	return (0);
}
2344*0Sstevel@tonic-gate 
2345*0Sstevel@tonic-gate #pragma weak cond_init = _cond_init
2346*0Sstevel@tonic-gate /* ARGSUSED2 */
2347*0Sstevel@tonic-gate int
2348*0Sstevel@tonic-gate _cond_init(cond_t *cvp, int type, void *arg)
2349*0Sstevel@tonic-gate {
2350*0Sstevel@tonic-gate 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
2351*0Sstevel@tonic-gate 		return (EINVAL);
2352*0Sstevel@tonic-gate 	(void) _memset(cvp, 0, sizeof (*cvp));
2353*0Sstevel@tonic-gate 	cvp->cond_type = (uint16_t)type;
2354*0Sstevel@tonic-gate 	cvp->cond_magic = COND_MAGIC;
2355*0Sstevel@tonic-gate 	return (0);
2356*0Sstevel@tonic-gate }
2357*0Sstevel@tonic-gate 
2358*0Sstevel@tonic-gate /*
2359*0Sstevel@tonic-gate  * cond_sleep_queue(): utility function for cond_wait_queue().
2360*0Sstevel@tonic-gate  *
2361*0Sstevel@tonic-gate  * Go to sleep on a condvar sleep queue, expect to be waked up
2362*0Sstevel@tonic-gate  * by someone calling cond_signal() or cond_broadcast() or due
2363*0Sstevel@tonic-gate  * to receiving a UNIX signal or being cancelled, or just simply
2364*0Sstevel@tonic-gate  * due to a spurious wakeup (like someome calling forkall()).
2365*0Sstevel@tonic-gate  *
2366*0Sstevel@tonic-gate  * The associated mutex is *not* reacquired before returning.
2367*0Sstevel@tonic-gate  * That must be done by the caller of cond_sleep_queue().
2368*0Sstevel@tonic-gate  */
int
cond_sleep_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	queue_head_t *qp;
	queue_head_t *mqp;
	lwpid_t lwpid;
	int signalled;
	int error;

	/*
	 * Put ourself on the CV sleep queue, unlock the mutex, then
	 * park ourself and unpark a candidate lwp to grab the mutex.
	 * We must go onto the CV sleep queue before dropping the
	 * mutex in order to guarantee atomicity of the operation.
	 */
	self->ul_sp = stkptr();
	qp = queue_lock(cvp, CV);
	enqueue(qp, self, cvp, CV);
	cvp->cond_waiters_user = 1;
	self->ul_cvmutex = mp;
	/* NOTE(review): set only for timed waits — confirm intent */
	self->ul_cv_wake = (tsp != NULL);
	self->ul_signalled = 0;
	/* Drop the mutex; its next waiter (if any) is unparked below. */
	lwpid = mutex_unlock_queue(mp);
	for (;;) {
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if (lwpid != 0) {
			lwpid = preempt_unpark(self, lwpid);
			preempt(self);
		}
		/*
		 * We may have a deferred signal present,
		 * in which case we should return EINTR.
		 * Also, we may have received a SIGCANCEL; if so
		 * and we are cancelable we should return EINTR.
		 * We force an immediate EINTR return from
		 * __lwp_park() by turning our parking flag off.
		 */
		if (self->ul_cursig != 0 ||
		    (self->ul_cancelable && self->ul_cancel_pending))
			set_parking_flag(self, 0);
		/*
		 * __lwp_park() will return the residual time in tsp
		 * if we are unparked before the timeout expires.
		 */
		error = __lwp_park(tsp, lwpid);
		set_parking_flag(self, 0);
		lwpid = 0;	/* unpark the other lwp only once */
		/*
		 * We were waked up by cond_signal(), cond_broadcast(),
		 * by an interrupt or timeout (EINTR or ETIME),
		 * or we may just have gotten a spurious wakeup.
		 */
		qp = queue_lock(cvp, CV);
		mqp = queue_lock(mp, MX);
		/* Off both sleep queues: the wait is over. */
		if (self->ul_sleepq == NULL)
			break;
		/*
		 * We are on either the condvar sleep queue or the
		 * mutex sleep queue.  If we are on the mutex sleep
		 * queue, continue sleeping.  If we are on the condvar
		 * sleep queue, break out of the sleep if we were
		 * interrupted or we timed out (EINTR or ETIME).
		 * Else this is a spurious wakeup; continue the loop.
		 */
		if (self->ul_sleepq == mqp)		/* mutex queue */
			tsp = NULL;
		else if (self->ul_sleepq == qp) {	/* condvar queue */
			if (error) {
				cvp->cond_waiters_user = dequeue_self(qp, cvp);
				break;
			}
			/*
			 * Else a spurious wakeup on the condvar queue.
			 * __lwp_park() has already adjusted the timeout.
			 */
		} else {
			thr_panic("cond_sleep_queue(): thread not on queue");
		}
		queue_unlock(mqp);
	}

	self->ul_sp = 0;
	ASSERT(self->ul_cvmutex == NULL && self->ul_cv_wake == 0);
	ASSERT(self->ul_sleepq == NULL && self->ul_link == NULL &&
	    self->ul_wchan == NULL);

	/* Latch whether cond_signal() chose us, then clear the flag. */
	signalled = self->ul_signalled;
	self->ul_signalled = 0;
	queue_unlock(qp);
	queue_unlock(mqp);

	/*
	 * If we were concurrently cond_signal()d and any of:
	 * received a UNIX signal, were cancelled, or got a timeout,
	 * then perform another cond_signal() to avoid consuming it.
	 */
	if (error && signalled)
		(void) cond_signal_internal(cvp);

	return (error);
}
2472*0Sstevel@tonic-gate 
/*
 * Wait on a condition variable via the user-level sleep queues,
 * then reacquire the associated mutex before returning.
 * 'tsp', if non-NULL, is a timeout; 'msp' is the optional mutex
 * statistics record.  Returns 0 or the error (e.g. EINTR, ETIME)
 * produced by cond_sleep_queue().
 */
int
cond_wait_queue(cond_t *cvp, mutex_t *mp, timespec_t *tsp,
	tdb_mutex_stats_t *msp)
{
	ulwp_t *self = curthread;
	int error;

	/*
	 * The old thread library was programmed to defer signals
	 * while in cond_wait() so that the associated mutex would
	 * be guaranteed to be held when the application signal
	 * handler was invoked.
	 *
	 * We do not behave this way by default; the state of the
	 * associated mutex in the signal handler is undefined.
	 *
	 * To accommodate applications that depend on the old
	 * behavior, the _THREAD_COND_WAIT_DEFER environment
	 * variable can be set to 1 and we will behave in the
	 * old way with respect to cond_wait().
	 */
	if (self->ul_cond_wait_defer)
		sigoff(self);

	error = cond_sleep_queue(cvp, mp, tsp);

	/*
	 * Reacquire the mutex.
	 * Same acquisition ladder as lmutex_lock(): lock byte,
	 * then mutex_trylock_adaptive(), then sleep on the queue.
	 */
	if (set_lock_byte(&mp->mutex_lockw) == 0) {
		mp->mutex_owner = (uintptr_t)self;
		DTRACE_PROBE3(plockstat, mutex__acquire, mp, 0, 0);
	} else if (mutex_trylock_adaptive(mp) != 0) {
		(void) mutex_lock_queue(self, msp, mp, NULL);
	}

	if (msp)
		record_begin_hold(msp);

	/*
	 * Take any deferred signal now, after we have reacquired the mutex.
	 */
	if (self->ul_cond_wait_defer)
		sigon(self);

	return (error);
}
2520*0Sstevel@tonic-gate 
2521*0Sstevel@tonic-gate /*
2522*0Sstevel@tonic-gate  * cond_sleep_kernel(): utility function for cond_wait_kernel().
2523*0Sstevel@tonic-gate  * See the comment ahead of cond_sleep_queue(), above.
2524*0Sstevel@tonic-gate  */
2525*0Sstevel@tonic-gate int
2526*0Sstevel@tonic-gate cond_sleep_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2527*0Sstevel@tonic-gate {
2528*0Sstevel@tonic-gate 	int mtype = mp->mutex_type;
2529*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
2530*0Sstevel@tonic-gate 	int error;
2531*0Sstevel@tonic-gate 
	/*
	 * For a priority-ceiling mutex, drop our inflated priority
	 * before sleeping, since we are about to release the mutex.
	 */
2532*0Sstevel@tonic-gate 	if (mtype & PTHREAD_PRIO_PROTECT) {
2533*0Sstevel@tonic-gate 		if (_ceil_mylist_del(mp))
2534*0Sstevel@tonic-gate 			_ceil_prio_waive();
2535*0Sstevel@tonic-gate 	}
2536*0Sstevel@tonic-gate 
	/*
	 * Record where we are sleeping (for debuggers/stack walkers)
	 * and clear the user-level ownership fields; the kernel will
	 * atomically release the mutex as part of ___lwp_cond_wait().
	 */
2537*0Sstevel@tonic-gate 	self->ul_sp = stkptr();
2538*0Sstevel@tonic-gate 	self->ul_wchan = cvp;
2539*0Sstevel@tonic-gate 	mp->mutex_owner = 0;
2540*0Sstevel@tonic-gate 	mp->mutex_ownerpid = 0;
2541*0Sstevel@tonic-gate 	if (mtype & PTHREAD_PRIO_INHERIT)
2542*0Sstevel@tonic-gate 		mp->mutex_lockw = LOCKCLEAR;
2543*0Sstevel@tonic-gate 	/*
2544*0Sstevel@tonic-gate 	 * ___lwp_cond_wait() returns immediately with EINTR if
2545*0Sstevel@tonic-gate 	 * set_parking_flag(self,0) is called on this lwp before it
2546*0Sstevel@tonic-gate 	 * goes to sleep in the kernel.  sigacthandler() calls this
2547*0Sstevel@tonic-gate 	 * when a deferred signal is noted.  This assures that we don't
2548*0Sstevel@tonic-gate 	 * get stuck in ___lwp_cond_wait() with all signals blocked
2549*0Sstevel@tonic-gate 	 * due to taking a deferred signal before going to sleep.
2550*0Sstevel@tonic-gate 	 */
2551*0Sstevel@tonic-gate 	set_parking_flag(self, 1);
	/*
	 * If a signal or a cancellation is already pending, don't park
	 * at all; the kernel wait will then return EINTR immediately.
	 */
2552*0Sstevel@tonic-gate 	if (self->ul_cursig != 0 ||
2553*0Sstevel@tonic-gate 	    (self->ul_cancelable && self->ul_cancel_pending))
2554*0Sstevel@tonic-gate 		set_parking_flag(self, 0);
2555*0Sstevel@tonic-gate 	error = ___lwp_cond_wait(cvp, mp, tsp, 1);
2556*0Sstevel@tonic-gate 	set_parking_flag(self, 0);
2557*0Sstevel@tonic-gate 	self->ul_sp = 0;
2558*0Sstevel@tonic-gate 	self->ul_wchan = NULL;
	/* Note: the mutex is NOT reacquired here; cond_wait_kernel() does that. */
2559*0Sstevel@tonic-gate 	return (error);
2560*0Sstevel@tonic-gate }
2561*0Sstevel@tonic-gate 
/*
 * cond_wait_kernel(): block in the kernel on a condition variable
 * (used for process-shared and priority-protocol cases) and then
 * reacquire the mutex via _private_mutex_lock().
 */
2562*0Sstevel@tonic-gate int
2563*0Sstevel@tonic-gate cond_wait_kernel(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2564*0Sstevel@tonic-gate {
2565*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
2566*0Sstevel@tonic-gate 	int error;
2567*0Sstevel@tonic-gate 	int merror;
2568*0Sstevel@tonic-gate 
2569*0Sstevel@tonic-gate 	/*
2570*0Sstevel@tonic-gate 	 * See the large comment in cond_wait_queue(), above.
	 * (Optional old-libthread behavior: defer signals across the wait.)
2571*0Sstevel@tonic-gate 	 */
2572*0Sstevel@tonic-gate 	if (self->ul_cond_wait_defer)
2573*0Sstevel@tonic-gate 		sigoff(self);
2574*0Sstevel@tonic-gate 
2575*0Sstevel@tonic-gate 	error = cond_sleep_kernel(cvp, mp, tsp);
2576*0Sstevel@tonic-gate 
2577*0Sstevel@tonic-gate 	/*
2578*0Sstevel@tonic-gate 	 * Override the return code from ___lwp_cond_wait()
2579*0Sstevel@tonic-gate 	 * with any non-zero return code from mutex_lock().
2580*0Sstevel@tonic-gate 	 * This addresses robust lock failures in particular;
2581*0Sstevel@tonic-gate 	 * the caller must see the EOWNERDEAD or ENOTRECOVERABLE
2582*0Sstevel@tonic-gate 	 * errors in order to take corrective action.
2583*0Sstevel@tonic-gate 	 */
2584*0Sstevel@tonic-gate 	if ((merror = _private_mutex_lock(mp)) != 0)
2585*0Sstevel@tonic-gate 		error = merror;
2586*0Sstevel@tonic-gate 
2587*0Sstevel@tonic-gate 	/*
2588*0Sstevel@tonic-gate 	 * Take any deferred signal now, after we have reacquired the mutex.
2589*0Sstevel@tonic-gate 	 */
2590*0Sstevel@tonic-gate 	if (self->ul_cond_wait_defer)
2591*0Sstevel@tonic-gate 		sigon(self);
2592*0Sstevel@tonic-gate 
2593*0Sstevel@tonic-gate 	return (error);
2594*0Sstevel@tonic-gate }
2595*0Sstevel@tonic-gate 
2596*0Sstevel@tonic-gate /*
2597*0Sstevel@tonic-gate  * Common code for _cond_wait() and _cond_timedwait()
2598*0Sstevel@tonic-gate  */
2599*0Sstevel@tonic-gate int
2600*0Sstevel@tonic-gate cond_wait_common(cond_t *cvp, mutex_t *mp, timespec_t *tsp)
2601*0Sstevel@tonic-gate {
2602*0Sstevel@tonic-gate 	int mtype = mp->mutex_type;
2603*0Sstevel@tonic-gate 	hrtime_t begin_sleep = 0;
2604*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
2605*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
2606*0Sstevel@tonic-gate 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
2607*0Sstevel@tonic-gate 	tdb_mutex_stats_t *msp = MUTEX_STATS(mp, udp);
2608*0Sstevel@tonic-gate 	uint8_t rcount;
2609*0Sstevel@tonic-gate 	int error = 0;
2610*0Sstevel@tonic-gate 
2611*0Sstevel@tonic-gate 	/*
2612*0Sstevel@tonic-gate 	 * The SUSV3 Posix spec for pthread_cond_timedwait() states:
2613*0Sstevel@tonic-gate 	 *	Except in the case of [ETIMEDOUT], all these error checks
2614*0Sstevel@tonic-gate 	 *	shall act as if they were performed immediately at the
2615*0Sstevel@tonic-gate 	 *	beginning of processing for the function and shall cause
2616*0Sstevel@tonic-gate 	 *	an error return, in effect, prior to modifying the state
2617*0Sstevel@tonic-gate 	 *	of the mutex specified by mutex or the condition variable
2618*0Sstevel@tonic-gate 	 *	specified by cond.
2619*0Sstevel@tonic-gate 	 * Therefore, we must return EINVAL now if the timeout is invalid.
2620*0Sstevel@tonic-gate 	 */
2621*0Sstevel@tonic-gate 	if (tsp != NULL &&
2622*0Sstevel@tonic-gate 	    (tsp->tv_sec < 0 || (ulong_t)tsp->tv_nsec >= NANOSEC))
2623*0Sstevel@tonic-gate 		return (EINVAL);
2624*0Sstevel@tonic-gate 
	/* Report the impending sleep to the thread debug interface. */
2625*0Sstevel@tonic-gate 	if (__td_event_report(self, TD_SLEEP, udp)) {
2626*0Sstevel@tonic-gate 		self->ul_sp = stkptr();
2627*0Sstevel@tonic-gate 		self->ul_wchan = cvp;
2628*0Sstevel@tonic-gate 		self->ul_td_evbuf.eventnum = TD_SLEEP;
2629*0Sstevel@tonic-gate 		self->ul_td_evbuf.eventdata = cvp;
2630*0Sstevel@tonic-gate 		tdb_event(TD_SLEEP, udp);
2631*0Sstevel@tonic-gate 		self->ul_sp = 0;
2632*0Sstevel@tonic-gate 	}
2633*0Sstevel@tonic-gate 	if (csp) {
2634*0Sstevel@tonic-gate 		if (tsp)
2635*0Sstevel@tonic-gate 			tdb_incr(csp->cond_timedwait);
2636*0Sstevel@tonic-gate 		else
2637*0Sstevel@tonic-gate 			tdb_incr(csp->cond_wait);
2638*0Sstevel@tonic-gate 	}
2639*0Sstevel@tonic-gate 	if (msp)
2640*0Sstevel@tonic-gate 		begin_sleep = record_hold_time(msp);
2641*0Sstevel@tonic-gate 	else if (csp)
2642*0Sstevel@tonic-gate 		begin_sleep = gethrtime();
2643*0Sstevel@tonic-gate 
	/*
	 * Optional error detection: diagnose waiting on a mutex we don't
	 * hold, waiting with a held recursive mutex, and mixing a
	 * process-shared condvar with a process-private mutex (or vice versa).
	 */
2644*0Sstevel@tonic-gate 	if (self->ul_error_detection) {
2645*0Sstevel@tonic-gate 		if (!mutex_is_held(mp))
2646*0Sstevel@tonic-gate 			lock_error(mp, "cond_wait", cvp, NULL);
2647*0Sstevel@tonic-gate 		if ((mtype & LOCK_RECURSIVE) && mp->mutex_rcount != 0)
2648*0Sstevel@tonic-gate 			lock_error(mp, "recursive mutex in cond_wait",
2649*0Sstevel@tonic-gate 				cvp, NULL);
2650*0Sstevel@tonic-gate 		if (cvp->cond_type & USYNC_PROCESS) {
2651*0Sstevel@tonic-gate 			if (!(mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST)))
2652*0Sstevel@tonic-gate 				lock_error(mp, "cond_wait", cvp,
2653*0Sstevel@tonic-gate 					"condvar process-shared, "
2654*0Sstevel@tonic-gate 					"mutex process-private");
2655*0Sstevel@tonic-gate 		} else {
2656*0Sstevel@tonic-gate 			if (mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST))
2657*0Sstevel@tonic-gate 				lock_error(mp, "cond_wait", cvp,
2658*0Sstevel@tonic-gate 					"condvar process-private, "
2659*0Sstevel@tonic-gate 					"mutex process-shared");
2660*0Sstevel@tonic-gate 		}
2661*0Sstevel@tonic-gate 	}
2662*0Sstevel@tonic-gate 
2663*0Sstevel@tonic-gate 	/*
2664*0Sstevel@tonic-gate 	 * We deal with recursive mutexes by completely
2665*0Sstevel@tonic-gate 	 * dropping the lock and restoring the recursion
2666*0Sstevel@tonic-gate 	 * count after waking up.  This is arguably wrong,
2667*0Sstevel@tonic-gate 	 * but it obeys the principle of least astonishment.
2668*0Sstevel@tonic-gate 	 */
2669*0Sstevel@tonic-gate 	rcount = mp->mutex_rcount;
2670*0Sstevel@tonic-gate 	mp->mutex_rcount = 0;
	/*
	 * Process-shared, priority-protocol, or process-shared-condvar
	 * cases must wait in the kernel; everything else can use the
	 * faster user-level sleep queue.
	 */
2671*0Sstevel@tonic-gate 	if ((mtype & (USYNC_PROCESS | USYNC_PROCESS_ROBUST |
2672*0Sstevel@tonic-gate 	    PTHREAD_PRIO_INHERIT | PTHREAD_PRIO_PROTECT)) |
2673*0Sstevel@tonic-gate 	    (cvp->cond_type & USYNC_PROCESS))
2674*0Sstevel@tonic-gate 		error = cond_wait_kernel(cvp, mp, tsp);
2675*0Sstevel@tonic-gate 	else
2676*0Sstevel@tonic-gate 		error = cond_wait_queue(cvp, mp, tsp, msp);
2677*0Sstevel@tonic-gate 	mp->mutex_rcount = rcount;
2678*0Sstevel@tonic-gate 
	/* Record how long we slept and whether a timeout occurred. */
2679*0Sstevel@tonic-gate 	if (csp) {
2680*0Sstevel@tonic-gate 		hrtime_t lapse = gethrtime() - begin_sleep;
2681*0Sstevel@tonic-gate 		if (tsp == NULL)
2682*0Sstevel@tonic-gate 			csp->cond_wait_sleep_time += lapse;
2683*0Sstevel@tonic-gate 		else {
2684*0Sstevel@tonic-gate 			csp->cond_timedwait_sleep_time += lapse;
2685*0Sstevel@tonic-gate 			if (error == ETIME)
2686*0Sstevel@tonic-gate 				tdb_incr(csp->cond_timedwait_timeout);
2687*0Sstevel@tonic-gate 		}
2688*0Sstevel@tonic-gate 	}
2689*0Sstevel@tonic-gate 	return (error);
2690*0Sstevel@tonic-gate }
2691*0Sstevel@tonic-gate 
2692*0Sstevel@tonic-gate /*
2693*0Sstevel@tonic-gate  * cond_wait() is a cancellation point but _cond_wait() is not.
2694*0Sstevel@tonic-gate  * System libraries call the non-cancellation version.
2695*0Sstevel@tonic-gate  * It is expected that only applications call the cancellation version.
2696*0Sstevel@tonic-gate  */
2697*0Sstevel@tonic-gate int
2698*0Sstevel@tonic-gate _cond_wait(cond_t *cvp, mutex_t *mp)
2699*0Sstevel@tonic-gate {
2700*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
2701*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
2702*0Sstevel@tonic-gate 	uberflags_t *gflags;
2703*0Sstevel@tonic-gate 
2704*0Sstevel@tonic-gate 	/*
2705*0Sstevel@tonic-gate 	 * Optimize the common case of USYNC_THREAD plus
2706*0Sstevel@tonic-gate 	 * no error detection, no lock statistics, and no event tracing.
	 * All of the OR'ed fields below are zero in that case, so a
	 * single comparison selects the fast path.
2707*0Sstevel@tonic-gate 	 */
2708*0Sstevel@tonic-gate 	if ((gflags = self->ul_schedctl_called) != NULL &&
2709*0Sstevel@tonic-gate 	    (cvp->cond_type | mp->mutex_type | gflags->uf_trs_ted |
2710*0Sstevel@tonic-gate 	    self->ul_td_events_enable |
2711*0Sstevel@tonic-gate 	    udp->tdb.tdb_ev_global_mask.event_bits[0]) == 0)
2712*0Sstevel@tonic-gate 		return (cond_wait_queue(cvp, mp, NULL, NULL));
2713*0Sstevel@tonic-gate 
2714*0Sstevel@tonic-gate 	/*
2715*0Sstevel@tonic-gate 	 * Else do it the long way.
2716*0Sstevel@tonic-gate 	 */
2717*0Sstevel@tonic-gate 	return (cond_wait_common(cvp, mp, NULL));
2718*0Sstevel@tonic-gate }
2719*0Sstevel@tonic-gate 
/*
 * cond_wait(): cancellation-point wrapper around _cond_wait().
 * An EINTR return takes the _canceloff() path, which (unlike
 * _canceloff_nocancel()) may act on a pending cancellation.
 */
2720*0Sstevel@tonic-gate int
2721*0Sstevel@tonic-gate cond_wait(cond_t *cvp, mutex_t *mp)
2722*0Sstevel@tonic-gate {
2723*0Sstevel@tonic-gate 	int error;
2724*0Sstevel@tonic-gate 
2725*0Sstevel@tonic-gate 	_cancelon();
2726*0Sstevel@tonic-gate 	error = _cond_wait(cvp, mp);
2727*0Sstevel@tonic-gate 	if (error == EINTR)
2728*0Sstevel@tonic-gate 		_canceloff();
2729*0Sstevel@tonic-gate 	else
2730*0Sstevel@tonic-gate 		_canceloff_nocancel();
2731*0Sstevel@tonic-gate 	return (error);
2732*0Sstevel@tonic-gate }
2733*0Sstevel@tonic-gate 
2734*0Sstevel@tonic-gate #pragma weak pthread_cond_wait = _pthread_cond_wait
/*
 * pthread_cond_wait(): POSIX entry point.  POSIX has no EINTR return
 * for pthread_cond_wait(); an interrupted wait is reported as a
 * normal (spurious) wakeup, i.e. 0.
 */
2735*0Sstevel@tonic-gate int
2736*0Sstevel@tonic-gate _pthread_cond_wait(cond_t *cvp, mutex_t *mp)
2737*0Sstevel@tonic-gate {
2738*0Sstevel@tonic-gate 	int error;
2739*0Sstevel@tonic-gate 
2740*0Sstevel@tonic-gate 	error = cond_wait(cvp, mp);
2741*0Sstevel@tonic-gate 	return ((error == EINTR)? 0 : error);
2742*0Sstevel@tonic-gate }
2743*0Sstevel@tonic-gate 
2744*0Sstevel@tonic-gate /*
2745*0Sstevel@tonic-gate  * cond_timedwait() is a cancellation point but _cond_timedwait() is not.
2746*0Sstevel@tonic-gate  * System libraries call the non-cancellation version.
2747*0Sstevel@tonic-gate  * It is expected that only applications call the cancellation version.
2748*0Sstevel@tonic-gate  */
2749*0Sstevel@tonic-gate int
2750*0Sstevel@tonic-gate _cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
2751*0Sstevel@tonic-gate {
2752*0Sstevel@tonic-gate 	clockid_t clock_id = cvp->cond_clockid;
2753*0Sstevel@tonic-gate 	timespec_t reltime;
2754*0Sstevel@tonic-gate 	int error;
2755*0Sstevel@tonic-gate 
	/* Unknown clock ids are silently treated as CLOCK_REALTIME. */
2756*0Sstevel@tonic-gate 	if (clock_id != CLOCK_REALTIME && clock_id != CLOCK_HIGHRES)
2757*0Sstevel@tonic-gate 		clock_id = CLOCK_REALTIME;
2758*0Sstevel@tonic-gate 	abstime_to_reltime(clock_id, abstime, &reltime);
2759*0Sstevel@tonic-gate 	error = cond_wait_common(cvp, mp, &reltime);
2760*0Sstevel@tonic-gate 	if (error == ETIME && clock_id == CLOCK_HIGHRES) {
2761*0Sstevel@tonic-gate 		/*
2762*0Sstevel@tonic-gate 		 * Don't return ETIME if we didn't really get a timeout.
2763*0Sstevel@tonic-gate 		 * This can happen if we return because someone resets
2764*0Sstevel@tonic-gate 		 * the system clock.  Just return zero in this case,
2765*0Sstevel@tonic-gate 		 * giving a spurious wakeup but not a timeout.
2766*0Sstevel@tonic-gate 		 */
2767*0Sstevel@tonic-gate 		if ((hrtime_t)(uint32_t)abstime->tv_sec * NANOSEC +
2768*0Sstevel@tonic-gate 		    abstime->tv_nsec > gethrtime())
2769*0Sstevel@tonic-gate 			error = 0;
2770*0Sstevel@tonic-gate 	}
2771*0Sstevel@tonic-gate 	return (error);
2772*0Sstevel@tonic-gate }
2773*0Sstevel@tonic-gate 
/*
 * cond_timedwait(): cancellation-point wrapper around _cond_timedwait().
 * Mirrors the EINTR handling in cond_wait(), above.
 */
2774*0Sstevel@tonic-gate int
2775*0Sstevel@tonic-gate cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
2776*0Sstevel@tonic-gate {
2777*0Sstevel@tonic-gate 	int error;
2778*0Sstevel@tonic-gate 
2779*0Sstevel@tonic-gate 	_cancelon();
2780*0Sstevel@tonic-gate 	error = _cond_timedwait(cvp, mp, abstime);
2781*0Sstevel@tonic-gate 	if (error == EINTR)
2782*0Sstevel@tonic-gate 		_canceloff();
2783*0Sstevel@tonic-gate 	else
2784*0Sstevel@tonic-gate 		_canceloff_nocancel();
2785*0Sstevel@tonic-gate 	return (error);
2786*0Sstevel@tonic-gate }
2787*0Sstevel@tonic-gate 
2788*0Sstevel@tonic-gate #pragma weak pthread_cond_timedwait = _pthread_cond_timedwait
/*
 * pthread_cond_timedwait(): POSIX entry point.  Translate the internal
 * ETIME code to the POSIX ETIMEDOUT, and report an interrupted wait
 * as a spurious wakeup (0), since POSIX defines no EINTR return.
 */
2789*0Sstevel@tonic-gate int
2790*0Sstevel@tonic-gate _pthread_cond_timedwait(cond_t *cvp, mutex_t *mp, const timespec_t *abstime)
2791*0Sstevel@tonic-gate {
2792*0Sstevel@tonic-gate 	int error;
2793*0Sstevel@tonic-gate 
2794*0Sstevel@tonic-gate 	error = cond_timedwait(cvp, mp, abstime);
2795*0Sstevel@tonic-gate 	if (error == ETIME)
2796*0Sstevel@tonic-gate 		error = ETIMEDOUT;
2797*0Sstevel@tonic-gate 	else if (error == EINTR)
2798*0Sstevel@tonic-gate 		error = 0;
2799*0Sstevel@tonic-gate 	return (error);
2800*0Sstevel@tonic-gate }
2801*0Sstevel@tonic-gate 
2802*0Sstevel@tonic-gate /*
2803*0Sstevel@tonic-gate  * cond_reltimedwait() is a cancellation point but _cond_reltimedwait()
2804*0Sstevel@tonic-gate  * is not.  System libraries call the non-cancellation version.
2805*0Sstevel@tonic-gate  * It is expected that only applications call the cancellation version.
2806*0Sstevel@tonic-gate  */
2807*0Sstevel@tonic-gate int
2808*0Sstevel@tonic-gate _cond_reltimedwait(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
2809*0Sstevel@tonic-gate {
	/*
	 * Copy the caller's relative timeout to a local: cond_wait_common()
	 * takes a non-const timespec_t *, so this keeps the caller's
	 * *reltime unmodified (presumably it may be written to downstream
	 * — NOTE(review): confirm against cond_sleep_queue()).
	 */
2810*0Sstevel@tonic-gate 	timespec_t tslocal = *reltime;
2811*0Sstevel@tonic-gate 
2812*0Sstevel@tonic-gate 	return (cond_wait_common(cvp, mp, &tslocal));
2813*0Sstevel@tonic-gate }
2814*0Sstevel@tonic-gate 
2815*0Sstevel@tonic-gate #pragma weak cond_reltimedwait = _cond_reltimedwait_cancel
/*
 * cond_reltimedwait(): cancellation-point wrapper around
 * _cond_reltimedwait().  Mirrors the EINTR handling in cond_wait().
 */
2816*0Sstevel@tonic-gate int
2817*0Sstevel@tonic-gate _cond_reltimedwait_cancel(cond_t *cvp, mutex_t *mp, const timespec_t *reltime)
2818*0Sstevel@tonic-gate {
2819*0Sstevel@tonic-gate 	int error;
2820*0Sstevel@tonic-gate 
2821*0Sstevel@tonic-gate 	_cancelon();
2822*0Sstevel@tonic-gate 	error = _cond_reltimedwait(cvp, mp, reltime);
2823*0Sstevel@tonic-gate 	if (error == EINTR)
2824*0Sstevel@tonic-gate 		_canceloff();
2825*0Sstevel@tonic-gate 	else
2826*0Sstevel@tonic-gate 		_canceloff_nocancel();
2827*0Sstevel@tonic-gate 	return (error);
2828*0Sstevel@tonic-gate }
2829*0Sstevel@tonic-gate 
2830*0Sstevel@tonic-gate #pragma weak pthread_cond_reltimedwait_np = _pthread_cond_reltimedwait_np
/*
 * pthread_cond_reltimedwait_np(): non-portable relative-timeout wait.
 * Maps internal ETIME to ETIMEDOUT and EINTR to 0 (spurious wakeup),
 * matching _pthread_cond_timedwait(), above.
 */
2831*0Sstevel@tonic-gate int
2832*0Sstevel@tonic-gate _pthread_cond_reltimedwait_np(cond_t *cvp, mutex_t *mp,
2833*0Sstevel@tonic-gate 	const timespec_t *reltime)
2834*0Sstevel@tonic-gate {
2835*0Sstevel@tonic-gate 	int error;
2836*0Sstevel@tonic-gate 
2837*0Sstevel@tonic-gate 	error = _cond_reltimedwait_cancel(cvp, mp, reltime);
2838*0Sstevel@tonic-gate 	if (error == ETIME)
2839*0Sstevel@tonic-gate 		error = ETIMEDOUT;
2840*0Sstevel@tonic-gate 	else if (error == EINTR)
2841*0Sstevel@tonic-gate 		error = 0;
2842*0Sstevel@tonic-gate 	return (error);
2843*0Sstevel@tonic-gate }
2844*0Sstevel@tonic-gate 
2845*0Sstevel@tonic-gate #pragma weak pthread_cond_signal = cond_signal_internal
2846*0Sstevel@tonic-gate #pragma weak _pthread_cond_signal = cond_signal_internal
2847*0Sstevel@tonic-gate #pragma weak cond_signal = cond_signal_internal
2848*0Sstevel@tonic-gate #pragma weak _cond_signal = cond_signal_internal
/*
 * cond_signal_internal(): wake (at most) one waiter on cvp.
 * First wake any kernel-level waiter, then pick one user-level waiter
 * off the condvar sleep queue and either requeue him onto his mutex's
 * sleep queue (if we own that mutex) or unpark him directly.
 * Returns 0 or the error from __lwp_cond_signal().
 */
2849*0Sstevel@tonic-gate int
2850*0Sstevel@tonic-gate cond_signal_internal(cond_t *cvp)
2851*0Sstevel@tonic-gate {
2852*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
2853*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
2854*0Sstevel@tonic-gate 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
2855*0Sstevel@tonic-gate 	int error = 0;
2856*0Sstevel@tonic-gate 	queue_head_t *qp;
2857*0Sstevel@tonic-gate 	mutex_t *mp;
2858*0Sstevel@tonic-gate 	queue_head_t *mqp;
2859*0Sstevel@tonic-gate 	ulwp_t **ulwpp;
2860*0Sstevel@tonic-gate 	ulwp_t *ulwp;
2861*0Sstevel@tonic-gate 	ulwp_t *prev = NULL;
2862*0Sstevel@tonic-gate 	ulwp_t *next;
2863*0Sstevel@tonic-gate 	ulwp_t **suspp = NULL;
2864*0Sstevel@tonic-gate 	ulwp_t *susprev;
2865*0Sstevel@tonic-gate 
2866*0Sstevel@tonic-gate 	if (csp)
2867*0Sstevel@tonic-gate 		tdb_incr(csp->cond_signal);
2868*0Sstevel@tonic-gate 
2869*0Sstevel@tonic-gate 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
2870*0Sstevel@tonic-gate 		error = __lwp_cond_signal(cvp);
2871*0Sstevel@tonic-gate 
2872*0Sstevel@tonic-gate 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
2873*0Sstevel@tonic-gate 		return (error);
2874*0Sstevel@tonic-gate 
2875*0Sstevel@tonic-gate 	/*
2876*0Sstevel@tonic-gate 	 * Move someone from the condvar sleep queue to the mutex sleep
2877*0Sstevel@tonic-gate 	 * queue for the mutex that he will acquire on being waked up.
2878*0Sstevel@tonic-gate 	 * We can do this only if we own the mutex he will acquire.
2879*0Sstevel@tonic-gate 	 * If we do not own the mutex, or if his ul_cv_wake flag
2880*0Sstevel@tonic-gate 	 * is set, just dequeue and unpark him.
2881*0Sstevel@tonic-gate 	 */
2882*0Sstevel@tonic-gate 	qp = queue_lock(cvp, CV);
	/*
	 * Scan the (hashed, so possibly shared) queue for a waiter on
	 * this cvp, preferring one that is not stopped (suspended);
	 * the first suspended candidate is remembered as a fallback.
	 */
2883*0Sstevel@tonic-gate 	for (ulwpp = &qp->qh_head; (ulwp = *ulwpp) != NULL;
2884*0Sstevel@tonic-gate 	    prev = ulwp, ulwpp = &ulwp->ul_link) {
2885*0Sstevel@tonic-gate 		if (ulwp->ul_wchan == cvp) {
2886*0Sstevel@tonic-gate 			if (!ulwp->ul_stop)
2887*0Sstevel@tonic-gate 				break;
2888*0Sstevel@tonic-gate 			/*
2889*0Sstevel@tonic-gate 			 * Try not to dequeue a suspended thread.
2890*0Sstevel@tonic-gate 			 * This mimics the old libthread's behavior.
2891*0Sstevel@tonic-gate 			 */
2892*0Sstevel@tonic-gate 			if (suspp == NULL) {
2893*0Sstevel@tonic-gate 				suspp = ulwpp;
2894*0Sstevel@tonic-gate 				susprev = prev;
2895*0Sstevel@tonic-gate 			}
2896*0Sstevel@tonic-gate 		}
2897*0Sstevel@tonic-gate 	}
	/* No runnable waiter found; fall back to the suspended one. */
2898*0Sstevel@tonic-gate 	if (ulwp == NULL && suspp != NULL) {
2899*0Sstevel@tonic-gate 		ulwp = *(ulwpp = suspp);
2900*0Sstevel@tonic-gate 		prev = susprev;
2901*0Sstevel@tonic-gate 		suspp = NULL;
2902*0Sstevel@tonic-gate 	}
2903*0Sstevel@tonic-gate 	if (ulwp == NULL) {	/* no one on the sleep queue */
2904*0Sstevel@tonic-gate 		cvp->cond_waiters_user = 0;
2905*0Sstevel@tonic-gate 		queue_unlock(qp);
2906*0Sstevel@tonic-gate 		return (error);
2907*0Sstevel@tonic-gate 	}
2908*0Sstevel@tonic-gate 	/*
2909*0Sstevel@tonic-gate 	 * Scan the remainder of the CV queue for another waiter.
	 * If there is none, clear cond_waiters_user so future signals
	 * can take the early return above.
2910*0Sstevel@tonic-gate 	 */
2911*0Sstevel@tonic-gate 	if (suspp != NULL) {
2912*0Sstevel@tonic-gate 		next = *suspp;
2913*0Sstevel@tonic-gate 	} else {
2914*0Sstevel@tonic-gate 		for (next = ulwp->ul_link; next != NULL; next = next->ul_link)
2915*0Sstevel@tonic-gate 			if (next->ul_wchan == cvp)
2916*0Sstevel@tonic-gate 				break;
2917*0Sstevel@tonic-gate 	}
2918*0Sstevel@tonic-gate 	if (next == NULL)
2919*0Sstevel@tonic-gate 		cvp->cond_waiters_user = 0;
2920*0Sstevel@tonic-gate 
2921*0Sstevel@tonic-gate 	/*
2922*0Sstevel@tonic-gate 	 * Inform the thread that he was the recipient of a cond_signal().
2923*0Sstevel@tonic-gate 	 * This lets him deal with cond_signal() and, concurrently,
2924*0Sstevel@tonic-gate 	 * one or more of a cancellation, a UNIX signal, or a timeout.
2925*0Sstevel@tonic-gate 	 * These latter conditions must not consume a cond_signal().
2926*0Sstevel@tonic-gate 	 */
2927*0Sstevel@tonic-gate 	ulwp->ul_signalled = 1;
2928*0Sstevel@tonic-gate 
2929*0Sstevel@tonic-gate 	/*
2930*0Sstevel@tonic-gate 	 * Dequeue the waiter but leave his ul_sleepq non-NULL
2931*0Sstevel@tonic-gate 	 * while we move him to the mutex queue so that he can
2932*0Sstevel@tonic-gate 	 * deal properly with spurious wakeups.
2933*0Sstevel@tonic-gate 	 */
2934*0Sstevel@tonic-gate 	*ulwpp = ulwp->ul_link;
2935*0Sstevel@tonic-gate 	if (qp->qh_tail == ulwp)
2936*0Sstevel@tonic-gate 		qp->qh_tail = prev;
2937*0Sstevel@tonic-gate 	qp->qh_qlen--;
2938*0Sstevel@tonic-gate 	ulwp->ul_link = NULL;
2939*0Sstevel@tonic-gate 
2940*0Sstevel@tonic-gate 	mp = ulwp->ul_cvmutex;		/* the mutex he will acquire */
2941*0Sstevel@tonic-gate 	ulwp->ul_cvmutex = NULL;
2942*0Sstevel@tonic-gate 	ASSERT(mp != NULL);
2943*0Sstevel@tonic-gate 
2944*0Sstevel@tonic-gate 	if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
2945*0Sstevel@tonic-gate 		lwpid_t lwpid = ulwp->ul_lwpid;
2946*0Sstevel@tonic-gate 
		/*
		 * Direct wakeup: clear his sleep state, drop the queue
		 * lock, and unpark him.  no_preempt()/preempt() keep us
		 * from being preempted while he may still be spinning.
		 */
2947*0Sstevel@tonic-gate 		no_preempt(self);
2948*0Sstevel@tonic-gate 		ulwp->ul_sleepq = NULL;
2949*0Sstevel@tonic-gate 		ulwp->ul_wchan = NULL;
2950*0Sstevel@tonic-gate 		ulwp->ul_cv_wake = 0;
2951*0Sstevel@tonic-gate 		queue_unlock(qp);
2952*0Sstevel@tonic-gate 		(void) __lwp_unpark(lwpid);
2953*0Sstevel@tonic-gate 		preempt(self);
2954*0Sstevel@tonic-gate 	} else {
		/*
		 * We own the mutex he will acquire: transfer him to the
		 * mutex sleep queue instead of waking him, so he does
		 * not wake up only to block again on the mutex.
		 */
2955*0Sstevel@tonic-gate 		mqp = queue_lock(mp, MX);
2956*0Sstevel@tonic-gate 		enqueue(mqp, ulwp, mp, MX);
2957*0Sstevel@tonic-gate 		mp->mutex_waiters = 1;
2958*0Sstevel@tonic-gate 		queue_unlock(mqp);
2959*0Sstevel@tonic-gate 		queue_unlock(qp);
2960*0Sstevel@tonic-gate 	}
2961*0Sstevel@tonic-gate 
2962*0Sstevel@tonic-gate 	return (error);
2963*0Sstevel@tonic-gate }
2964*0Sstevel@tonic-gate 
2965*0Sstevel@tonic-gate #define	MAXLWPS	128	/* max remembered lwpids before overflow */
2966*0Sstevel@tonic-gate #define	NEWLWPS	2048	/* max remembered lwpids at first overflow */
2967*0Sstevel@tonic-gate 
2968*0Sstevel@tonic-gate #pragma weak pthread_cond_broadcast = cond_broadcast_internal
2969*0Sstevel@tonic-gate #pragma weak _pthread_cond_broadcast = cond_broadcast_internal
2970*0Sstevel@tonic-gate #pragma weak cond_broadcast = cond_broadcast_internal
2971*0Sstevel@tonic-gate #pragma weak _cond_broadcast = cond_broadcast_internal
/*
 * cond_broadcast_internal(): wake all waiters on cvp.
 * Kernel-level waiters are woken via __lwp_cond_broadcast(); user-level
 * waiters are either transferred to their mutex's sleep queue (when we
 * own that mutex) or collected in lwpid[] and unparked in one batch
 * after all queue locks are dropped.  Returns 0 or the error from
 * __lwp_cond_broadcast().
 */
2972*0Sstevel@tonic-gate int
2973*0Sstevel@tonic-gate cond_broadcast_internal(cond_t *cvp)
2974*0Sstevel@tonic-gate {
2975*0Sstevel@tonic-gate 	ulwp_t *self = curthread;
2976*0Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
2977*0Sstevel@tonic-gate 	tdb_cond_stats_t *csp = COND_STATS(cvp, udp);
2978*0Sstevel@tonic-gate 	int error = 0;
2979*0Sstevel@tonic-gate 	queue_head_t *qp;
2980*0Sstevel@tonic-gate 	mutex_t *mp;
2981*0Sstevel@tonic-gate 	queue_head_t *mqp;
2982*0Sstevel@tonic-gate 	mutex_t *mp_cache = NULL;
2983*0Sstevel@tonic-gate 	queue_head_t *mqp_cache = NULL;
2984*0Sstevel@tonic-gate 	ulwp_t **ulwpp;
2985*0Sstevel@tonic-gate 	ulwp_t *ulwp;
2986*0Sstevel@tonic-gate 	ulwp_t *prev = NULL;
2987*0Sstevel@tonic-gate 	lwpid_t buffer[MAXLWPS];
2988*0Sstevel@tonic-gate 	lwpid_t *lwpid = buffer;
2989*0Sstevel@tonic-gate 	int nlwpid = 0;
2990*0Sstevel@tonic-gate 	int maxlwps = MAXLWPS;
2991*0Sstevel@tonic-gate 
2992*0Sstevel@tonic-gate 	if (csp)
2993*0Sstevel@tonic-gate 		tdb_incr(csp->cond_broadcast);
2994*0Sstevel@tonic-gate 
2995*0Sstevel@tonic-gate 	if (cvp->cond_waiters_kernel)	/* someone sleeping in the kernel? */
2996*0Sstevel@tonic-gate 		error = __lwp_cond_broadcast(cvp);
2997*0Sstevel@tonic-gate 
2998*0Sstevel@tonic-gate 	if (!cvp->cond_waiters_user)	/* no one sleeping at user-level */
2999*0Sstevel@tonic-gate 		return (error);
3000*0Sstevel@tonic-gate 
3001*0Sstevel@tonic-gate 	/*
3002*0Sstevel@tonic-gate 	 * Move everyone from the condvar sleep queue to the mutex sleep
3003*0Sstevel@tonic-gate 	 * queue for the mutex that they will acquire on being waked up.
3004*0Sstevel@tonic-gate 	 * We can do this only if we own the mutex they will acquire.
3005*0Sstevel@tonic-gate 	 * If we do not own the mutex, or if their ul_cv_wake flag
3006*0Sstevel@tonic-gate 	 * is set, just dequeue and unpark them.
3007*0Sstevel@tonic-gate 	 *
3008*0Sstevel@tonic-gate 	 * We keep track of lwpids that are to be unparked in lwpid[].
3009*0Sstevel@tonic-gate 	 * __lwp_unpark_all() is called to unpark all of them after
3010*0Sstevel@tonic-gate 	 * they have been removed from the sleep queue and the sleep
3011*0Sstevel@tonic-gate 	 * queue lock has been dropped.  If we run out of space in our
3012*0Sstevel@tonic-gate 	 * on-stack buffer, we need to allocate more but we can't call
3013*0Sstevel@tonic-gate 	 * lmalloc() because we are holding a queue lock when the overflow
3014*0Sstevel@tonic-gate 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
3015*0Sstevel@tonic-gate 	 * either because the application may have allocated a small stack
3016*0Sstevel@tonic-gate 	 * and we don't want to overrun the stack.  So we use the mmap()
3017*0Sstevel@tonic-gate 	 * system call directly since that path acquires no locks.
3018*0Sstevel@tonic-gate 	 */
3019*0Sstevel@tonic-gate 	qp = queue_lock(cvp, CV);
3020*0Sstevel@tonic-gate 	cvp->cond_waiters_user = 0;
3021*0Sstevel@tonic-gate 	ulwpp = &qp->qh_head;
3022*0Sstevel@tonic-gate 	while ((ulwp = *ulwpp) != NULL) {
3023*0Sstevel@tonic-gate 
		/* Skip entries hashed to this queue but waiting elsewhere. */
3024*0Sstevel@tonic-gate 		if (ulwp->ul_wchan != cvp) {
3025*0Sstevel@tonic-gate 			prev = ulwp;
3026*0Sstevel@tonic-gate 			ulwpp = &ulwp->ul_link;
3027*0Sstevel@tonic-gate 			continue;
3028*0Sstevel@tonic-gate 		}
3029*0Sstevel@tonic-gate 
		/* Unlink this waiter from the condvar sleep queue. */
3030*0Sstevel@tonic-gate 		*ulwpp = ulwp->ul_link;
3031*0Sstevel@tonic-gate 		if (qp->qh_tail == ulwp)
3032*0Sstevel@tonic-gate 			qp->qh_tail = prev;
3033*0Sstevel@tonic-gate 		qp->qh_qlen--;
3034*0Sstevel@tonic-gate 		ulwp->ul_link = NULL;
3035*0Sstevel@tonic-gate 
3036*0Sstevel@tonic-gate 		mp = ulwp->ul_cvmutex;		/* his mutex */
3037*0Sstevel@tonic-gate 		ulwp->ul_cvmutex = NULL;
3038*0Sstevel@tonic-gate 		ASSERT(mp != NULL);
3039*0Sstevel@tonic-gate 
3040*0Sstevel@tonic-gate 		if (ulwp->ul_cv_wake || !MUTEX_OWNED(mp, self)) {
3041*0Sstevel@tonic-gate 			ulwp->ul_sleepq = NULL;
3042*0Sstevel@tonic-gate 			ulwp->ul_wchan = NULL;
3043*0Sstevel@tonic-gate 			ulwp->ul_cv_wake = 0;
3044*0Sstevel@tonic-gate 			if (nlwpid == maxlwps) {
3045*0Sstevel@tonic-gate 				/*
3046*0Sstevel@tonic-gate 				 * Allocate NEWLWPS ids on the first overflow.
3047*0Sstevel@tonic-gate 				 * Double the allocation each time after that.
3048*0Sstevel@tonic-gate 				 */
3049*0Sstevel@tonic-gate 				int newlwps = (lwpid == buffer)? NEWLWPS :
3050*0Sstevel@tonic-gate 						2 * maxlwps;
3051*0Sstevel@tonic-gate 				void *vaddr = _private_mmap(NULL,
3052*0Sstevel@tonic-gate 					newlwps * sizeof (lwpid_t),
3053*0Sstevel@tonic-gate 					PROT_READ|PROT_WRITE,
3054*0Sstevel@tonic-gate 					MAP_PRIVATE|MAP_ANON, -1, (off_t)0);
3055*0Sstevel@tonic-gate 				if (vaddr == MAP_FAILED) {
3056*0Sstevel@tonic-gate 					/*
3057*0Sstevel@tonic-gate 					 * Let's hope this never happens.
3058*0Sstevel@tonic-gate 					 * If it does, then we have a terrible
3059*0Sstevel@tonic-gate 					 * thundering herd on our hands.
3060*0Sstevel@tonic-gate 					 */
3061*0Sstevel@tonic-gate 					(void) __lwp_unpark_all(lwpid, nlwpid);
3062*0Sstevel@tonic-gate 					nlwpid = 0;
3063*0Sstevel@tonic-gate 				} else {
3064*0Sstevel@tonic-gate 					(void) _memcpy(vaddr, lwpid,
3065*0Sstevel@tonic-gate 						maxlwps * sizeof (lwpid_t));
3066*0Sstevel@tonic-gate 					if (lwpid != buffer)
3067*0Sstevel@tonic-gate 						(void) _private_munmap(lwpid,
3068*0Sstevel@tonic-gate 						    maxlwps * sizeof (lwpid_t));
3069*0Sstevel@tonic-gate 					lwpid = vaddr;
3070*0Sstevel@tonic-gate 					maxlwps = newlwps;
3071*0Sstevel@tonic-gate 				}
3072*0Sstevel@tonic-gate 			}
3073*0Sstevel@tonic-gate 			lwpid[nlwpid++] = ulwp->ul_lwpid;
3074*0Sstevel@tonic-gate 		} else {
			/*
			 * We own his mutex: transfer him to its sleep queue.
			 * Cache the locked mutex queue so consecutive waiters
			 * on the same mutex don't relock it each iteration.
			 */
3075*0Sstevel@tonic-gate 			if (mp != mp_cache) {
3076*0Sstevel@tonic-gate 				if (mqp_cache != NULL)
3077*0Sstevel@tonic-gate 					queue_unlock(mqp_cache);
3078*0Sstevel@tonic-gate 				mqp_cache = queue_lock(mp, MX);
3079*0Sstevel@tonic-gate 				mp_cache = mp;
3080*0Sstevel@tonic-gate 			}
3081*0Sstevel@tonic-gate 			mqp = mqp_cache;
3082*0Sstevel@tonic-gate 			enqueue(mqp, ulwp, mp, MX);
3083*0Sstevel@tonic-gate 			mp->mutex_waiters = 1;
3084*0Sstevel@tonic-gate 		}
3085*0Sstevel@tonic-gate 	}
	/* Drop all queue locks before issuing the batched unparks. */
3086*0Sstevel@tonic-gate 	if (mqp_cache != NULL)
3087*0Sstevel@tonic-gate 		queue_unlock(mqp_cache);
3088*0Sstevel@tonic-gate 	queue_unlock(qp);
3089*0Sstevel@tonic-gate 	if (nlwpid) {
3090*0Sstevel@tonic-gate 		if (nlwpid == 1)
3091*0Sstevel@tonic-gate 			(void) __lwp_unpark(lwpid[0]);
3092*0Sstevel@tonic-gate 		else
3093*0Sstevel@tonic-gate 			(void) __lwp_unpark_all(lwpid, nlwpid);
3094*0Sstevel@tonic-gate 	}
3095*0Sstevel@tonic-gate 	if (lwpid != buffer)
3096*0Sstevel@tonic-gate 		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
3097*0Sstevel@tonic-gate 
3098*0Sstevel@tonic-gate 	return (error);
3099*0Sstevel@tonic-gate }
3100*0Sstevel@tonic-gate 
3101*0Sstevel@tonic-gate #pragma weak pthread_cond_destroy = _cond_destroy
3102*0Sstevel@tonic-gate #pragma weak _pthread_cond_destroy = _cond_destroy
3103*0Sstevel@tonic-gate #pragma weak cond_destroy = _cond_destroy
/*
 * cond_destroy(): invalidate the condvar (clear its magic number) and
 * remove any thread-debug statistics registered for it.  Always
 * succeeds; no check is made for current waiters.
 */
3104*0Sstevel@tonic-gate int
3105*0Sstevel@tonic-gate _cond_destroy(cond_t *cvp)
3106*0Sstevel@tonic-gate {
3107*0Sstevel@tonic-gate 	cvp->cond_magic = 0;
3108*0Sstevel@tonic-gate 	tdb_sync_obj_deregister(cvp);
3109*0Sstevel@tonic-gate 	return (0);
3110*0Sstevel@tonic-gate }
3111*0Sstevel@tonic-gate 
3112*0Sstevel@tonic-gate #if defined(THREAD_DEBUG)
/*
 * Debug-build sanity check: assert that the calling thread holds no
 * libc-internal locks (is not in a critical section), except when it
 * carries bind flags (presumably a runtime-linker binding in progress
 * — NOTE(review): confirm ul_bindflags semantics).
 */
3113*0Sstevel@tonic-gate void
3114*0Sstevel@tonic-gate assert_no_libc_locks_held(void)
3115*0Sstevel@tonic-gate {
3116*0Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
3117*0Sstevel@tonic-gate }
3118*0Sstevel@tonic-gate #endif
3119*0Sstevel@tonic-gate 
3120*0Sstevel@tonic-gate /* protected by link_lock */
3121*0Sstevel@tonic-gate uint64_t spin_lock_spin;
3122*0Sstevel@tonic-gate uint64_t spin_lock_spin2;
3123*0Sstevel@tonic-gate uint64_t spin_lock_sleep;
3124*0Sstevel@tonic-gate uint64_t spin_lock_wakeup;
3125*0Sstevel@tonic-gate 
3126*0Sstevel@tonic-gate /*
3127*0Sstevel@tonic-gate  * Record spin lock statistics.
3128*0Sstevel@tonic-gate  * Called by a thread exiting itself in thrp_exit().
3129*0Sstevel@tonic-gate  * Also called via atexit() from the thread calling
3130*0Sstevel@tonic-gate  * exit() to do all the other threads as well.
3131*0Sstevel@tonic-gate  */
3132*0Sstevel@tonic-gate void
3133*0Sstevel@tonic-gate record_spin_locks(ulwp_t *ulwp)
3134*0Sstevel@tonic-gate {
3135*0Sstevel@tonic-gate 	spin_lock_spin += ulwp->ul_spin_lock_spin;
3136*0Sstevel@tonic-gate 	spin_lock_spin2 += ulwp->ul_spin_lock_spin2;
3137*0Sstevel@tonic-gate 	spin_lock_sleep += ulwp->ul_spin_lock_sleep;
3138*0Sstevel@tonic-gate 	spin_lock_wakeup += ulwp->ul_spin_lock_wakeup;
3139*0Sstevel@tonic-gate 	ulwp->ul_spin_lock_spin = 0;
3140*0Sstevel@tonic-gate 	ulwp->ul_spin_lock_spin2 = 0;
3141*0Sstevel@tonic-gate 	ulwp->ul_spin_lock_sleep = 0;
3142*0Sstevel@tonic-gate 	ulwp->ul_spin_lock_wakeup = 0;
3143*0Sstevel@tonic-gate }
3144*0Sstevel@tonic-gate 
3145*0Sstevel@tonic-gate /*
3146*0Sstevel@tonic-gate  * atexit function:  dump the queue statistics to stderr.
3147*0Sstevel@tonic-gate  */
3148*0Sstevel@tonic-gate #include <stdio.h>
3149*0Sstevel@tonic-gate void
3150*0Sstevel@tonic-gate dump_queue_statistics(void)
3151*0Sstevel@tonic-gate {
3152*0Sstevel@tonic-gate 	uberdata_t *udp = curthread->ul_uberdata;
3153*0Sstevel@tonic-gate 	queue_head_t *qp;
3154*0Sstevel@tonic-gate 	int qn;
3155*0Sstevel@tonic-gate 	uint64_t spin_lock_total = 0;
3156*0Sstevel@tonic-gate 
3157*0Sstevel@tonic-gate 	if (udp->queue_head == NULL || thread_queue_dump == 0)
3158*0Sstevel@tonic-gate 		return;
3159*0Sstevel@tonic-gate 
3160*0Sstevel@tonic-gate 	if (fprintf(stderr, "\n%5d mutex queues:\n", QHASHSIZE) < 0 ||
3161*0Sstevel@tonic-gate 	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
3162*0Sstevel@tonic-gate 		return;
3163*0Sstevel@tonic-gate 	for (qn = 0, qp = udp->queue_head; qn < QHASHSIZE; qn++, qp++) {
3164*0Sstevel@tonic-gate 		if (qp->qh_lockcount == 0)
3165*0Sstevel@tonic-gate 			continue;
3166*0Sstevel@tonic-gate 		spin_lock_total += qp->qh_lockcount;
3167*0Sstevel@tonic-gate 		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
3168*0Sstevel@tonic-gate 			(u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
3169*0Sstevel@tonic-gate 				return;
3170*0Sstevel@tonic-gate 	}
3171*0Sstevel@tonic-gate 
3172*0Sstevel@tonic-gate 	if (fprintf(stderr, "\n%5d condvar queues:\n", QHASHSIZE) < 0 ||
3173*0Sstevel@tonic-gate 	    fprintf(stderr, "queue#   lockcount    max qlen\n") < 0)
3174*0Sstevel@tonic-gate 		return;
3175*0Sstevel@tonic-gate 	for (qn = 0; qn < QHASHSIZE; qn++, qp++) {
3176*0Sstevel@tonic-gate 		if (qp->qh_lockcount == 0)
3177*0Sstevel@tonic-gate 			continue;
3178*0Sstevel@tonic-gate 		spin_lock_total += qp->qh_lockcount;
3179*0Sstevel@tonic-gate 		if (fprintf(stderr, "%5d %12llu%12u\n", qn,
3180*0Sstevel@tonic-gate 			(u_longlong_t)qp->qh_lockcount, qp->qh_qmax) < 0)
3181*0Sstevel@tonic-gate 				return;
3182*0Sstevel@tonic-gate 	}
3183*0Sstevel@tonic-gate 
3184*0Sstevel@tonic-gate 	(void) fprintf(stderr, "\n  spin_lock_total  = %10llu\n",
3185*0Sstevel@tonic-gate 		(u_longlong_t)spin_lock_total);
3186*0Sstevel@tonic-gate 	(void) fprintf(stderr, "  spin_lock_spin   = %10llu\n",
3187*0Sstevel@tonic-gate 		(u_longlong_t)spin_lock_spin);
3188*0Sstevel@tonic-gate 	(void) fprintf(stderr, "  spin_lock_spin2  = %10llu\n",
3189*0Sstevel@tonic-gate 		(u_longlong_t)spin_lock_spin2);
3190*0Sstevel@tonic-gate 	(void) fprintf(stderr, "  spin_lock_sleep  = %10llu\n",
3191*0Sstevel@tonic-gate 		(u_longlong_t)spin_lock_sleep);
3192*0Sstevel@tonic-gate 	(void) fprintf(stderr, "  spin_lock_wakeup = %10llu\n",
3193*0Sstevel@tonic-gate 		(u_longlong_t)spin_lock_wakeup);
3194*0Sstevel@tonic-gate }
3195