xref: /onnv-gate/usr/src/lib/libc/port/threads/rwlock.c (revision 4570:f93b74ddbdd5)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21*4570Sraf 
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate #include "lint.h"
300Sstevel@tonic-gate #include "thr_uberdata.h"
310Sstevel@tonic-gate #include <sys/sdt.h>
320Sstevel@tonic-gate 
/*
 * Request codes passed as the rd_wr argument of the locking routines.
 * READ_LOCK/WRITE_LOCK select the kind of lock; TRY_FLAG is OR-ed in
 * for the non-blocking (try) variants and is masked off again by the
 * routines before the kind is examined.
 */
#define	TRY_FLAG		0x10
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	READ_LOCK_TRY		(TRY_FLAG | READ_LOCK)
#define	WRITE_LOCK_TRY		(TRY_FLAG | WRITE_LOCK)

/* initial number of readlock_t structs allocated */
#define	NLOCKS	4

/*
 * Sanity check on a snapshot of the rwlock state word:
 * if the write-locked bit is set then, ignoring the waiters bit,
 * no other bit (in particular no reader count) may be set.
 */
#define	ASSERT_CONSISTENT_STATE(readers)		\
	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
	    ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)
44*4570Sraf 
450Sstevel@tonic-gate /*
460Sstevel@tonic-gate  * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
47*4570Sraf  * We must be deferring signals for this to be safe.
48*4570Sraf  * Else if we are returning an entry with ul_rdlocks == 0,
49*4570Sraf  * it could be reassigned behind our back in a signal handler.
500Sstevel@tonic-gate  */
510Sstevel@tonic-gate static readlock_t *
520Sstevel@tonic-gate rwl_entry(rwlock_t *rwlp)
530Sstevel@tonic-gate {
540Sstevel@tonic-gate 	ulwp_t *self = curthread;
550Sstevel@tonic-gate 	readlock_t *remembered = NULL;
560Sstevel@tonic-gate 	readlock_t *readlockp;
570Sstevel@tonic-gate 	uint_t nlocks;
580Sstevel@tonic-gate 
59*4570Sraf 	/* we must be deferring signals */
60*4570Sraf 	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);
61*4570Sraf 
620Sstevel@tonic-gate 	if ((nlocks = self->ul_rdlocks) != 0)
630Sstevel@tonic-gate 		readlockp = self->ul_readlock.array;
640Sstevel@tonic-gate 	else {
650Sstevel@tonic-gate 		nlocks = 1;
660Sstevel@tonic-gate 		readlockp = &self->ul_readlock.single;
670Sstevel@tonic-gate 	}
680Sstevel@tonic-gate 
690Sstevel@tonic-gate 	for (; nlocks; nlocks--, readlockp++) {
700Sstevel@tonic-gate 		if (readlockp->rd_rwlock == rwlp)
710Sstevel@tonic-gate 			return (readlockp);
720Sstevel@tonic-gate 		if (readlockp->rd_count == 0 && remembered == NULL)
730Sstevel@tonic-gate 			remembered = readlockp;
740Sstevel@tonic-gate 	}
750Sstevel@tonic-gate 	if (remembered != NULL) {
760Sstevel@tonic-gate 		remembered->rd_rwlock = rwlp;
770Sstevel@tonic-gate 		return (remembered);
780Sstevel@tonic-gate 	}
790Sstevel@tonic-gate 
800Sstevel@tonic-gate 	/*
810Sstevel@tonic-gate 	 * No entry available.  Allocate more space, converting the single
820Sstevel@tonic-gate 	 * readlock_t entry into an array of readlock_t entries if necessary.
830Sstevel@tonic-gate 	 */
840Sstevel@tonic-gate 	if ((nlocks = self->ul_rdlocks) == 0) {
850Sstevel@tonic-gate 		/*
860Sstevel@tonic-gate 		 * Initial allocation of the readlock_t array.
870Sstevel@tonic-gate 		 * Convert the single entry into an array.
880Sstevel@tonic-gate 		 */
890Sstevel@tonic-gate 		self->ul_rdlocks = nlocks = NLOCKS;
900Sstevel@tonic-gate 		readlockp = lmalloc(nlocks * sizeof (readlock_t));
910Sstevel@tonic-gate 		/*
920Sstevel@tonic-gate 		 * The single readlock_t becomes the first entry in the array.
930Sstevel@tonic-gate 		 */
940Sstevel@tonic-gate 		*readlockp = self->ul_readlock.single;
950Sstevel@tonic-gate 		self->ul_readlock.single.rd_count = 0;
960Sstevel@tonic-gate 		self->ul_readlock.array = readlockp;
970Sstevel@tonic-gate 		/*
980Sstevel@tonic-gate 		 * Return the next available entry in the array.
990Sstevel@tonic-gate 		 */
1000Sstevel@tonic-gate 		(++readlockp)->rd_rwlock = rwlp;
1010Sstevel@tonic-gate 		return (readlockp);
1020Sstevel@tonic-gate 	}
1030Sstevel@tonic-gate 	/*
1040Sstevel@tonic-gate 	 * Reallocate the array, double the size each time.
1050Sstevel@tonic-gate 	 */
1060Sstevel@tonic-gate 	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
1070Sstevel@tonic-gate 	(void) _memcpy(readlockp, self->ul_readlock.array,
1080Sstevel@tonic-gate 		nlocks * sizeof (readlock_t));
1090Sstevel@tonic-gate 	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
1100Sstevel@tonic-gate 	self->ul_readlock.array = readlockp;
1110Sstevel@tonic-gate 	self->ul_rdlocks *= 2;
1120Sstevel@tonic-gate 	/*
1130Sstevel@tonic-gate 	 * Return the next available entry in the newly allocated array.
1140Sstevel@tonic-gate 	 */
1150Sstevel@tonic-gate 	(readlockp += nlocks)->rd_rwlock = rwlp;
1160Sstevel@tonic-gate 	return (readlockp);
1170Sstevel@tonic-gate }
1180Sstevel@tonic-gate 
1190Sstevel@tonic-gate /*
1200Sstevel@tonic-gate  * Free the array of rwlocks held for reading.
1210Sstevel@tonic-gate  */
1220Sstevel@tonic-gate void
1230Sstevel@tonic-gate rwl_free(ulwp_t *ulwp)
1240Sstevel@tonic-gate {
1250Sstevel@tonic-gate 	uint_t nlocks;
1260Sstevel@tonic-gate 
1270Sstevel@tonic-gate 	if ((nlocks = ulwp->ul_rdlocks) != 0)
1280Sstevel@tonic-gate 		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
1290Sstevel@tonic-gate 	ulwp->ul_rdlocks = 0;
1300Sstevel@tonic-gate 	ulwp->ul_readlock.single.rd_rwlock = NULL;
1310Sstevel@tonic-gate 	ulwp->ul_readlock.single.rd_count = 0;
1320Sstevel@tonic-gate }
1330Sstevel@tonic-gate 
1340Sstevel@tonic-gate /*
1350Sstevel@tonic-gate  * Check if a reader version of the lock is held by the current thread.
1360Sstevel@tonic-gate  * rw_read_is_held() is private to libc.
1370Sstevel@tonic-gate  */
1380Sstevel@tonic-gate #pragma weak rw_read_is_held = _rw_read_held
1390Sstevel@tonic-gate #pragma weak rw_read_held = _rw_read_held
1400Sstevel@tonic-gate int
1410Sstevel@tonic-gate _rw_read_held(rwlock_t *rwlp)
1420Sstevel@tonic-gate {
143*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
144*4570Sraf 	uint32_t readers;
145*4570Sraf 	ulwp_t *self = curthread;
1460Sstevel@tonic-gate 	readlock_t *readlockp;
1470Sstevel@tonic-gate 	uint_t nlocks;
148*4570Sraf 	int rval = 0;
1490Sstevel@tonic-gate 
150*4570Sraf 	no_preempt(self);
151*4570Sraf 
152*4570Sraf 	readers = *rwstate;
153*4570Sraf 	ASSERT_CONSISTENT_STATE(readers);
154*4570Sraf 	if (!(readers & URW_WRITE_LOCKED) &&
155*4570Sraf 	    (readers & URW_READERS_MASK) != 0) {
156*4570Sraf 		/*
157*4570Sraf 		 * The lock is held for reading by some thread.
158*4570Sraf 		 * Search our array of rwlocks held for reading for a match.
159*4570Sraf 		 */
160*4570Sraf 		if ((nlocks = self->ul_rdlocks) != 0)
161*4570Sraf 			readlockp = self->ul_readlock.array;
162*4570Sraf 		else {
163*4570Sraf 			nlocks = 1;
164*4570Sraf 			readlockp = &self->ul_readlock.single;
165*4570Sraf 		}
166*4570Sraf 		for (; nlocks; nlocks--, readlockp++) {
167*4570Sraf 			if (readlockp->rd_rwlock == rwlp) {
168*4570Sraf 				if (readlockp->rd_count)
169*4570Sraf 					rval = 1;
170*4570Sraf 				break;
171*4570Sraf 			}
172*4570Sraf 		}
1730Sstevel@tonic-gate 	}
1740Sstevel@tonic-gate 
175*4570Sraf 	preempt(self);
176*4570Sraf 	return (rval);
1770Sstevel@tonic-gate }
1780Sstevel@tonic-gate 
1790Sstevel@tonic-gate /*
1800Sstevel@tonic-gate  * Check if a writer version of the lock is held by the current thread.
1810Sstevel@tonic-gate  * rw_write_is_held() is private to libc.
1820Sstevel@tonic-gate  */
1830Sstevel@tonic-gate #pragma weak rw_write_is_held = _rw_write_held
1840Sstevel@tonic-gate #pragma weak rw_write_held = _rw_write_held
1850Sstevel@tonic-gate int
1860Sstevel@tonic-gate _rw_write_held(rwlock_t *rwlp)
1870Sstevel@tonic-gate {
188*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
189*4570Sraf 	uint32_t readers;
1900Sstevel@tonic-gate 	ulwp_t *self = curthread;
191*4570Sraf 	int rval;
192*4570Sraf 
193*4570Sraf 	no_preempt(self);
1940Sstevel@tonic-gate 
195*4570Sraf 	readers = *rwstate;
196*4570Sraf 	ASSERT_CONSISTENT_STATE(readers);
197*4570Sraf 	rval = ((readers & URW_WRITE_LOCKED) &&
198*4570Sraf 	    rwlp->rwlock_owner == (uintptr_t)self &&
199*4570Sraf 	    (rwlp->rwlock_type == USYNC_THREAD ||
200*4570Sraf 	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));
2010Sstevel@tonic-gate 
202*4570Sraf 	preempt(self);
203*4570Sraf 	return (rval);
2040Sstevel@tonic-gate }
2050Sstevel@tonic-gate 
2060Sstevel@tonic-gate #pragma weak rwlock_init = __rwlock_init
2070Sstevel@tonic-gate #pragma weak _rwlock_init = __rwlock_init
2080Sstevel@tonic-gate /* ARGSUSED2 */
2090Sstevel@tonic-gate int
2100Sstevel@tonic-gate __rwlock_init(rwlock_t *rwlp, int type, void *arg)
2110Sstevel@tonic-gate {
2120Sstevel@tonic-gate 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
2130Sstevel@tonic-gate 		return (EINVAL);
2140Sstevel@tonic-gate 	/*
2150Sstevel@tonic-gate 	 * Once reinitialized, we can no longer be holding a read or write lock.
2160Sstevel@tonic-gate 	 * We can do nothing about other threads that are holding read locks.
2170Sstevel@tonic-gate 	 */
218*4570Sraf 	sigoff(curthread);
219*4570Sraf 	rwl_entry(rwlp)->rd_count = 0;
220*4570Sraf 	sigon(curthread);
2210Sstevel@tonic-gate 	(void) _memset(rwlp, 0, sizeof (*rwlp));
2220Sstevel@tonic-gate 	rwlp->rwlock_type = (uint16_t)type;
2230Sstevel@tonic-gate 	rwlp->rwlock_magic = RWL_MAGIC;
2240Sstevel@tonic-gate 	rwlp->mutex.mutex_type = (uint8_t)type;
2250Sstevel@tonic-gate 	rwlp->mutex.mutex_flag = LOCK_INITED;
2260Sstevel@tonic-gate 	rwlp->mutex.mutex_magic = MUTEX_MAGIC;
2270Sstevel@tonic-gate 	return (0);
2280Sstevel@tonic-gate }
2290Sstevel@tonic-gate 
2300Sstevel@tonic-gate #pragma weak rwlock_destroy = __rwlock_destroy
2310Sstevel@tonic-gate #pragma weak _rwlock_destroy = __rwlock_destroy
2320Sstevel@tonic-gate #pragma weak pthread_rwlock_destroy = __rwlock_destroy
2330Sstevel@tonic-gate #pragma weak _pthread_rwlock_destroy = __rwlock_destroy
2340Sstevel@tonic-gate int
2350Sstevel@tonic-gate __rwlock_destroy(rwlock_t *rwlp)
2360Sstevel@tonic-gate {
2370Sstevel@tonic-gate 	/*
2380Sstevel@tonic-gate 	 * Once destroyed, we can no longer be holding a read or write lock.
2390Sstevel@tonic-gate 	 * We can do nothing about other threads that are holding read locks.
2400Sstevel@tonic-gate 	 */
241*4570Sraf 	sigoff(curthread);
242*4570Sraf 	rwl_entry(rwlp)->rd_count = 0;
243*4570Sraf 	sigon(curthread);
2440Sstevel@tonic-gate 	rwlp->rwlock_magic = 0;
2450Sstevel@tonic-gate 	tdb_sync_obj_deregister(rwlp);
2460Sstevel@tonic-gate 	return (0);
2470Sstevel@tonic-gate }
2480Sstevel@tonic-gate 
2490Sstevel@tonic-gate /*
250*4570Sraf  * Attempt to acquire a readers lock.  Return true on success.
251*4570Sraf  */
252*4570Sraf static int
253*4570Sraf read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
254*4570Sraf {
255*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
256*4570Sraf 	uint32_t mask = ignore_waiters_flag?
257*4570Sraf 		URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
258*4570Sraf 	uint32_t readers;
259*4570Sraf 	ulwp_t *self = curthread;
260*4570Sraf 
261*4570Sraf 	no_preempt(self);
262*4570Sraf 	while (((readers = *rwstate) & mask) == 0) {
263*4570Sraf 		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
264*4570Sraf 			preempt(self);
265*4570Sraf 			return (1);
266*4570Sraf 		}
267*4570Sraf 	}
268*4570Sraf 	preempt(self);
269*4570Sraf 	return (0);
270*4570Sraf }
271*4570Sraf 
272*4570Sraf /*
273*4570Sraf  * Attempt to release a reader lock.  Return true on success.
274*4570Sraf  */
275*4570Sraf static int
276*4570Sraf read_unlock_try(rwlock_t *rwlp)
277*4570Sraf {
278*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
279*4570Sraf 	uint32_t readers;
280*4570Sraf 	ulwp_t *self = curthread;
281*4570Sraf 
282*4570Sraf 	no_preempt(self);
283*4570Sraf 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
284*4570Sraf 		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
285*4570Sraf 			preempt(self);
286*4570Sraf 			return (1);
287*4570Sraf 		}
288*4570Sraf 	}
289*4570Sraf 	preempt(self);
290*4570Sraf 	return (0);
291*4570Sraf }
292*4570Sraf 
293*4570Sraf /*
294*4570Sraf  * Attempt to acquire a writer lock.  Return true on success.
295*4570Sraf  */
296*4570Sraf static int
297*4570Sraf write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
298*4570Sraf {
299*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
300*4570Sraf 	uint32_t mask = ignore_waiters_flag?
301*4570Sraf 		(URW_WRITE_LOCKED | URW_READERS_MASK) :
302*4570Sraf 		(URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
303*4570Sraf 	ulwp_t *self = curthread;
304*4570Sraf 	uint32_t readers;
305*4570Sraf 
306*4570Sraf 	no_preempt(self);
307*4570Sraf 	while (((readers = *rwstate) & mask) == 0) {
308*4570Sraf 		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
309*4570Sraf 		    == readers) {
310*4570Sraf 			preempt(self);
311*4570Sraf 			return (1);
312*4570Sraf 		}
313*4570Sraf 	}
314*4570Sraf 	preempt(self);
315*4570Sraf 	return (0);
316*4570Sraf }
317*4570Sraf 
318*4570Sraf /*
319*4570Sraf  * Attempt to release a writer lock.  Return true on success.
320*4570Sraf  */
321*4570Sraf static int
322*4570Sraf write_unlock_try(rwlock_t *rwlp)
323*4570Sraf {
324*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
325*4570Sraf 	uint32_t readers;
326*4570Sraf 	ulwp_t *self = curthread;
327*4570Sraf 
328*4570Sraf 	no_preempt(self);
329*4570Sraf 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
330*4570Sraf 		if (atomic_cas_32(rwstate, readers, 0) == readers) {
331*4570Sraf 			preempt(self);
332*4570Sraf 			return (1);
333*4570Sraf 		}
334*4570Sraf 	}
335*4570Sraf 	preempt(self);
336*4570Sraf 	return (0);
337*4570Sraf }
338*4570Sraf 
339*4570Sraf /*
340*4570Sraf  * Wake up thread(s) sleeping on the rwlock queue and then
3410Sstevel@tonic-gate  * drop the queue lock.  Return non-zero if we wake up someone.
342*4570Sraf  * This is called when a thread releases a lock that appears to have waiters.
3430Sstevel@tonic-gate  */
3440Sstevel@tonic-gate static int
3450Sstevel@tonic-gate rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
3460Sstevel@tonic-gate {
347*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
348*4570Sraf 	uint32_t readers;
349*4570Sraf 	uint32_t writers;
350*4570Sraf 	int nlwpid = 0;
351*4570Sraf 	int maxlwps = MAXLWPS;
352*4570Sraf 	ulwp_t *self;
353*4570Sraf 	ulwp_t **ulwpp;
3540Sstevel@tonic-gate 	ulwp_t *ulwp;
355*4570Sraf 	ulwp_t *prev = NULL;
356*4570Sraf 	lwpid_t buffer[MAXLWPS];
357*4570Sraf 	lwpid_t *lwpid = buffer;
358*4570Sraf 
359*4570Sraf 	readers = *rwstate;
360*4570Sraf 	ASSERT_CONSISTENT_STATE(readers);
361*4570Sraf 	if (!(readers & URW_HAS_WAITERS)) {
362*4570Sraf 		queue_unlock(qp);
363*4570Sraf 		return (0);
364*4570Sraf 	}
365*4570Sraf 	readers &= URW_READERS_MASK;
366*4570Sraf 	writers = 0;
3670Sstevel@tonic-gate 
368*4570Sraf 	/*
369*4570Sraf 	 * Walk the list of waiters and prepare to wake up as
370*4570Sraf 	 * many readers as we encounter before encountering
371*4570Sraf 	 * a writer.  If the first thread on the list is a
372*4570Sraf 	 * writer, stop there and wake it up.
373*4570Sraf 	 *
374*4570Sraf 	 * We keep track of lwpids that are to be unparked in lwpid[].
375*4570Sraf 	 * __lwp_unpark_all() is called to unpark all of them after
376*4570Sraf 	 * they have been removed from the sleep queue and the sleep
377*4570Sraf 	 * queue lock has been dropped.  If we run out of space in our
378*4570Sraf 	 * on-stack buffer, we need to allocate more but we can't call
379*4570Sraf 	 * lmalloc() because we are holding a queue lock when the overflow
380*4570Sraf 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
381*4570Sraf 	 * either because the application may have allocated a small
382*4570Sraf 	 * stack and we don't want to overrun the stack.  So we call
383*4570Sraf 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
384*4570Sraf 	 * system call directly since that path acquires no locks.
385*4570Sraf 	 */
386*4570Sraf 	ulwpp = &qp->qh_head;
387*4570Sraf 	while ((ulwp = *ulwpp) != NULL) {
388*4570Sraf 		if (ulwp->ul_wchan != rwlp) {
389*4570Sraf 			prev = ulwp;
390*4570Sraf 			ulwpp = &ulwp->ul_link;
391*4570Sraf 			continue;
3920Sstevel@tonic-gate 		}
393*4570Sraf 		if (ulwp->ul_writer) {
394*4570Sraf 			if (writers != 0 || readers != 0)
395*4570Sraf 				break;
396*4570Sraf 			/* one writer to wake */
397*4570Sraf 			writers++;
398*4570Sraf 		} else {
399*4570Sraf 			if (writers != 0)
400*4570Sraf 				break;
401*4570Sraf 			/* at least one reader to wake */
402*4570Sraf 			readers++;
403*4570Sraf 			if (nlwpid == maxlwps)
404*4570Sraf 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
405*4570Sraf 		}
406*4570Sraf 		(void) queue_unlink(qp, ulwpp, prev);
407*4570Sraf 		lwpid[nlwpid++] = ulwp->ul_lwpid;
4080Sstevel@tonic-gate 	}
409*4570Sraf 	if (ulwp == NULL)
410*4570Sraf 		atomic_and_32(rwstate, ~URW_HAS_WAITERS);
411*4570Sraf 	if (nlwpid == 0) {
412*4570Sraf 		queue_unlock(qp);
413*4570Sraf 	} else {
414*4570Sraf 		self = curthread;
415*4570Sraf 		no_preempt(self);
416*4570Sraf 		queue_unlock(qp);
417*4570Sraf 		if (nlwpid == 1)
418*4570Sraf 			(void) __lwp_unpark(lwpid[0]);
419*4570Sraf 		else
420*4570Sraf 			(void) __lwp_unpark_all(lwpid, nlwpid);
421*4570Sraf 		preempt(self);
422*4570Sraf 	}
423*4570Sraf 	if (lwpid != buffer)
424*4570Sraf 		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
425*4570Sraf 	return (nlwpid != 0);
4260Sstevel@tonic-gate }
4270Sstevel@tonic-gate 
4280Sstevel@tonic-gate /*
4290Sstevel@tonic-gate  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
4300Sstevel@tonic-gate  * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
4310Sstevel@tonic-gate  *
4320Sstevel@tonic-gate  * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
4330Sstevel@tonic-gate  * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
4340Sstevel@tonic-gate  * released, and if they need to sleep will release the mutex first. In the
4350Sstevel@tonic-gate  * event of a spurious wakeup, these will return EAGAIN (because it is much
4360Sstevel@tonic-gate  * easier for us to re-acquire the mutex here).
4370Sstevel@tonic-gate  */
4380Sstevel@tonic-gate int
4390Sstevel@tonic-gate shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
4400Sstevel@tonic-gate {
441*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
442*4570Sraf 	mutex_t *mp = &rwlp->mutex;
443*4570Sraf 	/* LINTED set but not used */
444*4570Sraf 	uint32_t readers;
445*4570Sraf 	int try_flag;
446*4570Sraf 	int error;
447*4570Sraf 
448*4570Sraf 	try_flag = (rd_wr & TRY_FLAG);
449*4570Sraf 	rd_wr &= ~TRY_FLAG;
450*4570Sraf 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
451*4570Sraf 
452*4570Sraf 	if (!try_flag) {
453*4570Sraf 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
454*4570Sraf 	}
455*4570Sraf 
456*4570Sraf 	do {
457*4570Sraf 		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
458*4570Sraf 			error = EBUSY;
459*4570Sraf 			break;
460*4570Sraf 		}
461*4570Sraf 		if ((error = _private_mutex_lock(mp)) != 0)
462*4570Sraf 			break;
463*4570Sraf 		if (rd_wr == READ_LOCK) {
464*4570Sraf 			if (read_lock_try(rwlp, 0)) {
465*4570Sraf 				(void) _private_mutex_unlock(mp);
466*4570Sraf 				break;
467*4570Sraf 			}
468*4570Sraf 		} else {
469*4570Sraf 			if (write_lock_try(rwlp, 0)) {
470*4570Sraf 				(void) _private_mutex_unlock(mp);
471*4570Sraf 				break;
472*4570Sraf 			}
473*4570Sraf 		}
474*4570Sraf 		atomic_or_32(rwstate, URW_HAS_WAITERS);
475*4570Sraf 		readers = *rwstate;
476*4570Sraf 		ASSERT_CONSISTENT_STATE(readers);
477*4570Sraf 		/*
478*4570Sraf 		 * The calls to __lwp_rwlock_*() below will release the mutex,
479*4570Sraf 		 * so we need a dtrace probe here.
480*4570Sraf 		 */
481*4570Sraf 		mp->mutex_owner = 0;
482*4570Sraf 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
483*4570Sraf 		/*
484*4570Sraf 		 * The waiters bit may be inaccurate.
485*4570Sraf 		 * Only the kernel knows for sure.
486*4570Sraf 		 */
487*4570Sraf 		if (rd_wr == READ_LOCK) {
488*4570Sraf 			if (try_flag)
489*4570Sraf 				error = __lwp_rwlock_tryrdlock(rwlp);
490*4570Sraf 			else
491*4570Sraf 				error = __lwp_rwlock_rdlock(rwlp, tsp);
492*4570Sraf 		} else {
493*4570Sraf 			if (try_flag)
494*4570Sraf 				error = __lwp_rwlock_trywrlock(rwlp);
495*4570Sraf 			else
496*4570Sraf 				error = __lwp_rwlock_wrlock(rwlp, tsp);
497*4570Sraf 		}
498*4570Sraf 	} while (error == EAGAIN || error == EINTR);
499*4570Sraf 
500*4570Sraf 	if (!try_flag) {
501*4570Sraf 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
502*4570Sraf 	}
503*4570Sraf 
504*4570Sraf 	return (error);
505*4570Sraf }
506*4570Sraf 
507*4570Sraf /*
508*4570Sraf  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
509*4570Sraf  * and trywrlock for process-private (USYNC_THREAD) rwlocks.
510*4570Sraf  */
511*4570Sraf int
512*4570Sraf rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
513*4570Sraf {
514*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
515*4570Sraf 	uint32_t readers;
5160Sstevel@tonic-gate 	ulwp_t *self = curthread;
517*4570Sraf 	queue_head_t *qp;
518*4570Sraf 	ulwp_t *ulwp;
5190Sstevel@tonic-gate 	int try_flag;
5200Sstevel@tonic-gate 	int error = 0;
5210Sstevel@tonic-gate 
5220Sstevel@tonic-gate 	try_flag = (rd_wr & TRY_FLAG);
5230Sstevel@tonic-gate 	rd_wr &= ~TRY_FLAG;
5240Sstevel@tonic-gate 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
5250Sstevel@tonic-gate 
5260Sstevel@tonic-gate 	if (!try_flag) {
5270Sstevel@tonic-gate 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
5280Sstevel@tonic-gate 	}
5290Sstevel@tonic-gate 
530*4570Sraf 	qp = queue_lock(rwlp, MX);
531*4570Sraf retry:
532*4570Sraf 	while (error == 0) {
5330Sstevel@tonic-gate 		if (rd_wr == READ_LOCK) {
534*4570Sraf 			if (read_lock_try(rwlp, 0))
535*4570Sraf 				goto out;
5360Sstevel@tonic-gate 		} else {
537*4570Sraf 			if (write_lock_try(rwlp, 0))
538*4570Sraf 				goto out;
5390Sstevel@tonic-gate 		}
540*4570Sraf 		atomic_or_32(rwstate, URW_HAS_WAITERS);
541*4570Sraf 		readers = *rwstate;
542*4570Sraf 		ASSERT_CONSISTENT_STATE(readers);
543*4570Sraf 		if ((readers & URW_WRITE_LOCKED) ||
544*4570Sraf 		    (rd_wr == WRITE_LOCK &&
545*4570Sraf 		    (readers & URW_READERS_MASK) != 0))
5460Sstevel@tonic-gate 			/* EMPTY */;	/* somebody holds the lock */
5470Sstevel@tonic-gate 		else if ((ulwp = queue_waiter(qp, rwlp)) == NULL) {
548*4570Sraf 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
5490Sstevel@tonic-gate 			break;		/* no queued waiters */
5500Sstevel@tonic-gate 		} else {
5510Sstevel@tonic-gate 			int our_pri = real_priority(self);
5520Sstevel@tonic-gate 			int his_pri = real_priority(ulwp);
5530Sstevel@tonic-gate 
5540Sstevel@tonic-gate 			if (rd_wr == WRITE_LOCK) {
5550Sstevel@tonic-gate 				/*
5560Sstevel@tonic-gate 				 * We defer to a queued thread that has
5570Sstevel@tonic-gate 				 * a higher priority than ours.
5580Sstevel@tonic-gate 				 */
5590Sstevel@tonic-gate 				if (his_pri <= our_pri)
5600Sstevel@tonic-gate 					break;
5610Sstevel@tonic-gate 			} else {
5620Sstevel@tonic-gate 				/*
5630Sstevel@tonic-gate 				 * We defer to a queued thread that has
5640Sstevel@tonic-gate 				 * a higher priority than ours or that
5650Sstevel@tonic-gate 				 * is a writer whose priority equals ours.
5660Sstevel@tonic-gate 				 */
5670Sstevel@tonic-gate 				if (his_pri < our_pri ||
5680Sstevel@tonic-gate 				    (his_pri == our_pri && !ulwp->ul_writer))
5690Sstevel@tonic-gate 					break;
5700Sstevel@tonic-gate 			}
5710Sstevel@tonic-gate 		}
5720Sstevel@tonic-gate 		/*
5730Sstevel@tonic-gate 		 * We are about to block.
5740Sstevel@tonic-gate 		 * If we're doing a trylock, return EBUSY instead.
5750Sstevel@tonic-gate 		 */
5760Sstevel@tonic-gate 		if (try_flag) {
5770Sstevel@tonic-gate 			error = EBUSY;
5780Sstevel@tonic-gate 			break;
5790Sstevel@tonic-gate 		}
5800Sstevel@tonic-gate 		/*
5810Sstevel@tonic-gate 		 * Enqueue writers ahead of readers of the
5820Sstevel@tonic-gate 		 * same priority.
5830Sstevel@tonic-gate 		 */
5840Sstevel@tonic-gate 		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
5850Sstevel@tonic-gate 		enqueue(qp, self, rwlp, MX);
5860Sstevel@tonic-gate 		set_parking_flag(self, 1);
5870Sstevel@tonic-gate 		queue_unlock(qp);
5880Sstevel@tonic-gate 		if ((error = __lwp_park(tsp, 0)) == EINTR)
5890Sstevel@tonic-gate 			error = 0;
5900Sstevel@tonic-gate 		self->ul_writer = 0;
5910Sstevel@tonic-gate 		set_parking_flag(self, 0);
5920Sstevel@tonic-gate 		qp = queue_lock(rwlp, MX);
593*4570Sraf 		if (self->ul_sleepq && dequeue_self(qp, rwlp) == 0)
594*4570Sraf 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
5950Sstevel@tonic-gate 	}
5960Sstevel@tonic-gate 
5970Sstevel@tonic-gate 	if (error == 0) {
598*4570Sraf 		if (rd_wr == READ_LOCK) {
599*4570Sraf 			if (!read_lock_try(rwlp, 1))
600*4570Sraf 				goto retry;
601*4570Sraf 		} else {
602*4570Sraf 			if (!write_lock_try(rwlp, 1))
603*4570Sraf 				goto retry;
6040Sstevel@tonic-gate 		}
6050Sstevel@tonic-gate 	}
6060Sstevel@tonic-gate 
607*4570Sraf out:
608*4570Sraf 	queue_unlock(qp);
609*4570Sraf 
610*4570Sraf 	if (!try_flag) {
611*4570Sraf 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
612*4570Sraf 	}
6130Sstevel@tonic-gate 
6140Sstevel@tonic-gate 	return (error);
6150Sstevel@tonic-gate }
6160Sstevel@tonic-gate 
6170Sstevel@tonic-gate int
6180Sstevel@tonic-gate rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
6190Sstevel@tonic-gate {
6200Sstevel@tonic-gate 	ulwp_t *self = curthread;
6210Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
6220Sstevel@tonic-gate 	readlock_t *readlockp;
6230Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
6240Sstevel@tonic-gate 	int error;
6250Sstevel@tonic-gate 
6260Sstevel@tonic-gate 	/*
6270Sstevel@tonic-gate 	 * If we already hold a readers lock on this rwlock,
6280Sstevel@tonic-gate 	 * just increment our reference count and return.
6290Sstevel@tonic-gate 	 */
630*4570Sraf 	sigoff(self);
6310Sstevel@tonic-gate 	readlockp = rwl_entry(rwlp);
6320Sstevel@tonic-gate 	if (readlockp->rd_count != 0) {
633*4570Sraf 		if (readlockp->rd_count == READ_LOCK_MAX) {
634*4570Sraf 			sigon(self);
635*4570Sraf 			error = EAGAIN;
636*4570Sraf 			goto out;
637*4570Sraf 		}
638*4570Sraf 		sigon(self);
639*4570Sraf 		error = 0;
640*4570Sraf 		goto out;
6410Sstevel@tonic-gate 	}
642*4570Sraf 	sigon(self);
6430Sstevel@tonic-gate 
6440Sstevel@tonic-gate 	/*
6450Sstevel@tonic-gate 	 * If we hold the writer lock, bail out.
6460Sstevel@tonic-gate 	 */
6470Sstevel@tonic-gate 	if (rw_write_is_held(rwlp)) {
6480Sstevel@tonic-gate 		if (self->ul_error_detection)
6490Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_rdlock",
6500Sstevel@tonic-gate 			    "calling thread owns the writer lock");
651*4570Sraf 		error = EDEADLK;
652*4570Sraf 		goto out;
6530Sstevel@tonic-gate 	}
6540Sstevel@tonic-gate 
655*4570Sraf 	if (read_lock_try(rwlp, 0))
656*4570Sraf 		error = 0;
657*4570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
6580Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
6590Sstevel@tonic-gate 	else						/* user-level */
6600Sstevel@tonic-gate 		error = rwlock_lock(rwlp, tsp, READ_LOCK);
6610Sstevel@tonic-gate 
662*4570Sraf out:
6630Sstevel@tonic-gate 	if (error == 0) {
664*4570Sraf 		sigoff(self);
665*4570Sraf 		rwl_entry(rwlp)->rd_count++;
666*4570Sraf 		sigon(self);
6670Sstevel@tonic-gate 		if (rwsp)
6680Sstevel@tonic-gate 			tdb_incr(rwsp->rw_rdlock);
669*4570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
670*4570Sraf 	} else {
671*4570Sraf 		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
6720Sstevel@tonic-gate 	}
6730Sstevel@tonic-gate 
6740Sstevel@tonic-gate 	return (error);
6750Sstevel@tonic-gate }
6760Sstevel@tonic-gate 
6770Sstevel@tonic-gate #pragma weak rw_rdlock = __rw_rdlock
6780Sstevel@tonic-gate #pragma weak _rw_rdlock = __rw_rdlock
6790Sstevel@tonic-gate #pragma weak pthread_rwlock_rdlock = __rw_rdlock
6800Sstevel@tonic-gate #pragma weak _pthread_rwlock_rdlock = __rw_rdlock
6810Sstevel@tonic-gate int
6820Sstevel@tonic-gate __rw_rdlock(rwlock_t *rwlp)
6830Sstevel@tonic-gate {
6840Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
6850Sstevel@tonic-gate 	return (rw_rdlock_impl(rwlp, NULL));
6860Sstevel@tonic-gate }
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate void
6890Sstevel@tonic-gate lrw_rdlock(rwlock_t *rwlp)
6900Sstevel@tonic-gate {
6910Sstevel@tonic-gate 	enter_critical(curthread);
6920Sstevel@tonic-gate 	(void) rw_rdlock_impl(rwlp, NULL);
6930Sstevel@tonic-gate }
6940Sstevel@tonic-gate 
6950Sstevel@tonic-gate #pragma weak pthread_rwlock_reltimedrdlock_np = \
6960Sstevel@tonic-gate 	_pthread_rwlock_reltimedrdlock_np
6970Sstevel@tonic-gate int
6980Sstevel@tonic-gate _pthread_rwlock_reltimedrdlock_np(rwlock_t *rwlp, const timespec_t *reltime)
6990Sstevel@tonic-gate {
7000Sstevel@tonic-gate 	timespec_t tslocal = *reltime;
7010Sstevel@tonic-gate 	int error;
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7040Sstevel@tonic-gate 	error = rw_rdlock_impl(rwlp, &tslocal);
7050Sstevel@tonic-gate 	if (error == ETIME)
7060Sstevel@tonic-gate 		error = ETIMEDOUT;
7070Sstevel@tonic-gate 	return (error);
7080Sstevel@tonic-gate }
7090Sstevel@tonic-gate 
7100Sstevel@tonic-gate #pragma weak pthread_rwlock_timedrdlock = _pthread_rwlock_timedrdlock
7110Sstevel@tonic-gate int
7120Sstevel@tonic-gate _pthread_rwlock_timedrdlock(rwlock_t *rwlp, const timespec_t *abstime)
7130Sstevel@tonic-gate {
7140Sstevel@tonic-gate 	timespec_t tslocal;
7150Sstevel@tonic-gate 	int error;
7160Sstevel@tonic-gate 
7170Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7180Sstevel@tonic-gate 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
7190Sstevel@tonic-gate 	error = rw_rdlock_impl(rwlp, &tslocal);
7200Sstevel@tonic-gate 	if (error == ETIME)
7210Sstevel@tonic-gate 		error = ETIMEDOUT;
7220Sstevel@tonic-gate 	return (error);
7230Sstevel@tonic-gate }
7240Sstevel@tonic-gate 
7250Sstevel@tonic-gate int
7260Sstevel@tonic-gate rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
7270Sstevel@tonic-gate {
7280Sstevel@tonic-gate 	ulwp_t *self = curthread;
7290Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
7300Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
7310Sstevel@tonic-gate 	int error;
7320Sstevel@tonic-gate 
7330Sstevel@tonic-gate 	/*
7340Sstevel@tonic-gate 	 * If we hold a readers lock on this rwlock, bail out.
7350Sstevel@tonic-gate 	 */
7360Sstevel@tonic-gate 	if (rw_read_is_held(rwlp)) {
7370Sstevel@tonic-gate 		if (self->ul_error_detection)
7380Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_wrlock",
7390Sstevel@tonic-gate 			    "calling thread owns the readers lock");
740*4570Sraf 		error = EDEADLK;
741*4570Sraf 		goto out;
7420Sstevel@tonic-gate 	}
7430Sstevel@tonic-gate 
7440Sstevel@tonic-gate 	/*
7450Sstevel@tonic-gate 	 * If we hold the writer lock, bail out.
7460Sstevel@tonic-gate 	 */
7470Sstevel@tonic-gate 	if (rw_write_is_held(rwlp)) {
7480Sstevel@tonic-gate 		if (self->ul_error_detection)
7490Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_wrlock",
7500Sstevel@tonic-gate 			    "calling thread owns the writer lock");
751*4570Sraf 		error = EDEADLK;
752*4570Sraf 		goto out;
7530Sstevel@tonic-gate 	}
7540Sstevel@tonic-gate 
755*4570Sraf 	if (write_lock_try(rwlp, 0))
756*4570Sraf 		error = 0;
757*4570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
7580Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
759*4570Sraf 	else						/* user-level */
7600Sstevel@tonic-gate 		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);
7610Sstevel@tonic-gate 
762*4570Sraf out:
763*4570Sraf 	if (error == 0) {
764*4570Sraf 		rwlp->rwlock_owner = (uintptr_t)self;
765*4570Sraf 		if (rwlp->rwlock_type == USYNC_PROCESS)
766*4570Sraf 			rwlp->rwlock_ownerpid = udp->pid;
767*4570Sraf 		if (rwsp) {
768*4570Sraf 			tdb_incr(rwsp->rw_wrlock);
769*4570Sraf 			rwsp->rw_wrlock_begin_hold = gethrtime();
770*4570Sraf 		}
771*4570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
772*4570Sraf 	} else {
773*4570Sraf 		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
7740Sstevel@tonic-gate 	}
7750Sstevel@tonic-gate 	return (error);
7760Sstevel@tonic-gate }
7770Sstevel@tonic-gate 
7780Sstevel@tonic-gate #pragma weak rw_wrlock = __rw_wrlock
7790Sstevel@tonic-gate #pragma weak _rw_wrlock = __rw_wrlock
7800Sstevel@tonic-gate #pragma weak pthread_rwlock_wrlock = __rw_wrlock
7810Sstevel@tonic-gate #pragma weak _pthread_rwlock_wrlock = __rw_wrlock
7820Sstevel@tonic-gate int
7830Sstevel@tonic-gate __rw_wrlock(rwlock_t *rwlp)
7840Sstevel@tonic-gate {
7850Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7860Sstevel@tonic-gate 	return (rw_wrlock_impl(rwlp, NULL));
7870Sstevel@tonic-gate }
7880Sstevel@tonic-gate 
7890Sstevel@tonic-gate void
7900Sstevel@tonic-gate lrw_wrlock(rwlock_t *rwlp)
7910Sstevel@tonic-gate {
7920Sstevel@tonic-gate 	enter_critical(curthread);
7930Sstevel@tonic-gate 	(void) rw_wrlock_impl(rwlp, NULL);
7940Sstevel@tonic-gate }
7950Sstevel@tonic-gate 
7960Sstevel@tonic-gate #pragma weak pthread_rwlock_reltimedwrlock_np = \
7970Sstevel@tonic-gate 	_pthread_rwlock_reltimedwrlock_np
7980Sstevel@tonic-gate int
7990Sstevel@tonic-gate _pthread_rwlock_reltimedwrlock_np(rwlock_t *rwlp, const timespec_t *reltime)
8000Sstevel@tonic-gate {
8010Sstevel@tonic-gate 	timespec_t tslocal = *reltime;
8020Sstevel@tonic-gate 	int error;
8030Sstevel@tonic-gate 
8040Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8050Sstevel@tonic-gate 	error = rw_wrlock_impl(rwlp, &tslocal);
8060Sstevel@tonic-gate 	if (error == ETIME)
8070Sstevel@tonic-gate 		error = ETIMEDOUT;
8080Sstevel@tonic-gate 	return (error);
8090Sstevel@tonic-gate }
8100Sstevel@tonic-gate 
8110Sstevel@tonic-gate #pragma weak pthread_rwlock_timedwrlock = _pthread_rwlock_timedwrlock
8120Sstevel@tonic-gate int
8130Sstevel@tonic-gate _pthread_rwlock_timedwrlock(rwlock_t *rwlp, const timespec_t *abstime)
8140Sstevel@tonic-gate {
8150Sstevel@tonic-gate 	timespec_t tslocal;
8160Sstevel@tonic-gate 	int error;
8170Sstevel@tonic-gate 
8180Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8190Sstevel@tonic-gate 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
8200Sstevel@tonic-gate 	error = rw_wrlock_impl(rwlp, &tslocal);
8210Sstevel@tonic-gate 	if (error == ETIME)
8220Sstevel@tonic-gate 		error = ETIMEDOUT;
8230Sstevel@tonic-gate 	return (error);
8240Sstevel@tonic-gate }
8250Sstevel@tonic-gate 
8260Sstevel@tonic-gate #pragma weak rw_tryrdlock = __rw_tryrdlock
8270Sstevel@tonic-gate #pragma weak _rw_tryrdlock = __rw_tryrdlock
8280Sstevel@tonic-gate #pragma weak pthread_rwlock_tryrdlock = __rw_tryrdlock
8290Sstevel@tonic-gate #pragma weak _pthread_rwlock_tryrdlock = __rw_tryrdlock
8300Sstevel@tonic-gate int
8310Sstevel@tonic-gate __rw_tryrdlock(rwlock_t *rwlp)
8320Sstevel@tonic-gate {
8330Sstevel@tonic-gate 	ulwp_t *self = curthread;
8340Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
8350Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
8360Sstevel@tonic-gate 	readlock_t *readlockp;
8370Sstevel@tonic-gate 	int error;
8380Sstevel@tonic-gate 
8390Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8400Sstevel@tonic-gate 
8410Sstevel@tonic-gate 	if (rwsp)
8420Sstevel@tonic-gate 		tdb_incr(rwsp->rw_rdlock_try);
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate 	/*
8450Sstevel@tonic-gate 	 * If we already hold a readers lock on this rwlock,
8460Sstevel@tonic-gate 	 * just increment our reference count and return.
8470Sstevel@tonic-gate 	 */
848*4570Sraf 	sigoff(self);
8490Sstevel@tonic-gate 	readlockp = rwl_entry(rwlp);
8500Sstevel@tonic-gate 	if (readlockp->rd_count != 0) {
851*4570Sraf 		if (readlockp->rd_count == READ_LOCK_MAX) {
852*4570Sraf 			sigon(self);
853*4570Sraf 			error = EAGAIN;
854*4570Sraf 			goto out;
855*4570Sraf 		}
856*4570Sraf 		sigon(self);
857*4570Sraf 		error = 0;
858*4570Sraf 		goto out;
8590Sstevel@tonic-gate 	}
860*4570Sraf 	sigon(self);
8610Sstevel@tonic-gate 
862*4570Sraf 	if (read_lock_try(rwlp, 0))
863*4570Sraf 		error = 0;
864*4570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
8650Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
8660Sstevel@tonic-gate 	else						/* user-level */
8670Sstevel@tonic-gate 		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
8680Sstevel@tonic-gate 
869*4570Sraf out:
870*4570Sraf 	if (error == 0) {
871*4570Sraf 		sigoff(self);
872*4570Sraf 		rwl_entry(rwlp)->rd_count++;
873*4570Sraf 		sigon(self);
874*4570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
875*4570Sraf 	} else {
876*4570Sraf 		if (rwsp)
877*4570Sraf 			tdb_incr(rwsp->rw_rdlock_try_fail);
878*4570Sraf 		if (error != EBUSY) {
879*4570Sraf 			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
880*4570Sraf 			    error);
881*4570Sraf 		}
882*4570Sraf 	}
8830Sstevel@tonic-gate 
8840Sstevel@tonic-gate 	return (error);
8850Sstevel@tonic-gate }
8860Sstevel@tonic-gate 
8870Sstevel@tonic-gate #pragma weak rw_trywrlock = __rw_trywrlock
8880Sstevel@tonic-gate #pragma weak _rw_trywrlock = __rw_trywrlock
8890Sstevel@tonic-gate #pragma weak pthread_rwlock_trywrlock = __rw_trywrlock
8900Sstevel@tonic-gate #pragma weak _pthread_rwlock_trywrlock = __rw_trywrlock
8910Sstevel@tonic-gate int
8920Sstevel@tonic-gate __rw_trywrlock(rwlock_t *rwlp)
8930Sstevel@tonic-gate {
8940Sstevel@tonic-gate 	ulwp_t *self = curthread;
8950Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
8960Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
8970Sstevel@tonic-gate 	int error;
8980Sstevel@tonic-gate 
899*4570Sraf 	ASSERT(!self->ul_critical || self->ul_bindflags);
9000Sstevel@tonic-gate 
9010Sstevel@tonic-gate 	if (rwsp)
9020Sstevel@tonic-gate 		tdb_incr(rwsp->rw_wrlock_try);
9030Sstevel@tonic-gate 
904*4570Sraf 	if (write_lock_try(rwlp, 0))
905*4570Sraf 		error = 0;
906*4570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
9070Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
908*4570Sraf 	else						/* user-level */
9090Sstevel@tonic-gate 		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
910*4570Sraf 
911*4570Sraf 	if (error == 0) {
912*4570Sraf 		rwlp->rwlock_owner = (uintptr_t)self;
913*4570Sraf 		if (rwlp->rwlock_type == USYNC_PROCESS)
914*4570Sraf 			rwlp->rwlock_ownerpid = udp->pid;
915*4570Sraf 		if (rwsp)
916*4570Sraf 			rwsp->rw_wrlock_begin_hold = gethrtime();
917*4570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
918*4570Sraf 	} else {
919*4570Sraf 		if (rwsp)
9200Sstevel@tonic-gate 			tdb_incr(rwsp->rw_wrlock_try_fail);
921*4570Sraf 		if (error != EBUSY) {
922*4570Sraf 			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
923*4570Sraf 			    error);
924*4570Sraf 		}
9250Sstevel@tonic-gate 	}
9260Sstevel@tonic-gate 	return (error);
9270Sstevel@tonic-gate }
9280Sstevel@tonic-gate 
9290Sstevel@tonic-gate #pragma weak rw_unlock = __rw_unlock
9300Sstevel@tonic-gate #pragma weak _rw_unlock = __rw_unlock
9310Sstevel@tonic-gate #pragma weak pthread_rwlock_unlock = __rw_unlock
9320Sstevel@tonic-gate #pragma weak _pthread_rwlock_unlock = __rw_unlock
9330Sstevel@tonic-gate int
9340Sstevel@tonic-gate __rw_unlock(rwlock_t *rwlp)
9350Sstevel@tonic-gate {
936*4570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
937*4570Sraf 	uint32_t readers;
9380Sstevel@tonic-gate 	ulwp_t *self = curthread;
9390Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
9400Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp;
941*4570Sraf 	queue_head_t *qp;
942*4570Sraf 	int rd_wr;
943*4570Sraf 	int waked = 0;
9440Sstevel@tonic-gate 
945*4570Sraf 	readers = *rwstate;
946*4570Sraf 	ASSERT_CONSISTENT_STATE(readers);
947*4570Sraf 	if (readers & URW_WRITE_LOCKED) {
948*4570Sraf 		rd_wr = WRITE_LOCK;
949*4570Sraf 		readers = 0;
950*4570Sraf 	} else {
951*4570Sraf 		rd_wr = READ_LOCK;
952*4570Sraf 		readers &= URW_READERS_MASK;
9530Sstevel@tonic-gate 	}
9540Sstevel@tonic-gate 
955*4570Sraf 	if (rd_wr == WRITE_LOCK) {
9560Sstevel@tonic-gate 		/*
9570Sstevel@tonic-gate 		 * Since the writer lock is held, we'd better be
9580Sstevel@tonic-gate 		 * holding it, else we cannot legitimately be here.
9590Sstevel@tonic-gate 		 */
9600Sstevel@tonic-gate 		if (!rw_write_is_held(rwlp)) {
9610Sstevel@tonic-gate 			if (self->ul_error_detection)
9620Sstevel@tonic-gate 				rwlock_error(rwlp, "rwlock_unlock",
9630Sstevel@tonic-gate 				    "writer lock held, "
9640Sstevel@tonic-gate 				    "but not by the calling thread");
9650Sstevel@tonic-gate 			return (EPERM);
9660Sstevel@tonic-gate 		}
9670Sstevel@tonic-gate 		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
9680Sstevel@tonic-gate 			if (rwsp->rw_wrlock_begin_hold)
9690Sstevel@tonic-gate 				rwsp->rw_wrlock_hold_time +=
9700Sstevel@tonic-gate 				    gethrtime() - rwsp->rw_wrlock_begin_hold;
9710Sstevel@tonic-gate 			rwsp->rw_wrlock_begin_hold = 0;
9720Sstevel@tonic-gate 		}
973*4570Sraf 		rwlp->rwlock_owner = 0;
974*4570Sraf 		rwlp->rwlock_ownerpid = 0;
975*4570Sraf 	} else if (readers > 0) {
9760Sstevel@tonic-gate 		/*
9770Sstevel@tonic-gate 		 * A readers lock is held; if we don't hold one, bail out.
9780Sstevel@tonic-gate 		 */
979*4570Sraf 		readlock_t *readlockp;
980*4570Sraf 
981*4570Sraf 		sigoff(self);
982*4570Sraf 		readlockp = rwl_entry(rwlp);
9830Sstevel@tonic-gate 		if (readlockp->rd_count == 0) {
984*4570Sraf 			sigon(self);
9850Sstevel@tonic-gate 			if (self->ul_error_detection)
9860Sstevel@tonic-gate 				rwlock_error(rwlp, "rwlock_unlock",
9870Sstevel@tonic-gate 				    "readers lock held, "
9880Sstevel@tonic-gate 				    "but not by the calling thread");
9890Sstevel@tonic-gate 			return (EPERM);
9900Sstevel@tonic-gate 		}
9910Sstevel@tonic-gate 		/*
9920Sstevel@tonic-gate 		 * If we hold more than one readers lock on this rwlock,
9930Sstevel@tonic-gate 		 * just decrement our reference count and return.
9940Sstevel@tonic-gate 		 */
9950Sstevel@tonic-gate 		if (--readlockp->rd_count != 0) {
996*4570Sraf 			sigon(self);
997*4570Sraf 			goto out;
9980Sstevel@tonic-gate 		}
999*4570Sraf 		sigon(self);
10000Sstevel@tonic-gate 	} else {
10010Sstevel@tonic-gate 		/*
10020Sstevel@tonic-gate 		 * This is a usage error.
10030Sstevel@tonic-gate 		 * No thread should release an unowned lock.
10040Sstevel@tonic-gate 		 */
10050Sstevel@tonic-gate 		if (self->ul_error_detection)
10060Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
10070Sstevel@tonic-gate 		return (EPERM);
10080Sstevel@tonic-gate 	}
10090Sstevel@tonic-gate 
1010*4570Sraf 	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
1011*4570Sraf 		/* EMPTY */;
1012*4570Sraf 	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
1013*4570Sraf 		/* EMPTY */;
1014*4570Sraf 	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
1015*4570Sraf 		(void) _private_mutex_lock(&rwlp->mutex);
1016*4570Sraf 		(void) __lwp_rwlock_unlock(rwlp);
1017*4570Sraf 		(void) _private_mutex_unlock(&rwlp->mutex);
1018*4570Sraf 		waked = 1;
1019*4570Sraf 	} else {
10200Sstevel@tonic-gate 		qp = queue_lock(rwlp, MX);
1021*4570Sraf 		if (rd_wr == READ_LOCK)
1022*4570Sraf 			atomic_dec_32(rwstate);
1023*4570Sraf 		else
1024*4570Sraf 			atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
10250Sstevel@tonic-gate 		waked = rw_queue_release(qp, rwlp);
10260Sstevel@tonic-gate 	}
10270Sstevel@tonic-gate 
1028*4570Sraf out:
1029*4570Sraf 	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
1030*4570Sraf 
10310Sstevel@tonic-gate 	/*
10320Sstevel@tonic-gate 	 * Yield to the thread we just waked up, just in case we might
10330Sstevel@tonic-gate 	 * be about to grab the rwlock again immediately upon return.
10340Sstevel@tonic-gate 	 * This is pretty weak but it helps on a uniprocessor and also
10350Sstevel@tonic-gate 	 * when cpu affinity has assigned both ourself and the other
10360Sstevel@tonic-gate 	 * thread to the same CPU.  Note that lwp_yield() will yield
10370Sstevel@tonic-gate 	 * the processor only if the writer is at the same or higher
10380Sstevel@tonic-gate 	 * priority than ourself.  This provides more balanced program
10390Sstevel@tonic-gate 	 * behavior; it doesn't guarantee acquisition of the lock by
10400Sstevel@tonic-gate 	 * the pending writer.
10410Sstevel@tonic-gate 	 */
10420Sstevel@tonic-gate 	if (waked)
10430Sstevel@tonic-gate 		lwp_yield();
10440Sstevel@tonic-gate 	return (0);
10450Sstevel@tonic-gate }
10460Sstevel@tonic-gate 
10470Sstevel@tonic-gate void
10480Sstevel@tonic-gate lrw_unlock(rwlock_t *rwlp)
10490Sstevel@tonic-gate {
10500Sstevel@tonic-gate 	(void) __rw_unlock(rwlp);
10510Sstevel@tonic-gate 	exit_critical(curthread);
10520Sstevel@tonic-gate }
1053