/* Source: onnv-gate usr/src/lib/libc/port/threads/rwlock.c (revision 10637:f6eac4af74a8) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

270Sstevel@tonic-gate #include "lint.h"
280Sstevel@tonic-gate #include "thr_uberdata.h"
290Sstevel@tonic-gate #include <sys/sdt.h>
300Sstevel@tonic-gate 
/*
 * Lock-request codes passed as the rd_wr argument of rwlock_lock() and
 * shared_rwlock_lock().  TRY_FLAG is or-ed in to request a non-blocking
 * attempt; those functions strip it before comparing the remainder
 * against READ_LOCK / WRITE_LOCK.
 */
#define	TRY_FLAG		0x10
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)

#define	NLOCKS	4	/* initial number of readlock_t structs allocated */

/*
 * Sanity check on a snapshot of the rwlock state word: if the
 * write-locked bit is set then, ignoring the has-waiters bit,
 * nothing else (in particular no reader count) may be set.
 */
#define	ASSERT_CONSISTENT_STATE(readers)		\
	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
		((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)

430Sstevel@tonic-gate /*
440Sstevel@tonic-gate  * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
454570Sraf  * We must be deferring signals for this to be safe.
464574Sraf  * Else if we are returning an entry with ul_rdlockcnt == 0,
474570Sraf  * it could be reassigned behind our back in a signal handler.
480Sstevel@tonic-gate  */
490Sstevel@tonic-gate static readlock_t *
500Sstevel@tonic-gate rwl_entry(rwlock_t *rwlp)
510Sstevel@tonic-gate {
520Sstevel@tonic-gate 	ulwp_t *self = curthread;
530Sstevel@tonic-gate 	readlock_t *remembered = NULL;
540Sstevel@tonic-gate 	readlock_t *readlockp;
550Sstevel@tonic-gate 	uint_t nlocks;
560Sstevel@tonic-gate 
574570Sraf 	/* we must be deferring signals */
584570Sraf 	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);
594570Sraf 
604574Sraf 	if ((nlocks = self->ul_rdlockcnt) != 0)
610Sstevel@tonic-gate 		readlockp = self->ul_readlock.array;
620Sstevel@tonic-gate 	else {
630Sstevel@tonic-gate 		nlocks = 1;
640Sstevel@tonic-gate 		readlockp = &self->ul_readlock.single;
650Sstevel@tonic-gate 	}
660Sstevel@tonic-gate 
670Sstevel@tonic-gate 	for (; nlocks; nlocks--, readlockp++) {
680Sstevel@tonic-gate 		if (readlockp->rd_rwlock == rwlp)
690Sstevel@tonic-gate 			return (readlockp);
700Sstevel@tonic-gate 		if (readlockp->rd_count == 0 && remembered == NULL)
710Sstevel@tonic-gate 			remembered = readlockp;
720Sstevel@tonic-gate 	}
730Sstevel@tonic-gate 	if (remembered != NULL) {
740Sstevel@tonic-gate 		remembered->rd_rwlock = rwlp;
750Sstevel@tonic-gate 		return (remembered);
760Sstevel@tonic-gate 	}
770Sstevel@tonic-gate 
780Sstevel@tonic-gate 	/*
790Sstevel@tonic-gate 	 * No entry available.  Allocate more space, converting the single
800Sstevel@tonic-gate 	 * readlock_t entry into an array of readlock_t entries if necessary.
810Sstevel@tonic-gate 	 */
824574Sraf 	if ((nlocks = self->ul_rdlockcnt) == 0) {
830Sstevel@tonic-gate 		/*
840Sstevel@tonic-gate 		 * Initial allocation of the readlock_t array.
850Sstevel@tonic-gate 		 * Convert the single entry into an array.
860Sstevel@tonic-gate 		 */
874574Sraf 		self->ul_rdlockcnt = nlocks = NLOCKS;
880Sstevel@tonic-gate 		readlockp = lmalloc(nlocks * sizeof (readlock_t));
890Sstevel@tonic-gate 		/*
900Sstevel@tonic-gate 		 * The single readlock_t becomes the first entry in the array.
910Sstevel@tonic-gate 		 */
920Sstevel@tonic-gate 		*readlockp = self->ul_readlock.single;
930Sstevel@tonic-gate 		self->ul_readlock.single.rd_count = 0;
940Sstevel@tonic-gate 		self->ul_readlock.array = readlockp;
950Sstevel@tonic-gate 		/*
960Sstevel@tonic-gate 		 * Return the next available entry in the array.
970Sstevel@tonic-gate 		 */
980Sstevel@tonic-gate 		(++readlockp)->rd_rwlock = rwlp;
990Sstevel@tonic-gate 		return (readlockp);
1000Sstevel@tonic-gate 	}
1010Sstevel@tonic-gate 	/*
1020Sstevel@tonic-gate 	 * Reallocate the array, double the size each time.
1030Sstevel@tonic-gate 	 */
1040Sstevel@tonic-gate 	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
1056515Sraf 	(void) memcpy(readlockp, self->ul_readlock.array,
1066247Sraf 	    nlocks * sizeof (readlock_t));
1070Sstevel@tonic-gate 	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
1080Sstevel@tonic-gate 	self->ul_readlock.array = readlockp;
1094574Sraf 	self->ul_rdlockcnt *= 2;
1100Sstevel@tonic-gate 	/*
1110Sstevel@tonic-gate 	 * Return the next available entry in the newly allocated array.
1120Sstevel@tonic-gate 	 */
1130Sstevel@tonic-gate 	(readlockp += nlocks)->rd_rwlock = rwlp;
1140Sstevel@tonic-gate 	return (readlockp);
1150Sstevel@tonic-gate }
1160Sstevel@tonic-gate 
1170Sstevel@tonic-gate /*
1180Sstevel@tonic-gate  * Free the array of rwlocks held for reading.
1190Sstevel@tonic-gate  */
1200Sstevel@tonic-gate void
1210Sstevel@tonic-gate rwl_free(ulwp_t *ulwp)
1220Sstevel@tonic-gate {
1230Sstevel@tonic-gate 	uint_t nlocks;
1240Sstevel@tonic-gate 
1254574Sraf 	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
1260Sstevel@tonic-gate 		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
1274574Sraf 	ulwp->ul_rdlockcnt = 0;
1280Sstevel@tonic-gate 	ulwp->ul_readlock.single.rd_rwlock = NULL;
1290Sstevel@tonic-gate 	ulwp->ul_readlock.single.rd_count = 0;
1300Sstevel@tonic-gate }
1310Sstevel@tonic-gate 
1320Sstevel@tonic-gate /*
1330Sstevel@tonic-gate  * Check if a reader version of the lock is held by the current thread.
1340Sstevel@tonic-gate  */
1356812Sraf #pragma weak _rw_read_held = rw_read_held
1360Sstevel@tonic-gate int
1376812Sraf rw_read_held(rwlock_t *rwlp)
1380Sstevel@tonic-gate {
1394570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
1404570Sraf 	uint32_t readers;
1414570Sraf 	ulwp_t *self = curthread;
1420Sstevel@tonic-gate 	readlock_t *readlockp;
1430Sstevel@tonic-gate 	uint_t nlocks;
1444570Sraf 	int rval = 0;
1450Sstevel@tonic-gate 
1464570Sraf 	no_preempt(self);
1474570Sraf 
1484570Sraf 	readers = *rwstate;
1494570Sraf 	ASSERT_CONSISTENT_STATE(readers);
1504570Sraf 	if (!(readers & URW_WRITE_LOCKED) &&
1514570Sraf 	    (readers & URW_READERS_MASK) != 0) {
1524570Sraf 		/*
1534570Sraf 		 * The lock is held for reading by some thread.
1544570Sraf 		 * Search our array of rwlocks held for reading for a match.
1554570Sraf 		 */
1564574Sraf 		if ((nlocks = self->ul_rdlockcnt) != 0)
1574570Sraf 			readlockp = self->ul_readlock.array;
1584570Sraf 		else {
1594570Sraf 			nlocks = 1;
1604570Sraf 			readlockp = &self->ul_readlock.single;
1614570Sraf 		}
1624570Sraf 		for (; nlocks; nlocks--, readlockp++) {
1634570Sraf 			if (readlockp->rd_rwlock == rwlp) {
1644570Sraf 				if (readlockp->rd_count)
1654570Sraf 					rval = 1;
1664570Sraf 				break;
1674570Sraf 			}
1684570Sraf 		}
1690Sstevel@tonic-gate 	}
1700Sstevel@tonic-gate 
1714570Sraf 	preempt(self);
1724570Sraf 	return (rval);
1730Sstevel@tonic-gate }
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate /*
1760Sstevel@tonic-gate  * Check if a writer version of the lock is held by the current thread.
1770Sstevel@tonic-gate  */
1786812Sraf #pragma weak _rw_write_held = rw_write_held
1790Sstevel@tonic-gate int
1806812Sraf rw_write_held(rwlock_t *rwlp)
1810Sstevel@tonic-gate {
1824570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
1834570Sraf 	uint32_t readers;
1840Sstevel@tonic-gate 	ulwp_t *self = curthread;
1854570Sraf 	int rval;
1864570Sraf 
1874570Sraf 	no_preempt(self);
1880Sstevel@tonic-gate 
1894570Sraf 	readers = *rwstate;
1904570Sraf 	ASSERT_CONSISTENT_STATE(readers);
1914570Sraf 	rval = ((readers & URW_WRITE_LOCKED) &&
1924570Sraf 	    rwlp->rwlock_owner == (uintptr_t)self &&
1934570Sraf 	    (rwlp->rwlock_type == USYNC_THREAD ||
1944570Sraf 	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));
1950Sstevel@tonic-gate 
1964570Sraf 	preempt(self);
1974570Sraf 	return (rval);
1980Sstevel@tonic-gate }
1990Sstevel@tonic-gate 
2006812Sraf #pragma weak _rwlock_init = rwlock_init
2010Sstevel@tonic-gate /* ARGSUSED2 */
2020Sstevel@tonic-gate int
2036812Sraf rwlock_init(rwlock_t *rwlp, int type, void *arg)
2040Sstevel@tonic-gate {
2057255Sraf 	ulwp_t *self = curthread;
2067255Sraf 
2070Sstevel@tonic-gate 	if (type != USYNC_THREAD && type != USYNC_PROCESS)
2080Sstevel@tonic-gate 		return (EINVAL);
2090Sstevel@tonic-gate 	/*
2100Sstevel@tonic-gate 	 * Once reinitialized, we can no longer be holding a read or write lock.
2110Sstevel@tonic-gate 	 * We can do nothing about other threads that are holding read locks.
2120Sstevel@tonic-gate 	 */
2137255Sraf 	sigoff(self);
2144570Sraf 	rwl_entry(rwlp)->rd_count = 0;
2157255Sraf 	sigon(self);
2166515Sraf 	(void) memset(rwlp, 0, sizeof (*rwlp));
2170Sstevel@tonic-gate 	rwlp->rwlock_type = (uint16_t)type;
2180Sstevel@tonic-gate 	rwlp->rwlock_magic = RWL_MAGIC;
2190Sstevel@tonic-gate 	rwlp->mutex.mutex_type = (uint8_t)type;
2200Sstevel@tonic-gate 	rwlp->mutex.mutex_flag = LOCK_INITED;
2210Sstevel@tonic-gate 	rwlp->mutex.mutex_magic = MUTEX_MAGIC;
2227255Sraf 
2237255Sraf 	/*
2247255Sraf 	 * This should be at the beginning of the function,
2257255Sraf 	 * but for the sake of old broken applications that
2267255Sraf 	 * do not have proper alignment for their rwlocks
2277255Sraf 	 * (and don't check the return code from rwlock_init),
2287255Sraf 	 * we put it here, after initializing the rwlock regardless.
2297255Sraf 	 */
2307255Sraf 	if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) &&
2317255Sraf 	    self->ul_misaligned == 0)
2327255Sraf 		return (EINVAL);
2337255Sraf 
2340Sstevel@tonic-gate 	return (0);
2350Sstevel@tonic-gate }
2360Sstevel@tonic-gate 
2376812Sraf #pragma weak pthread_rwlock_destroy = rwlock_destroy
2386812Sraf #pragma weak _rwlock_destroy = rwlock_destroy
2390Sstevel@tonic-gate int
2406812Sraf rwlock_destroy(rwlock_t *rwlp)
2410Sstevel@tonic-gate {
242*10637SRoger.Faulkner@Sun.COM 	ulwp_t *self = curthread;
243*10637SRoger.Faulkner@Sun.COM 
2440Sstevel@tonic-gate 	/*
2450Sstevel@tonic-gate 	 * Once destroyed, we can no longer be holding a read or write lock.
2460Sstevel@tonic-gate 	 * We can do nothing about other threads that are holding read locks.
2470Sstevel@tonic-gate 	 */
248*10637SRoger.Faulkner@Sun.COM 	sigoff(self);
2494570Sraf 	rwl_entry(rwlp)->rd_count = 0;
250*10637SRoger.Faulkner@Sun.COM 	sigon(self);
2510Sstevel@tonic-gate 	rwlp->rwlock_magic = 0;
2520Sstevel@tonic-gate 	tdb_sync_obj_deregister(rwlp);
2530Sstevel@tonic-gate 	return (0);
2540Sstevel@tonic-gate }
2550Sstevel@tonic-gate 
2560Sstevel@tonic-gate /*
2574570Sraf  * Attempt to acquire a readers lock.  Return true on success.
2584570Sraf  */
2594570Sraf static int
2604570Sraf read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
2614570Sraf {
2624570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
2634570Sraf 	uint32_t mask = ignore_waiters_flag?
2646247Sraf 	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
2654570Sraf 	uint32_t readers;
2664570Sraf 	ulwp_t *self = curthread;
2674570Sraf 
2684570Sraf 	no_preempt(self);
2694570Sraf 	while (((readers = *rwstate) & mask) == 0) {
2704570Sraf 		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
2714570Sraf 			preempt(self);
2724570Sraf 			return (1);
2734570Sraf 		}
2744570Sraf 	}
2754570Sraf 	preempt(self);
2764570Sraf 	return (0);
2774570Sraf }
2784570Sraf 
2794570Sraf /*
2804570Sraf  * Attempt to release a reader lock.  Return true on success.
2814570Sraf  */
2824570Sraf static int
2834570Sraf read_unlock_try(rwlock_t *rwlp)
2844570Sraf {
2854570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
2864570Sraf 	uint32_t readers;
2874570Sraf 	ulwp_t *self = curthread;
2884570Sraf 
2894570Sraf 	no_preempt(self);
2904570Sraf 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
2914570Sraf 		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
2924570Sraf 			preempt(self);
2934570Sraf 			return (1);
2944570Sraf 		}
2954570Sraf 	}
2964570Sraf 	preempt(self);
2974570Sraf 	return (0);
2984570Sraf }
2994570Sraf 
3004570Sraf /*
3014570Sraf  * Attempt to acquire a writer lock.  Return true on success.
3024570Sraf  */
3034570Sraf static int
3044570Sraf write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
3054570Sraf {
3064570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
3074570Sraf 	uint32_t mask = ignore_waiters_flag?
3086247Sraf 	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
3096247Sraf 	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
3104570Sraf 	ulwp_t *self = curthread;
3114570Sraf 	uint32_t readers;
3124570Sraf 
3134570Sraf 	no_preempt(self);
3144570Sraf 	while (((readers = *rwstate) & mask) == 0) {
3154570Sraf 		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
3164570Sraf 		    == readers) {
3174570Sraf 			preempt(self);
3184570Sraf 			return (1);
3194570Sraf 		}
3204570Sraf 	}
3214570Sraf 	preempt(self);
3224570Sraf 	return (0);
3234570Sraf }
3244570Sraf 
3254570Sraf /*
3264570Sraf  * Attempt to release a writer lock.  Return true on success.
3274570Sraf  */
3284570Sraf static int
3294570Sraf write_unlock_try(rwlock_t *rwlp)
3304570Sraf {
3314570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
3324570Sraf 	uint32_t readers;
3334570Sraf 	ulwp_t *self = curthread;
3344570Sraf 
3354570Sraf 	no_preempt(self);
3364570Sraf 	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
3374570Sraf 		if (atomic_cas_32(rwstate, readers, 0) == readers) {
3384570Sraf 			preempt(self);
3394570Sraf 			return (1);
3404570Sraf 		}
3414570Sraf 	}
3424570Sraf 	preempt(self);
3434570Sraf 	return (0);
3444570Sraf }
3454570Sraf 
3464570Sraf /*
3474570Sraf  * Wake up thread(s) sleeping on the rwlock queue and then
3480Sstevel@tonic-gate  * drop the queue lock.  Return non-zero if we wake up someone.
3494570Sraf  * This is called when a thread releases a lock that appears to have waiters.
3500Sstevel@tonic-gate  */
3510Sstevel@tonic-gate static int
3520Sstevel@tonic-gate rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
3530Sstevel@tonic-gate {
3544570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
3554570Sraf 	uint32_t readers;
3564570Sraf 	uint32_t writers;
3574570Sraf 	ulwp_t **ulwpp;
3580Sstevel@tonic-gate 	ulwp_t *ulwp;
3596247Sraf 	ulwp_t *prev;
3606247Sraf 	int nlwpid = 0;
3616247Sraf 	int more;
3626247Sraf 	int maxlwps = MAXLWPS;
3634570Sraf 	lwpid_t buffer[MAXLWPS];
3644570Sraf 	lwpid_t *lwpid = buffer;
3654570Sraf 
3664570Sraf 	readers = *rwstate;
3674570Sraf 	ASSERT_CONSISTENT_STATE(readers);
3684570Sraf 	if (!(readers & URW_HAS_WAITERS)) {
3694570Sraf 		queue_unlock(qp);
3704570Sraf 		return (0);
3714570Sraf 	}
3724570Sraf 	readers &= URW_READERS_MASK;
3734570Sraf 	writers = 0;
3740Sstevel@tonic-gate 
3754570Sraf 	/*
3766247Sraf 	 * Examine the queue of waiters in priority order and prepare
3776247Sraf 	 * to wake up as many readers as we encounter before encountering
3786247Sraf 	 * a writer.  If the highest priority thread on the queue is a
3794570Sraf 	 * writer, stop there and wake it up.
3804570Sraf 	 *
3814570Sraf 	 * We keep track of lwpids that are to be unparked in lwpid[].
3824570Sraf 	 * __lwp_unpark_all() is called to unpark all of them after
3834570Sraf 	 * they have been removed from the sleep queue and the sleep
3844570Sraf 	 * queue lock has been dropped.  If we run out of space in our
3854570Sraf 	 * on-stack buffer, we need to allocate more but we can't call
3864570Sraf 	 * lmalloc() because we are holding a queue lock when the overflow
3874570Sraf 	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
3884570Sraf 	 * either because the application may have allocated a small
3894570Sraf 	 * stack and we don't want to overrun the stack.  So we call
3904570Sraf 	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
3914570Sraf 	 * system call directly since that path acquires no locks.
3924570Sraf 	 */
3936247Sraf 	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
3946247Sraf 		ulwp = *ulwpp;
3956247Sraf 		ASSERT(ulwp->ul_wchan == rwlp);
3964570Sraf 		if (ulwp->ul_writer) {
3974570Sraf 			if (writers != 0 || readers != 0)
3984570Sraf 				break;
3994570Sraf 			/* one writer to wake */
4004570Sraf 			writers++;
4014570Sraf 		} else {
4024570Sraf 			if (writers != 0)
4034570Sraf 				break;
4044570Sraf 			/* at least one reader to wake */
4054570Sraf 			readers++;
4064570Sraf 			if (nlwpid == maxlwps)
4074570Sraf 				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
4084570Sraf 		}
4096247Sraf 		queue_unlink(qp, ulwpp, prev);
4106247Sraf 		ulwp->ul_sleepq = NULL;
4116247Sraf 		ulwp->ul_wchan = NULL;
4124570Sraf 		lwpid[nlwpid++] = ulwp->ul_lwpid;
4130Sstevel@tonic-gate 	}
4146247Sraf 	if (ulwpp == NULL)
4154570Sraf 		atomic_and_32(rwstate, ~URW_HAS_WAITERS);
4164570Sraf 	if (nlwpid == 0) {
4174570Sraf 		queue_unlock(qp);
4184570Sraf 	} else {
4196247Sraf 		ulwp_t *self = curthread;
4204570Sraf 		no_preempt(self);
4214570Sraf 		queue_unlock(qp);
4224570Sraf 		if (nlwpid == 1)
4234570Sraf 			(void) __lwp_unpark(lwpid[0]);
4244570Sraf 		else
4254570Sraf 			(void) __lwp_unpark_all(lwpid, nlwpid);
4264570Sraf 		preempt(self);
4274570Sraf 	}
4284570Sraf 	if (lwpid != buffer)
4296515Sraf 		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
4304570Sraf 	return (nlwpid != 0);
4310Sstevel@tonic-gate }
4320Sstevel@tonic-gate 
4330Sstevel@tonic-gate /*
4340Sstevel@tonic-gate  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
4350Sstevel@tonic-gate  * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
4360Sstevel@tonic-gate  *
4370Sstevel@tonic-gate  * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
4380Sstevel@tonic-gate  * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
4390Sstevel@tonic-gate  * released, and if they need to sleep will release the mutex first. In the
4400Sstevel@tonic-gate  * event of a spurious wakeup, these will return EAGAIN (because it is much
4410Sstevel@tonic-gate  * easier for us to re-acquire the mutex here).
4420Sstevel@tonic-gate  */
4430Sstevel@tonic-gate int
4440Sstevel@tonic-gate shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
4450Sstevel@tonic-gate {
4464570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
4474570Sraf 	mutex_t *mp = &rwlp->mutex;
4484570Sraf 	uint32_t readers;
4494570Sraf 	int try_flag;
4504570Sraf 	int error;
4514570Sraf 
4524570Sraf 	try_flag = (rd_wr & TRY_FLAG);
4534570Sraf 	rd_wr &= ~TRY_FLAG;
4544570Sraf 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
4554570Sraf 
4564570Sraf 	if (!try_flag) {
4574570Sraf 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
4584570Sraf 	}
4594570Sraf 
4604570Sraf 	do {
4614570Sraf 		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
4624570Sraf 			error = EBUSY;
4634570Sraf 			break;
4644570Sraf 		}
4656515Sraf 		if ((error = mutex_lock(mp)) != 0)
4664570Sraf 			break;
4674570Sraf 		if (rd_wr == READ_LOCK) {
4684570Sraf 			if (read_lock_try(rwlp, 0)) {
4696515Sraf 				(void) mutex_unlock(mp);
4704570Sraf 				break;
4714570Sraf 			}
4724570Sraf 		} else {
4734570Sraf 			if (write_lock_try(rwlp, 0)) {
4746515Sraf 				(void) mutex_unlock(mp);
4754570Sraf 				break;
4764570Sraf 			}
4774570Sraf 		}
4784570Sraf 		atomic_or_32(rwstate, URW_HAS_WAITERS);
4794570Sraf 		readers = *rwstate;
4804570Sraf 		ASSERT_CONSISTENT_STATE(readers);
4814570Sraf 		/*
4824570Sraf 		 * The calls to __lwp_rwlock_*() below will release the mutex,
4837907SRoger.Faulkner@Sun.COM 		 * so we need a dtrace probe here.  The owner field of the
4847907SRoger.Faulkner@Sun.COM 		 * mutex is cleared in the kernel when the mutex is released,
4857907SRoger.Faulkner@Sun.COM 		 * so we should not clear it here.
4864570Sraf 		 */
4874570Sraf 		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
4884570Sraf 		/*
4894570Sraf 		 * The waiters bit may be inaccurate.
4904570Sraf 		 * Only the kernel knows for sure.
4914570Sraf 		 */
4924570Sraf 		if (rd_wr == READ_LOCK) {
4934570Sraf 			if (try_flag)
4944570Sraf 				error = __lwp_rwlock_tryrdlock(rwlp);
4954570Sraf 			else
4964570Sraf 				error = __lwp_rwlock_rdlock(rwlp, tsp);
4974570Sraf 		} else {
4984570Sraf 			if (try_flag)
4994570Sraf 				error = __lwp_rwlock_trywrlock(rwlp);
5004570Sraf 			else
5014570Sraf 				error = __lwp_rwlock_wrlock(rwlp, tsp);
5024570Sraf 		}
5034570Sraf 	} while (error == EAGAIN || error == EINTR);
5044570Sraf 
5054570Sraf 	if (!try_flag) {
5064570Sraf 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
5074570Sraf 	}
5084570Sraf 
5094570Sraf 	return (error);
5104570Sraf }
5114570Sraf 
5124570Sraf /*
5134570Sraf  * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
5144570Sraf  * and trywrlock for process-private (USYNC_THREAD) rwlocks.
5154570Sraf  */
5164570Sraf int
5174570Sraf rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
5184570Sraf {
5194570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
5204570Sraf 	uint32_t readers;
5210Sstevel@tonic-gate 	ulwp_t *self = curthread;
5224570Sraf 	queue_head_t *qp;
5234570Sraf 	ulwp_t *ulwp;
5240Sstevel@tonic-gate 	int try_flag;
5256247Sraf 	int ignore_waiters_flag;
5260Sstevel@tonic-gate 	int error = 0;
5270Sstevel@tonic-gate 
5280Sstevel@tonic-gate 	try_flag = (rd_wr & TRY_FLAG);
5290Sstevel@tonic-gate 	rd_wr &= ~TRY_FLAG;
5300Sstevel@tonic-gate 	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);
5310Sstevel@tonic-gate 
5320Sstevel@tonic-gate 	if (!try_flag) {
5330Sstevel@tonic-gate 		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
5340Sstevel@tonic-gate 	}
5350Sstevel@tonic-gate 
5364570Sraf 	qp = queue_lock(rwlp, MX);
5376247Sraf 	/* initial attempt to acquire the lock fails if there are waiters */
5386247Sraf 	ignore_waiters_flag = 0;
5394570Sraf 	while (error == 0) {
5400Sstevel@tonic-gate 		if (rd_wr == READ_LOCK) {
5416247Sraf 			if (read_lock_try(rwlp, ignore_waiters_flag))
5426247Sraf 				break;
5430Sstevel@tonic-gate 		} else {
5446247Sraf 			if (write_lock_try(rwlp, ignore_waiters_flag))
5456247Sraf 				break;
5460Sstevel@tonic-gate 		}
5476247Sraf 		/* subsequent attempts do not fail due to waiters */
5486247Sraf 		ignore_waiters_flag = 1;
5494570Sraf 		atomic_or_32(rwstate, URW_HAS_WAITERS);
5504570Sraf 		readers = *rwstate;
5514570Sraf 		ASSERT_CONSISTENT_STATE(readers);
5524570Sraf 		if ((readers & URW_WRITE_LOCKED) ||
5534570Sraf 		    (rd_wr == WRITE_LOCK &&
5544570Sraf 		    (readers & URW_READERS_MASK) != 0))
5550Sstevel@tonic-gate 			/* EMPTY */;	/* somebody holds the lock */
5566247Sraf 		else if ((ulwp = queue_waiter(qp)) == NULL) {
5574570Sraf 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
5586247Sraf 			continue;	/* no queued waiters, try again */
5590Sstevel@tonic-gate 		} else {
5606247Sraf 			/*
5616247Sraf 			 * Do a priority check on the queued waiter (the
5626247Sraf 			 * highest priority thread on the queue) to see
5636247Sraf 			 * if we should defer to him or just grab the lock.
5646247Sraf 			 */
5650Sstevel@tonic-gate 			int our_pri = real_priority(self);
5660Sstevel@tonic-gate 			int his_pri = real_priority(ulwp);
5670Sstevel@tonic-gate 
5680Sstevel@tonic-gate 			if (rd_wr == WRITE_LOCK) {
5690Sstevel@tonic-gate 				/*
5700Sstevel@tonic-gate 				 * We defer to a queued thread that has
5710Sstevel@tonic-gate 				 * a higher priority than ours.
5720Sstevel@tonic-gate 				 */
5730Sstevel@tonic-gate 				if (his_pri <= our_pri)
5746247Sraf 					continue;	/* try again */
5750Sstevel@tonic-gate 			} else {
5760Sstevel@tonic-gate 				/*
5770Sstevel@tonic-gate 				 * We defer to a queued thread that has
5780Sstevel@tonic-gate 				 * a higher priority than ours or that
5790Sstevel@tonic-gate 				 * is a writer whose priority equals ours.
5800Sstevel@tonic-gate 				 */
5810Sstevel@tonic-gate 				if (his_pri < our_pri ||
5820Sstevel@tonic-gate 				    (his_pri == our_pri && !ulwp->ul_writer))
5836247Sraf 					continue;	/* try again */
5840Sstevel@tonic-gate 			}
5850Sstevel@tonic-gate 		}
5860Sstevel@tonic-gate 		/*
5870Sstevel@tonic-gate 		 * We are about to block.
5880Sstevel@tonic-gate 		 * If we're doing a trylock, return EBUSY instead.
5890Sstevel@tonic-gate 		 */
5900Sstevel@tonic-gate 		if (try_flag) {
5910Sstevel@tonic-gate 			error = EBUSY;
5920Sstevel@tonic-gate 			break;
5930Sstevel@tonic-gate 		}
5940Sstevel@tonic-gate 		/*
5956247Sraf 		 * Enqueue writers ahead of readers.
5960Sstevel@tonic-gate 		 */
5970Sstevel@tonic-gate 		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
5986247Sraf 		enqueue(qp, self, 0);
5990Sstevel@tonic-gate 		set_parking_flag(self, 1);
6000Sstevel@tonic-gate 		queue_unlock(qp);
6010Sstevel@tonic-gate 		if ((error = __lwp_park(tsp, 0)) == EINTR)
6026247Sraf 			error = ignore_waiters_flag = 0;
6030Sstevel@tonic-gate 		set_parking_flag(self, 0);
6040Sstevel@tonic-gate 		qp = queue_lock(rwlp, MX);
6056247Sraf 		if (self->ul_sleepq && dequeue_self(qp) == 0)
6064570Sraf 			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
6076247Sraf 		self->ul_writer = 0;
6080Sstevel@tonic-gate 	}
6090Sstevel@tonic-gate 
6104570Sraf 	queue_unlock(qp);
6114570Sraf 
6124570Sraf 	if (!try_flag) {
6134570Sraf 		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
6144570Sraf 	}
6150Sstevel@tonic-gate 
6160Sstevel@tonic-gate 	return (error);
6170Sstevel@tonic-gate }
6180Sstevel@tonic-gate 
6190Sstevel@tonic-gate int
6200Sstevel@tonic-gate rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
6210Sstevel@tonic-gate {
6220Sstevel@tonic-gate 	ulwp_t *self = curthread;
6230Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
6240Sstevel@tonic-gate 	readlock_t *readlockp;
6250Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
6260Sstevel@tonic-gate 	int error;
6270Sstevel@tonic-gate 
6280Sstevel@tonic-gate 	/*
6290Sstevel@tonic-gate 	 * If we already hold a readers lock on this rwlock,
6300Sstevel@tonic-gate 	 * just increment our reference count and return.
6310Sstevel@tonic-gate 	 */
6324570Sraf 	sigoff(self);
6330Sstevel@tonic-gate 	readlockp = rwl_entry(rwlp);
6340Sstevel@tonic-gate 	if (readlockp->rd_count != 0) {
6354570Sraf 		if (readlockp->rd_count == READ_LOCK_MAX) {
6364570Sraf 			sigon(self);
6374570Sraf 			error = EAGAIN;
6384570Sraf 			goto out;
6394570Sraf 		}
6404570Sraf 		sigon(self);
6414570Sraf 		error = 0;
6424570Sraf 		goto out;
6430Sstevel@tonic-gate 	}
6444570Sraf 	sigon(self);
6450Sstevel@tonic-gate 
6460Sstevel@tonic-gate 	/*
6470Sstevel@tonic-gate 	 * If we hold the writer lock, bail out.
6480Sstevel@tonic-gate 	 */
6496812Sraf 	if (rw_write_held(rwlp)) {
6500Sstevel@tonic-gate 		if (self->ul_error_detection)
6510Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_rdlock",
6520Sstevel@tonic-gate 			    "calling thread owns the writer lock");
6534570Sraf 		error = EDEADLK;
6544570Sraf 		goto out;
6550Sstevel@tonic-gate 	}
6560Sstevel@tonic-gate 
6574570Sraf 	if (read_lock_try(rwlp, 0))
6584570Sraf 		error = 0;
6594570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
6600Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
6610Sstevel@tonic-gate 	else						/* user-level */
6620Sstevel@tonic-gate 		error = rwlock_lock(rwlp, tsp, READ_LOCK);
6630Sstevel@tonic-gate 
6644570Sraf out:
6650Sstevel@tonic-gate 	if (error == 0) {
6664570Sraf 		sigoff(self);
6674570Sraf 		rwl_entry(rwlp)->rd_count++;
6684570Sraf 		sigon(self);
6690Sstevel@tonic-gate 		if (rwsp)
6700Sstevel@tonic-gate 			tdb_incr(rwsp->rw_rdlock);
6714570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
6724570Sraf 	} else {
6734570Sraf 		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
6740Sstevel@tonic-gate 	}
6750Sstevel@tonic-gate 
6760Sstevel@tonic-gate 	return (error);
6770Sstevel@tonic-gate }
6780Sstevel@tonic-gate 
6796812Sraf #pragma weak pthread_rwlock_rdlock = rw_rdlock
6806812Sraf #pragma weak _rw_rdlock = rw_rdlock
6810Sstevel@tonic-gate int
6826812Sraf rw_rdlock(rwlock_t *rwlp)
6830Sstevel@tonic-gate {
6840Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
6850Sstevel@tonic-gate 	return (rw_rdlock_impl(rwlp, NULL));
6860Sstevel@tonic-gate }
6870Sstevel@tonic-gate 
6880Sstevel@tonic-gate void
6890Sstevel@tonic-gate lrw_rdlock(rwlock_t *rwlp)
6900Sstevel@tonic-gate {
6910Sstevel@tonic-gate 	enter_critical(curthread);
6920Sstevel@tonic-gate 	(void) rw_rdlock_impl(rwlp, NULL);
6930Sstevel@tonic-gate }
6940Sstevel@tonic-gate 
6950Sstevel@tonic-gate int
6966812Sraf pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
6976812Sraf     const struct timespec *_RESTRICT_KYWD reltime)
6980Sstevel@tonic-gate {
6990Sstevel@tonic-gate 	timespec_t tslocal = *reltime;
7000Sstevel@tonic-gate 	int error;
7010Sstevel@tonic-gate 
7020Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7036812Sraf 	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
7040Sstevel@tonic-gate 	if (error == ETIME)
7050Sstevel@tonic-gate 		error = ETIMEDOUT;
7060Sstevel@tonic-gate 	return (error);
7070Sstevel@tonic-gate }
7080Sstevel@tonic-gate 
7090Sstevel@tonic-gate int
7106812Sraf pthread_rwlock_timedrdlock(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
7116812Sraf     const struct timespec *_RESTRICT_KYWD abstime)
7120Sstevel@tonic-gate {
7130Sstevel@tonic-gate 	timespec_t tslocal;
7140Sstevel@tonic-gate 	int error;
7150Sstevel@tonic-gate 
7160Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7170Sstevel@tonic-gate 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
7186812Sraf 	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
7190Sstevel@tonic-gate 	if (error == ETIME)
7200Sstevel@tonic-gate 		error = ETIMEDOUT;
7210Sstevel@tonic-gate 	return (error);
7220Sstevel@tonic-gate }
7230Sstevel@tonic-gate 
7240Sstevel@tonic-gate int
7250Sstevel@tonic-gate rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
7260Sstevel@tonic-gate {
7270Sstevel@tonic-gate 	ulwp_t *self = curthread;
7280Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
7290Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
7300Sstevel@tonic-gate 	int error;
7310Sstevel@tonic-gate 
7320Sstevel@tonic-gate 	/*
7330Sstevel@tonic-gate 	 * If we hold a readers lock on this rwlock, bail out.
7340Sstevel@tonic-gate 	 */
7356812Sraf 	if (rw_read_held(rwlp)) {
7360Sstevel@tonic-gate 		if (self->ul_error_detection)
7370Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_wrlock",
7380Sstevel@tonic-gate 			    "calling thread owns the readers lock");
7394570Sraf 		error = EDEADLK;
7404570Sraf 		goto out;
7410Sstevel@tonic-gate 	}
7420Sstevel@tonic-gate 
7430Sstevel@tonic-gate 	/*
7440Sstevel@tonic-gate 	 * If we hold the writer lock, bail out.
7450Sstevel@tonic-gate 	 */
7466812Sraf 	if (rw_write_held(rwlp)) {
7470Sstevel@tonic-gate 		if (self->ul_error_detection)
7480Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_wrlock",
7490Sstevel@tonic-gate 			    "calling thread owns the writer lock");
7504570Sraf 		error = EDEADLK;
7514570Sraf 		goto out;
7520Sstevel@tonic-gate 	}
7530Sstevel@tonic-gate 
7544570Sraf 	if (write_lock_try(rwlp, 0))
7554570Sraf 		error = 0;
7564570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
7570Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
7584570Sraf 	else						/* user-level */
7590Sstevel@tonic-gate 		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);
7600Sstevel@tonic-gate 
7614570Sraf out:
7624570Sraf 	if (error == 0) {
7634570Sraf 		rwlp->rwlock_owner = (uintptr_t)self;
7644570Sraf 		if (rwlp->rwlock_type == USYNC_PROCESS)
7654570Sraf 			rwlp->rwlock_ownerpid = udp->pid;
7664570Sraf 		if (rwsp) {
7674570Sraf 			tdb_incr(rwsp->rw_wrlock);
7684570Sraf 			rwsp->rw_wrlock_begin_hold = gethrtime();
7694570Sraf 		}
7704570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
7714570Sraf 	} else {
7724570Sraf 		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
7730Sstevel@tonic-gate 	}
7740Sstevel@tonic-gate 	return (error);
7750Sstevel@tonic-gate }
7760Sstevel@tonic-gate 
7776812Sraf #pragma weak pthread_rwlock_wrlock = rw_wrlock
7786812Sraf #pragma weak _rw_wrlock = rw_wrlock
7790Sstevel@tonic-gate int
7806812Sraf rw_wrlock(rwlock_t *rwlp)
7810Sstevel@tonic-gate {
7820Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
7830Sstevel@tonic-gate 	return (rw_wrlock_impl(rwlp, NULL));
7840Sstevel@tonic-gate }
7850Sstevel@tonic-gate 
7860Sstevel@tonic-gate void
7870Sstevel@tonic-gate lrw_wrlock(rwlock_t *rwlp)
7880Sstevel@tonic-gate {
7890Sstevel@tonic-gate 	enter_critical(curthread);
7900Sstevel@tonic-gate 	(void) rw_wrlock_impl(rwlp, NULL);
7910Sstevel@tonic-gate }
7920Sstevel@tonic-gate 
7930Sstevel@tonic-gate int
7946812Sraf pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
7956812Sraf     const struct timespec *_RESTRICT_KYWD reltime)
7960Sstevel@tonic-gate {
7970Sstevel@tonic-gate 	timespec_t tslocal = *reltime;
7980Sstevel@tonic-gate 	int error;
7990Sstevel@tonic-gate 
8000Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8016812Sraf 	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
8020Sstevel@tonic-gate 	if (error == ETIME)
8030Sstevel@tonic-gate 		error = ETIMEDOUT;
8040Sstevel@tonic-gate 	return (error);
8050Sstevel@tonic-gate }
8060Sstevel@tonic-gate 
8070Sstevel@tonic-gate int
8086812Sraf pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime)
8090Sstevel@tonic-gate {
8100Sstevel@tonic-gate 	timespec_t tslocal;
8110Sstevel@tonic-gate 	int error;
8120Sstevel@tonic-gate 
8130Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8140Sstevel@tonic-gate 	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
8156812Sraf 	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
8160Sstevel@tonic-gate 	if (error == ETIME)
8170Sstevel@tonic-gate 		error = ETIMEDOUT;
8180Sstevel@tonic-gate 	return (error);
8190Sstevel@tonic-gate }
8200Sstevel@tonic-gate 
8216812Sraf #pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock
8220Sstevel@tonic-gate int
8236812Sraf rw_tryrdlock(rwlock_t *rwlp)
8240Sstevel@tonic-gate {
8250Sstevel@tonic-gate 	ulwp_t *self = curthread;
8260Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
8270Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
8280Sstevel@tonic-gate 	readlock_t *readlockp;
8290Sstevel@tonic-gate 	int error;
8300Sstevel@tonic-gate 
8310Sstevel@tonic-gate 	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
8320Sstevel@tonic-gate 
8330Sstevel@tonic-gate 	if (rwsp)
8340Sstevel@tonic-gate 		tdb_incr(rwsp->rw_rdlock_try);
8350Sstevel@tonic-gate 
8360Sstevel@tonic-gate 	/*
8370Sstevel@tonic-gate 	 * If we already hold a readers lock on this rwlock,
8380Sstevel@tonic-gate 	 * just increment our reference count and return.
8390Sstevel@tonic-gate 	 */
8404570Sraf 	sigoff(self);
8410Sstevel@tonic-gate 	readlockp = rwl_entry(rwlp);
8420Sstevel@tonic-gate 	if (readlockp->rd_count != 0) {
8434570Sraf 		if (readlockp->rd_count == READ_LOCK_MAX) {
8444570Sraf 			sigon(self);
8454570Sraf 			error = EAGAIN;
8464570Sraf 			goto out;
8474570Sraf 		}
8484570Sraf 		sigon(self);
8494570Sraf 		error = 0;
8504570Sraf 		goto out;
8510Sstevel@tonic-gate 	}
8524570Sraf 	sigon(self);
8530Sstevel@tonic-gate 
8544570Sraf 	if (read_lock_try(rwlp, 0))
8554570Sraf 		error = 0;
8564570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
8570Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
8580Sstevel@tonic-gate 	else						/* user-level */
8590Sstevel@tonic-gate 		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
8600Sstevel@tonic-gate 
8614570Sraf out:
8624570Sraf 	if (error == 0) {
8634570Sraf 		sigoff(self);
8644570Sraf 		rwl_entry(rwlp)->rd_count++;
8654570Sraf 		sigon(self);
8664570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
8674570Sraf 	} else {
8684570Sraf 		if (rwsp)
8694570Sraf 			tdb_incr(rwsp->rw_rdlock_try_fail);
8704570Sraf 		if (error != EBUSY) {
8714570Sraf 			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
8724570Sraf 			    error);
8734570Sraf 		}
8744570Sraf 	}
8750Sstevel@tonic-gate 
8760Sstevel@tonic-gate 	return (error);
8770Sstevel@tonic-gate }
8780Sstevel@tonic-gate 
8796812Sraf #pragma weak pthread_rwlock_trywrlock = rw_trywrlock
8800Sstevel@tonic-gate int
8816812Sraf rw_trywrlock(rwlock_t *rwlp)
8820Sstevel@tonic-gate {
8830Sstevel@tonic-gate 	ulwp_t *self = curthread;
8840Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
8850Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
8860Sstevel@tonic-gate 	int error;
8870Sstevel@tonic-gate 
8884570Sraf 	ASSERT(!self->ul_critical || self->ul_bindflags);
8890Sstevel@tonic-gate 
8900Sstevel@tonic-gate 	if (rwsp)
8910Sstevel@tonic-gate 		tdb_incr(rwsp->rw_wrlock_try);
8920Sstevel@tonic-gate 
8934570Sraf 	if (write_lock_try(rwlp, 0))
8944570Sraf 		error = 0;
8954570Sraf 	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
8960Sstevel@tonic-gate 		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
8974570Sraf 	else						/* user-level */
8980Sstevel@tonic-gate 		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
8994570Sraf 
9004570Sraf 	if (error == 0) {
9014570Sraf 		rwlp->rwlock_owner = (uintptr_t)self;
9024570Sraf 		if (rwlp->rwlock_type == USYNC_PROCESS)
9034570Sraf 			rwlp->rwlock_ownerpid = udp->pid;
9044570Sraf 		if (rwsp)
9054570Sraf 			rwsp->rw_wrlock_begin_hold = gethrtime();
9064570Sraf 		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
9074570Sraf 	} else {
9084570Sraf 		if (rwsp)
9090Sstevel@tonic-gate 			tdb_incr(rwsp->rw_wrlock_try_fail);
9104570Sraf 		if (error != EBUSY) {
9114570Sraf 			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
9124570Sraf 			    error);
9134570Sraf 		}
9140Sstevel@tonic-gate 	}
9150Sstevel@tonic-gate 	return (error);
9160Sstevel@tonic-gate }
9170Sstevel@tonic-gate 
9186812Sraf #pragma weak pthread_rwlock_unlock = rw_unlock
9196812Sraf #pragma weak _rw_unlock = rw_unlock
9200Sstevel@tonic-gate int
9216812Sraf rw_unlock(rwlock_t *rwlp)
9220Sstevel@tonic-gate {
9234570Sraf 	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
9244570Sraf 	uint32_t readers;
9250Sstevel@tonic-gate 	ulwp_t *self = curthread;
9260Sstevel@tonic-gate 	uberdata_t *udp = self->ul_uberdata;
9270Sstevel@tonic-gate 	tdb_rwlock_stats_t *rwsp;
9284570Sraf 	queue_head_t *qp;
9294570Sraf 	int rd_wr;
9304570Sraf 	int waked = 0;
9310Sstevel@tonic-gate 
9324570Sraf 	readers = *rwstate;
9334570Sraf 	ASSERT_CONSISTENT_STATE(readers);
9344570Sraf 	if (readers & URW_WRITE_LOCKED) {
9354570Sraf 		rd_wr = WRITE_LOCK;
9364570Sraf 		readers = 0;
9374570Sraf 	} else {
9384570Sraf 		rd_wr = READ_LOCK;
9394570Sraf 		readers &= URW_READERS_MASK;
9400Sstevel@tonic-gate 	}
9410Sstevel@tonic-gate 
9424570Sraf 	if (rd_wr == WRITE_LOCK) {
9430Sstevel@tonic-gate 		/*
9440Sstevel@tonic-gate 		 * Since the writer lock is held, we'd better be
9450Sstevel@tonic-gate 		 * holding it, else we cannot legitimately be here.
9460Sstevel@tonic-gate 		 */
9476812Sraf 		if (!rw_write_held(rwlp)) {
9480Sstevel@tonic-gate 			if (self->ul_error_detection)
9490Sstevel@tonic-gate 				rwlock_error(rwlp, "rwlock_unlock",
9500Sstevel@tonic-gate 				    "writer lock held, "
9510Sstevel@tonic-gate 				    "but not by the calling thread");
9520Sstevel@tonic-gate 			return (EPERM);
9530Sstevel@tonic-gate 		}
9540Sstevel@tonic-gate 		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
9550Sstevel@tonic-gate 			if (rwsp->rw_wrlock_begin_hold)
9560Sstevel@tonic-gate 				rwsp->rw_wrlock_hold_time +=
9570Sstevel@tonic-gate 				    gethrtime() - rwsp->rw_wrlock_begin_hold;
9580Sstevel@tonic-gate 			rwsp->rw_wrlock_begin_hold = 0;
9590Sstevel@tonic-gate 		}
9604570Sraf 		rwlp->rwlock_owner = 0;
9614570Sraf 		rwlp->rwlock_ownerpid = 0;
9624570Sraf 	} else if (readers > 0) {
9630Sstevel@tonic-gate 		/*
9640Sstevel@tonic-gate 		 * A readers lock is held; if we don't hold one, bail out.
9650Sstevel@tonic-gate 		 */
9664570Sraf 		readlock_t *readlockp;
9674570Sraf 
9684570Sraf 		sigoff(self);
9694570Sraf 		readlockp = rwl_entry(rwlp);
9700Sstevel@tonic-gate 		if (readlockp->rd_count == 0) {
9714570Sraf 			sigon(self);
9720Sstevel@tonic-gate 			if (self->ul_error_detection)
9730Sstevel@tonic-gate 				rwlock_error(rwlp, "rwlock_unlock",
9740Sstevel@tonic-gate 				    "readers lock held, "
9750Sstevel@tonic-gate 				    "but not by the calling thread");
9760Sstevel@tonic-gate 			return (EPERM);
9770Sstevel@tonic-gate 		}
9780Sstevel@tonic-gate 		/*
9790Sstevel@tonic-gate 		 * If we hold more than one readers lock on this rwlock,
9800Sstevel@tonic-gate 		 * just decrement our reference count and return.
9810Sstevel@tonic-gate 		 */
9820Sstevel@tonic-gate 		if (--readlockp->rd_count != 0) {
9834570Sraf 			sigon(self);
9844570Sraf 			goto out;
9850Sstevel@tonic-gate 		}
9864570Sraf 		sigon(self);
9870Sstevel@tonic-gate 	} else {
9880Sstevel@tonic-gate 		/*
9890Sstevel@tonic-gate 		 * This is a usage error.
9900Sstevel@tonic-gate 		 * No thread should release an unowned lock.
9910Sstevel@tonic-gate 		 */
9920Sstevel@tonic-gate 		if (self->ul_error_detection)
9930Sstevel@tonic-gate 			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
9940Sstevel@tonic-gate 		return (EPERM);
9950Sstevel@tonic-gate 	}
9960Sstevel@tonic-gate 
9974570Sraf 	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
9984570Sraf 		/* EMPTY */;
9994570Sraf 	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
10004570Sraf 		/* EMPTY */;
10014570Sraf 	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
10026515Sraf 		(void) mutex_lock(&rwlp->mutex);
10034570Sraf 		(void) __lwp_rwlock_unlock(rwlp);
10046515Sraf 		(void) mutex_unlock(&rwlp->mutex);
10054570Sraf 		waked = 1;
10064570Sraf 	} else {
10070Sstevel@tonic-gate 		qp = queue_lock(rwlp, MX);
10084570Sraf 		if (rd_wr == READ_LOCK)
10094570Sraf 			atomic_dec_32(rwstate);
10104570Sraf 		else
10114570Sraf 			atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
10120Sstevel@tonic-gate 		waked = rw_queue_release(qp, rwlp);
10130Sstevel@tonic-gate 	}
10140Sstevel@tonic-gate 
10154570Sraf out:
10164570Sraf 	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
10174570Sraf 
10180Sstevel@tonic-gate 	/*
10190Sstevel@tonic-gate 	 * Yield to the thread we just waked up, just in case we might
10200Sstevel@tonic-gate 	 * be about to grab the rwlock again immediately upon return.
10210Sstevel@tonic-gate 	 * This is pretty weak but it helps on a uniprocessor and also
10220Sstevel@tonic-gate 	 * when cpu affinity has assigned both ourself and the other
10230Sstevel@tonic-gate 	 * thread to the same CPU.  Note that lwp_yield() will yield
10240Sstevel@tonic-gate 	 * the processor only if the writer is at the same or higher
10250Sstevel@tonic-gate 	 * priority than ourself.  This provides more balanced program
10260Sstevel@tonic-gate 	 * behavior; it doesn't guarantee acquisition of the lock by
10270Sstevel@tonic-gate 	 * the pending writer.
10280Sstevel@tonic-gate 	 */
10290Sstevel@tonic-gate 	if (waked)
10306515Sraf 		yield();
10310Sstevel@tonic-gate 	return (0);
10320Sstevel@tonic-gate }
10330Sstevel@tonic-gate 
10340Sstevel@tonic-gate void
10350Sstevel@tonic-gate lrw_unlock(rwlock_t *rwlp)
10360Sstevel@tonic-gate {
10376812Sraf 	(void) rw_unlock(rwlp);
10380Sstevel@tonic-gate 	exit_critical(curthread);
10390Sstevel@tonic-gate }
1040