xref: /onnv-gate/usr/src/lib/libc/port/threads/rwlock.c (revision 12677:c07132558fc3)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/sdt.h>

#define	TRY_FLAG		0x10
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)

#define	NLOCKS	4	/* initial number of readlock_t structs allocated */

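/*
 * Note on the state word: throughout this file, 'rwstate' refers to the
 * 32-bit rwlock_readers word of the rwlock_t.  The URW_HAS_WAITERS and
 * URW_WRITE_LOCKED flag bits and the URW_READERS_MASK reader-count field
 * used below are libc-private definitions (see thr_uberdata.h).  The
 * consistency check asserted below says that a write-locked state carries
 * no reader count.
 */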
#define	ASSERT_CONSISTENT_STATE(readers)		\
	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
		((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)

/*
 * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
 * We must be deferring signals for this to be safe.
 * Otherwise, if we return an entry with ul_rdlockcnt == 0,
 * it could be reassigned behind our back in a signal handler.
 */
static readlock_t *
rwl_entry(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	readlock_t *remembered = NULL;
	readlock_t *readlockp;
	uint_t nlocks;

	/* we must be deferring signals */
	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);

	if ((nlocks = self->ul_rdlockcnt) != 0)
		readlockp = self->ul_readlock.array;
	else {
		nlocks = 1;
		readlockp = &self->ul_readlock.single;
	}

	for (; nlocks; nlocks--, readlockp++) {
		if (readlockp->rd_rwlock == rwlp)
			return (readlockp);
		if (readlockp->rd_count == 0 && remembered == NULL)
			remembered = readlockp;
	}
	if (remembered != NULL) {
		remembered->rd_rwlock = rwlp;
		return (remembered);
	}

	/*
	 * No entry available.  Allocate more space, converting the single
	 * readlock_t entry into an array of readlock_t entries if necessary.
	 */
	if ((nlocks = self->ul_rdlockcnt) == 0) {
		/*
		 * Initial allocation of the readlock_t array.
		 * Convert the single entry into an array.
		 */
		self->ul_rdlockcnt = nlocks = NLOCKS;
		readlockp = lmalloc(nlocks * sizeof (readlock_t));
		/*
		 * The single readlock_t becomes the first entry in the array.
		 */
		*readlockp = self->ul_readlock.single;
		self->ul_readlock.single.rd_count = 0;
		self->ul_readlock.array = readlockp;
		/*
		 * Return the next available entry in the array.
		 */
		(++readlockp)->rd_rwlock = rwlp;
		return (readlockp);
	}
	/*
	 * Reallocate the array, double the size each time.
	 */
	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
	(void) memcpy(readlockp, self->ul_readlock.array,
	    nlocks * sizeof (readlock_t));
	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
	self->ul_readlock.array = readlockp;
	self->ul_rdlockcnt *= 2;
	/*
	 * Return the next available entry in the newly allocated array.
	 */
	(readlockp += nlocks)->rd_rwlock = rwlp;
	return (readlockp);
}

/*
 * Free the array of rwlocks held for reading.
 */
void
rwl_free(ulwp_t *ulwp)
{
	uint_t nlocks;

	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
	ulwp->ul_rdlockcnt = 0;
	ulwp->ul_readlock.single.rd_rwlock = NULL;
	ulwp->ul_readlock.single.rd_count = 0;
}

/*
 * Check if a reader version of the lock is held by the current thread.
 */
#pragma weak _rw_read_held = rw_read_held
int
rw_read_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	readlock_t *readlockp;
	uint_t nlocks;
	int rval = 0;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (!(readers & URW_WRITE_LOCKED) &&
	    (readers & URW_READERS_MASK) != 0) {
		/*
		 * The lock is held for reading by some thread.
		 * Search our array of rwlocks held for reading for a match.
		 */
		if ((nlocks = self->ul_rdlockcnt) != 0)
			readlockp = self->ul_readlock.array;
		else {
			nlocks = 1;
			readlockp = &self->ul_readlock.single;
		}
		for (; nlocks; nlocks--, readlockp++) {
			if (readlockp->rd_rwlock == rwlp) {
				if (readlockp->rd_count)
					rval = 1;
				break;
			}
		}
	}

	preempt(self);
	return (rval);
}

/*
 * Check if a writer version of the lock is held by the current thread.
 */
#pragma weak _rw_write_held = rw_write_held
int
rw_write_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	int rval;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	rval = ((readers & URW_WRITE_LOCKED) &&
	    rwlp->rwlock_owner == (uintptr_t)self &&
	    (rwlp->rwlock_type == USYNC_THREAD ||
	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));

	preempt(self);
	return (rval);
}

#pragma weak _rwlock_init = rwlock_init
/* ARGSUSED2 */
int
rwlock_init(rwlock_t *rwlp, int type, void *arg)
{
	ulwp_t *self = curthread;

	if (type != USYNC_THREAD && type != USYNC_PROCESS)
		return (EINVAL);
	/*
	 * Once reinitialized, we can no longer be holding a read or write lock.
	 * We can do nothing about other threads that are holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	(void) memset(rwlp, 0, sizeof (*rwlp));
	rwlp->rwlock_type = (uint16_t)type;
	rwlp->rwlock_magic = RWL_MAGIC;
	rwlp->mutex.mutex_type = (uint8_t)type;
	rwlp->mutex.mutex_flag = LOCK_INITED;
	rwlp->mutex.mutex_magic = MUTEX_MAGIC;

	/*
	 * This should be at the beginning of the function,
	 * but for the sake of old broken applications that
	 * do not have proper alignment for their rwlocks
	 * (and don't check the return code from rwlock_init),
	 * we put it here, after initializing the rwlock regardless.
	 */
	if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) &&
	    self->ul_misaligned == 0)
		return (EINVAL);

	return (0);
}

#pragma weak pthread_rwlock_destroy = rwlock_destroy
#pragma weak _rwlock_destroy = rwlock_destroy
int
rwlock_destroy(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;

	/*
	 * Once destroyed, we can no longer be holding a read or write lock.
	 * We can do nothing about other threads that are holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	rwlp->rwlock_magic = 0;
	tdb_sync_obj_deregister(rwlp);
	return (0);
}

/*
 * The following four functions:
 *	read_lock_try()
 *	read_unlock_try()
 *	write_lock_try()
 *	write_unlock_try()
 * lie at the heart of the fast-path code for rwlocks,
 * both process-private and process-shared.
 *
 * They are called once without recourse to any other locking primitives.
 * If they succeed, we are done and the fast-path code was successful.
 * If they fail, we have to deal with lock queues, either to enqueue
 * ourself and sleep or to dequeue and wake up someone else (slow paths).
 *
 * Unless 'ignore_waiters_flag' is true (a condition that applies only
 * when read_lock_try() or write_lock_try() is called from code that
 * is already in the slow path and has already acquired the queue lock),
 * these functions will always fail if the waiters flag, URW_HAS_WAITERS,
 * is set in the 'rwstate' word.  Thus, setting the waiters flag on the
 * rwlock and acquiring the queue lock guarantees exclusive access to
 * the rwlock (and is the only way to guarantee exclusive access).
 */
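
/*
 * As a rough sketch of how the lock-acquisition code below uses these
 * primitives (see rw_rdlock_impl() and rwlock_lock() for the real code):
 *
 *	if (read_lock_try(rwlp, 0))	// uncontended fast path; fails if
 *		return (0);		// a writer or any waiters exist
 *	// otherwise: take the queue lock, set URW_HAS_WAITERS, and retry
 *	// with ignore_waiters_flag != 0, sleeping on the queue if needed.
 */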

/*
 * Attempt to acquire a readers lock.  Return true on success.
 */
static int
read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag?
	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a reader lock.  Return true on success.
 */
static int
read_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to acquire a writer lock.  Return true on success.
 */
static int
write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag?
	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
	ulwp_t *self = curthread;
	uint32_t readers;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
		    == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a writer lock.  Return true on success.
 */
static int
write_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, 0) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Release a process-private rwlock and wake up any thread(s) sleeping on it.
 * This is called when a thread releases a lock that appears to have waiters.
 */
static void
rw_queue_release(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	queue_head_t *qp;
	uint32_t readers;
	uint32_t writer;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int nlwpid = 0;
	int more;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	qp = queue_lock(rwlp, MX);

	/*
	 * Here is where we actually drop the lock,
	 * but we retain the URW_HAS_WAITERS flag, if it is already set.
	 */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED)	/* drop the writer lock */
		atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
	else				/* drop the readers lock */
		atomic_dec_32(rwstate);
	if (!(readers & URW_HAS_WAITERS)) {	/* no waiters */
		queue_unlock(qp);
		return;
	}

	/*
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Therefore, the rest of this code is safe because we are
	 * holding the queue lock and the URW_HAS_WAITERS flag is set.
	 */

	readers = *rwstate;		/* must fetch the value again */
	ASSERT_CONSISTENT_STATE(readers);
	ASSERT(readers & URW_HAS_WAITERS);
	readers &= URW_READERS_MASK;	/* count of current readers */
	writer = 0;			/* no current writer */

	/*
	 * Examine the queue of waiters in priority order and prepare
	 * to wake up as many readers as we encounter before encountering
	 * a writer.  If the highest priority thread on the queue is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
		ulwp = *ulwpp;
		ASSERT(ulwp->ul_wchan == rwlp);
		if (ulwp->ul_writer) {
			if (writer != 0 || readers != 0)
				break;
			/* one writer to wake */
			writer++;
		} else {
			if (writer != 0)
				break;
			/* at least one reader to wake */
			readers++;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		queue_unlink(qp, ulwpp, prev);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		if (writer) {
			/*
			 * Hand off the lock to the writer we will be waking.
			 */
			ASSERT((*rwstate & ~URW_HAS_WAITERS) == 0);
			atomic_or_32(rwstate, URW_WRITE_LOCKED);
			rwlp->rwlock_owner = (uintptr_t)ulwp;
		}
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}

	/*
	 * This modification of rwstate must be done last.
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Otherwise the read_lock_try() and write_lock_try() fast paths
	 * are effective.
	 */
	if (ulwpp == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		ulwp_t *self = curthread;
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
 * released, and if they need to sleep will release the mutex first. In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	uint32_t readers;
	int try_flag;
	int error;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

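	/*
	 * The loop below retries as long as the result is EAGAIN
	 * (a spurious wakeup, see the block comment above) or EINTR
	 * (an interrupted sleep).
	 */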
	do {
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = mutex_lock(mp)) != 0)
			break;
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		}
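		/*
		 * The fast path failed; advertise that there is a waiter
		 * before calling into the kernel to sleep.
		 */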
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.  The owner field of the
		 * mutex is cleared in the kernel when the mutex is released,
		 * so we should not clear it here.
		 */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int ignore_waiters_flag;
	int error = 0;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

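	/*
	 * Slow path: with the queue lock held, keep retrying the fast-path
	 * try functions; if the lock remains busy, enqueue ourself and park
	 * until a release wakes us up (see rw_queue_release()).
	 */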
	qp = queue_lock(rwlp, MX);
	/* initial attempt to acquire the lock fails if there are waiters */
	ignore_waiters_flag = 0;
	while (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, ignore_waiters_flag))
				break;
		} else {
			if (write_lock_try(rwlp, ignore_waiters_flag))
				break;
		}
		/* subsequent attempts do not fail due to waiters */
		ignore_waiters_flag = 1;
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp)) == NULL) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
			continue;	/* no queued waiters, start over */
		} else {
			/*
			 * Do a priority check on the queued waiter (the
			 * highest priority thread on the queue) to see
			 * if we should defer to him or just grab the lock.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer)) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, 0);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		if (self->ul_sleepq && dequeue_self(qp) == 0) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
		}
		self->ul_writer = 0;
		if (rd_wr == WRITE_LOCK &&
		    (*rwstate & URW_WRITE_LOCKED) &&
		    rwlp->rwlock_owner == (uintptr_t)self) {
			/*
			 * We acquired the lock by hand-off
			 * from the previous owner.
			 */
			error = 0;	/* timedlock did not fail */
			break;
		}
	}

	/*
	 * Make one final check to see if there are any threads left
	 * on the rwlock queue.  Clear the URW_HAS_WAITERS flag if not.
	 */
	if (qp->qh_root == NULL || qp->qh_root->qr_head == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
	}

	return (error);
}

#pragma weak pthread_rwlock_rdlock = rw_rdlock
#pragma weak _rw_rdlock = rw_rdlock
int
rw_rdlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_rdlock_impl(rwlp, NULL));
}

void
lrw_rdlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_rdlock_impl(rwlp, NULL);
}

int
pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedrdlock(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we hold a readers lock on this rwlock, bail out.
	 */
	if (rw_read_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the readers lock");
		error = EDEADLK;
		goto out;
	}

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);

out:
	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp) {
			tdb_incr(rwsp->rw_wrlock);
			rwsp->rw_wrlock_begin_hold = gethrtime();
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
	}
	return (error);
}

#pragma weak pthread_rwlock_wrlock = rw_wrlock
#pragma weak _rw_wrlock = rw_wrlock
int
rw_wrlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_wrlock_impl(rwlp, NULL));
}

void
lrw_wrlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_wrlock_impl(rwlp, NULL);
}

int
pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

#pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock
int
rw_tryrdlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	readlock_t *readlockp;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_rdlock_try);

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
			    error);
		}
	}

	return (error);
}

#pragma weak pthread_rwlock_trywrlock = rw_trywrlock
int
rw_trywrlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	ASSERT(!self->ul_critical || self->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_wrlock_try);

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);

	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp)
			rwsp->rw_wrlock_begin_hold = gethrtime();
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_wrlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
			    error);
		}
	}
	return (error);
}

#pragma weak pthread_rwlock_unlock = rw_unlock
#pragma weak _rw_unlock = rw_unlock
int
rw_unlock(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp;
	int rd_wr;

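	/*
	 * Decode the current state: either the writer lock is held
	 * (readers is forced to 0) or some number of readers hold it.
	 */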
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED) {
		rd_wr = WRITE_LOCK;
		readers = 0;
	} else {
		rd_wr = READ_LOCK;
		readers &= URW_READERS_MASK;
	}

	if (rd_wr == WRITE_LOCK) {
		/*
		 * Since the writer lock is held, we'd better be
		 * holding it, else we cannot legitimately be here.
		 */
		if (!rw_write_held(rwlp)) {
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "writer lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
			if (rwsp->rw_wrlock_begin_hold)
				rwsp->rw_wrlock_hold_time +=
				    gethrtime() - rwsp->rw_wrlock_begin_hold;
			rwsp->rw_wrlock_begin_hold = 0;
		}
		rwlp->rwlock_owner = 0;
		rwlp->rwlock_ownerpid = 0;
	} else if (readers > 0) {
		/*
		 * A readers lock is held; if we don't hold one, bail out.
		 */
		readlock_t *readlockp;

		sigoff(self);
		readlockp = rwl_entry(rwlp);
		if (readlockp->rd_count == 0) {
			sigon(self);
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "readers lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/*
		 * If we hold more than one readers lock on this rwlock,
		 * just decrement our reference count and return.
		 */
		if (--readlockp->rd_count != 0) {
			sigon(self);
			goto out;
		}
		sigon(self);
	} else {
		/*
		 * This is a usage error.
		 * No thread should release an unowned lock.
		 */
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
		return (EPERM);
	}

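	/*
	 * Now drop the lock itself: try the uncontended fast path first;
	 * otherwise let the kernel (process-shared) or rw_queue_release()
	 * (process-private) do the release and wake any waiters.
	 */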
	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
		(void) mutex_lock(&rwlp->mutex);
		(void) __lwp_rwlock_unlock(rwlp);
		(void) mutex_unlock(&rwlp->mutex);
	} else {
		rw_queue_release(rwlp);
	}

out:
	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
	return (0);
}

void
lrw_unlock(rwlock_t *rwlp)
{
	(void) rw_unlock(rwlp);
	exit_critical(curthread);
}