/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/sdt.h>

#define	TRY_FLAG		0x10
#define	READ_LOCK		0
#define	WRITE_LOCK		1
#define	READ_LOCK_TRY		(READ_LOCK | TRY_FLAG)
#define	WRITE_LOCK_TRY		(WRITE_LOCK | TRY_FLAG)

#define	NLOCKS	4	/* initial number of readlock_t structs allocated */

#define	ASSERT_CONSISTENT_STATE(readers)		\
	ASSERT(!((readers) & URW_WRITE_LOCKED) ||	\
	    ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)
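
/*
 * Stated another way: the only legal values of the 'rwstate' word are
 * a reader count in URW_READERS_MASK (possibly zero, possibly with
 * URW_HAS_WAITERS set) while URW_WRITE_LOCKED is clear, or
 * URW_WRITE_LOCKED with a zero reader count (again possibly with
 * URW_HAS_WAITERS set).  A writer never coexists with active readers.
 */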

/*
 * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
 * We must be deferring signals for this to be safe.
 * Otherwise, if we are returning an entry with ul_rdlockcnt == 0,
 * it could be reassigned behind our back in a signal handler.
 */
static readlock_t *
rwl_entry(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	readlock_t *remembered = NULL;
	readlock_t *readlockp;
	uint_t nlocks;

	/* we must be deferring signals */
	ASSERT((self->ul_critical + self->ul_sigdefer) != 0);

	if ((nlocks = self->ul_rdlockcnt) != 0)
		readlockp = self->ul_readlock.array;
	else {
		nlocks = 1;
		readlockp = &self->ul_readlock.single;
	}

	for (; nlocks; nlocks--, readlockp++) {
		if (readlockp->rd_rwlock == rwlp)
			return (readlockp);
		if (readlockp->rd_count == 0 && remembered == NULL)
			remembered = readlockp;
	}
	if (remembered != NULL) {
		remembered->rd_rwlock = rwlp;
		return (remembered);
	}

	/*
	 * No entry available.  Allocate more space, converting the single
	 * readlock_t entry into an array of readlock_t entries if necessary.
	 */
	if ((nlocks = self->ul_rdlockcnt) == 0) {
		/*
		 * Initial allocation of the readlock_t array.
		 * Convert the single entry into an array.
		 */
		self->ul_rdlockcnt = nlocks = NLOCKS;
		readlockp = lmalloc(nlocks * sizeof (readlock_t));
		/*
		 * The single readlock_t becomes the first entry in the array.
		 */
		*readlockp = self->ul_readlock.single;
		self->ul_readlock.single.rd_count = 0;
		self->ul_readlock.array = readlockp;
		/*
		 * Return the next available entry in the array.
		 */
		(++readlockp)->rd_rwlock = rwlp;
		return (readlockp);
	}
	/*
	 * Reallocate the array, doubling the size each time.
	 */
	readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t));
	(void) memcpy(readlockp, self->ul_readlock.array,
	    nlocks * sizeof (readlock_t));
	lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t));
	self->ul_readlock.array = readlockp;
	self->ul_rdlockcnt *= 2;
	/*
	 * Return the next available entry in the newly allocated array.
	 */
	(readlockp += nlocks)->rd_rwlock = rwlp;
	return (readlockp);
}

/*
 * Free the array of rwlocks held for reading.
 */
void
rwl_free(ulwp_t *ulwp)
{
	uint_t nlocks;

	if ((nlocks = ulwp->ul_rdlockcnt) != 0)
		lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t));
	ulwp->ul_rdlockcnt = 0;
	ulwp->ul_readlock.single.rd_rwlock = NULL;
	ulwp->ul_readlock.single.rd_count = 0;
}

/*
 * Check if a reader version of the lock is held by the current thread.
 */
#pragma weak _rw_read_held = rw_read_held
int
rw_read_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	readlock_t *readlockp;
	uint_t nlocks;
	int rval = 0;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (!(readers & URW_WRITE_LOCKED) &&
	    (readers & URW_READERS_MASK) != 0) {
		/*
		 * The lock is held for reading by some thread.
		 * Search our array of rwlocks held for reading for a match.
		 */
		if ((nlocks = self->ul_rdlockcnt) != 0)
			readlockp = self->ul_readlock.array;
		else {
			nlocks = 1;
			readlockp = &self->ul_readlock.single;
		}
		for (; nlocks; nlocks--, readlockp++) {
			if (readlockp->rd_rwlock == rwlp) {
				if (readlockp->rd_count)
					rval = 1;
				break;
			}
		}
	}

	preempt(self);
	return (rval);
}

/*
 * Check if a writer version of the lock is held by the current thread.
 */
#pragma weak _rw_write_held = rw_write_held
int
rw_write_held(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	int rval;

	no_preempt(self);

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	rval = ((readers & URW_WRITE_LOCKED) &&
	    rwlp->rwlock_owner == (uintptr_t)self &&
	    (rwlp->rwlock_type == USYNC_THREAD ||
	    rwlp->rwlock_ownerpid == self->ul_uberdata->pid));

	preempt(self);
	return (rval);
}

#pragma weak _rwlock_init = rwlock_init
/* ARGSUSED2 */
int
rwlock_init(rwlock_t *rwlp, int type, void *arg)
{
	ulwp_t *self = curthread;

	if (type != USYNC_THREAD && type != USYNC_PROCESS)
		return (EINVAL);
	/*
	 * Once reinitialized, we can no longer be holding a read or write
	 * lock.  We can do nothing about other threads holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	(void) memset(rwlp, 0, sizeof (*rwlp));
	rwlp->rwlock_type = (uint16_t)type;
	rwlp->rwlock_magic = RWL_MAGIC;
	rwlp->mutex.mutex_type = (uint8_t)type;
	rwlp->mutex.mutex_flag = LOCK_INITED;
	rwlp->mutex.mutex_magic = MUTEX_MAGIC;

	/*
	 * This should be at the beginning of the function,
	 * but for the sake of old broken applications that
	 * do not have proper alignment for their rwlocks
	 * (and don't check the return code from rwlock_init),
	 * we put it here, after initializing the rwlock regardless.
	 */
	if (((uintptr_t)rwlp & (_LONG_LONG_ALIGNMENT - 1)) &&
	    self->ul_misaligned == 0)
		return (EINVAL);

	return (0);
}
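
/*
 * A minimal usage sketch of the Solaris threads rwlock interfaces
 * implemented in this file (hypothetical application code, shown only
 * for orientation):
 *
 *	rwlock_t rwl;
 *
 *	(void) rwlock_init(&rwl, USYNC_THREAD, NULL);
 *	(void) rw_rdlock(&rwl);			(shared access)
 *	(void) rw_unlock(&rwl);
 *	(void) rw_wrlock(&rwl);			(exclusive access)
 *	(void) rw_unlock(&rwl);
 *	(void) rwlock_destroy(&rwl);
 *
 * Several of the pthread_rwlock_*() functions are provided as weak
 * aliases for these entry points; see the #pragma weak directives
 * throughout this file.
 */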

#pragma weak pthread_rwlock_destroy = rwlock_destroy
#pragma weak _rwlock_destroy = rwlock_destroy
int
rwlock_destroy(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;

	/*
	 * Once destroyed, we can no longer be holding a read or write lock.
	 * We can do nothing about other threads that are holding read locks.
	 */
	sigoff(self);
	rwl_entry(rwlp)->rd_count = 0;
	sigon(self);
	rwlp->rwlock_magic = 0;
	tdb_sync_obj_deregister(rwlp);
	return (0);
}

/*
 * The following four functions:
 *	read_lock_try()
 *	read_unlock_try()
 *	write_lock_try()
 *	write_unlock_try()
 * lie at the heart of the fast-path code for rwlocks,
 * both process-private and process-shared.
 *
 * They are called once without recourse to any other locking primitives.
 * If they succeed, we are done and the fast-path code was successful.
 * If they fail, we have to deal with lock queues, either to enqueue
 * ourself and sleep or to dequeue and wake up someone else (slow paths).
 *
 * Unless 'ignore_waiters_flag' is true (a condition that applies only
 * when read_lock_try() or write_lock_try() is called from code that
 * is already in the slow path and has already acquired the queue lock),
 * these functions will always fail if the waiters flag, URW_HAS_WAITERS,
 * is set in the 'rwstate' word.  Thus, setting the waiters flag on the
 * rwlock and acquiring the queue lock guarantees exclusive access to
 * the rwlock (and is the only way to guarantee exclusive access).
 */

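/*
 * A rough sketch of how the fast path is used by the lock entry points
 * later in this file (rw_rdlock_impl(), rw_wrlock_impl() and the try
 * variants):
 *
 *	if (read_lock_try(rwlp, 0))
 *		error = 0;		(acquired with a single CAS)
 *	else
 *		... fall into shared_rwlock_lock() or rwlock_lock() ...
 *
 * Each of the four functions below is one compare-and-swap loop on
 * 'rwstate' bracketed by no_preempt()/preempt(), so a successful fast
 * path never touches the rwlock's mutex or the sleep queues.
 */
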
/*
 * Attempt to acquire a readers lock.  Return true on success.
 */
static int
read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag?
	    URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED);
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers + 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a reader lock.  Return true on success.
 */
static int
read_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, readers - 1) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to acquire a writer lock.  Return true on success.
 */
static int
write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t mask = ignore_waiters_flag?
	    (URW_WRITE_LOCKED | URW_READERS_MASK) :
	    (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK);
	ulwp_t *self = curthread;
	uint32_t readers;

	no_preempt(self);
	while (((readers = *rwstate) & mask) == 0) {
		if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED)
		    == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Attempt to release a writer lock.  Return true on success.
 */
static int
write_unlock_try(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;

	no_preempt(self);
	while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) {
		if (atomic_cas_32(rwstate, readers, 0) == readers) {
			preempt(self);
			return (1);
		}
	}
	preempt(self);
	return (0);
}

/*
 * Release a process-private rwlock and wake up any thread(s) sleeping on it.
 * This is called when a thread releases a lock that appears to have waiters.
 */
static void
rw_queue_release(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	queue_head_t *qp;
	uint32_t readers;
	uint32_t writer;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev;
	int nlwpid = 0;
	int more;
	int maxlwps = MAXLWPS;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	qp = queue_lock(rwlp, MX);

	/*
	 * Here is where we actually drop the lock,
	 * but we retain the URW_HAS_WAITERS flag, if it is already set.
	 */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED)	/* drop the writer lock */
		atomic_and_32(rwstate, ~URW_WRITE_LOCKED);
	else				/* drop the readers lock */
		atomic_dec_32(rwstate);
	if (!(readers & URW_HAS_WAITERS)) {	/* no waiters */
		queue_unlock(qp);
		return;
	}

	/*
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Therefore, the rest of this code is safe because we are
	 * holding the queue lock and the URW_HAS_WAITERS flag is set.
	 */

	readers = *rwstate;		/* must fetch the value again */
	ASSERT_CONSISTENT_STATE(readers);
	ASSERT(readers & URW_HAS_WAITERS);
	readers &= URW_READERS_MASK;	/* count of current readers */
	writer = 0;			/* no current writer */

	/*
	 * Examine the queue of waiters in priority order and prepare
	 * to wake up as many readers as we encounter before encountering
	 * a writer.  If the highest priority thread on the queue is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	while ((ulwpp = queue_slot(qp, &prev, &more)) != NULL) {
		ulwp = *ulwpp;
		ASSERT(ulwp->ul_wchan == rwlp);
		if (ulwp->ul_writer) {
			if (writer != 0 || readers != 0)
				break;
			/* one writer to wake */
			writer++;
		} else {
			if (writer != 0)
				break;
			/* at least one reader to wake */
			readers++;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		queue_unlink(qp, ulwpp, prev);
		ulwp->ul_sleepq = NULL;
		ulwp->ul_wchan = NULL;
		if (writer) {
			/*
			 * Hand off the lock to the writer we will be waking.
			 */
			ASSERT((*rwstate & ~URW_HAS_WAITERS) == 0);
			atomic_or_32(rwstate, URW_WRITE_LOCKED);
			rwlp->rwlock_owner = (uintptr_t)ulwp;
		}
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}

	/*
	 * This modification of rwstate must be done last.
	 * The presence of the URW_HAS_WAITERS flag causes all rwlock
	 * code to go through the slow path, acquiring queue_lock(qp).
	 * Otherwise the read_lock_try() and write_lock_try() fast paths
	 * are effective.
	 */
	if (ulwpp == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		ulwp_t *self = curthread;
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	if (lwpid != buffer)
		(void) munmap((caddr_t)lwpid, maxlwps * sizeof (lwpid_t));
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex.  These return with the mutex
 * released, and if they need to sleep will release the mutex first.  In the
 * event of a spurious wakeup, these will return EAGAIN (because it is much
 * easier for us to re-acquire the mutex here).
 */
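/*
 * For example, a call such as (void) rw_rdlock(&rwl) on a hypothetical
 * rwlock 'rwl' initialized with USYNC_PROCESS reaches this function
 * with rd_wr == READ_LOCK and tsp == NULL, and only after
 * read_lock_try() has already failed in rw_rdlock_impl().  The timed
 * and try variants arrive here with a timeout pointer or with the
 * TRY_FLAG bit set in rd_wr, respectively.
 */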
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	uint32_t readers;
	int try_flag;
	int error;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	do {
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = mutex_lock(mp)) != 0)
			break;
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) mutex_unlock(mp);
				break;
			}
		}
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.  The owner field of the
		 * mutex is cleared in the kernel when the mutex is released,
		 * so we should not clear it here.
		 */
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 */
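/*
 * Unlike shared_rwlock_lock() above, this path never enters the
 * kernel's rwlock code.  A contended caller sets URW_HAS_WAITERS,
 * enqueues itself on a user-level sleep queue and parks in
 * __lwp_park(); the matching wakeup (including writer hand-off) is
 * performed by rw_queue_release().
 */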
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int ignore_waiters_flag;
	int error = 0;

	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	qp = queue_lock(rwlp, MX);
	/* initial attempt to acquire the lock fails if there are waiters */
	ignore_waiters_flag = 0;
	while (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, ignore_waiters_flag))
				break;
		} else {
			if (write_lock_try(rwlp, ignore_waiters_flag))
				break;
		}
		/* subsequent attempts do not fail due to waiters */
		ignore_waiters_flag = 1;
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp)) == NULL) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
			continue;	/* no queued waiters, start over */
		} else {
			/*
			 * Do a priority check on the queued waiter (the
			 * highest priority thread on the queue) to see
			 * if we should defer to him or just grab the lock.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer)) {
					/*
					 * Don't defer, just grab the lock.
					 */
					continue;
				}
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, 0);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		if (self->ul_sleepq && dequeue_self(qp) == 0) {
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			ignore_waiters_flag = 0;
		}
		self->ul_writer = 0;
		if (rd_wr == WRITE_LOCK &&
		    (*rwstate & URW_WRITE_LOCKED) &&
		    rwlp->rwlock_owner == (uintptr_t)self) {
			/*
			 * We acquired the lock by hand-off
			 * from the previous owner.
			 */
			error = 0;	/* timedlock did not fail */
			break;
		}
	}

	/*
	 * Make one final check to see if there are any threads left
	 * on the rwlock queue.  Clear the URW_HAS_WAITERS flag if not.
	 */
	if (qp->qh_root == NULL || qp->qh_root->qr_head == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);

	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
	}

	return (error);
}

#pragma weak pthread_rwlock_rdlock = rw_rdlock
#pragma weak _rw_rdlock = rw_rdlock
int
rw_rdlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_rdlock_impl(rwlp, NULL));
}

void
lrw_rdlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_rdlock_impl(rwlp, NULL);
}

int
pthread_rwlock_reltimedrdlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedrdlock(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_rdlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we hold a readers lock on this rwlock, bail out.
	 */
	if (rw_read_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the readers lock");
		error = EDEADLK;
		goto out;
	}

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);

out:
	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp) {
			tdb_incr(rwsp->rw_wrlock);
			rwsp->rw_wrlock_begin_hold = gethrtime();
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
	}
	return (error);
}

#pragma weak pthread_rwlock_wrlock = rw_wrlock
#pragma weak _rw_wrlock = rw_wrlock
int
rw_wrlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_wrlock_impl(rwlp, NULL));
}

void
lrw_wrlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_wrlock_impl(rwlp, NULL);
}

int
pthread_rwlock_reltimedwrlock_np(pthread_rwlock_t *_RESTRICT_KYWD rwlp,
    const struct timespec *_RESTRICT_KYWD reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

int
pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlp, const timespec_t *abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_wrlock_impl((rwlock_t *)rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

#pragma weak pthread_rwlock_tryrdlock = rw_tryrdlock
int
rw_tryrdlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	readlock_t *readlockp;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_rdlock_try);

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY);

out:
	if (error == 0) {
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK,
			    error);
		}
	}

	return (error);
}

#pragma weak pthread_rwlock_trywrlock = rw_trywrlock
int
rw_trywrlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	ASSERT(!self->ul_critical || self->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_wrlock_try);

	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);
	else						/* user-level */
		error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY);

	if (error == 0) {
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp)
			rwsp->rw_wrlock_begin_hold = gethrtime();
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		if (rwsp)
			tdb_incr(rwsp->rw_wrlock_try_fail);
		if (error != EBUSY) {
			DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK,
			    error);
		}
	}
	return (error);
}

#pragma weak pthread_rwlock_unlock = rw_unlock
#pragma weak _rw_unlock = rw_unlock
int
rw_unlock(rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp;
	int rd_wr;

	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (readers & URW_WRITE_LOCKED) {
		rd_wr = WRITE_LOCK;
		readers = 0;
	} else {
		rd_wr = READ_LOCK;
		readers &= URW_READERS_MASK;
	}

	if (rd_wr == WRITE_LOCK) {
		/*
		 * Since the writer lock is held, we'd better be
		 * holding it, else we cannot legitimately be here.
		 */
		if (!rw_write_held(rwlp)) {
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "writer lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) {
			if (rwsp->rw_wrlock_begin_hold)
				rwsp->rw_wrlock_hold_time +=
				    gethrtime() - rwsp->rw_wrlock_begin_hold;
			rwsp->rw_wrlock_begin_hold = 0;
		}
		rwlp->rwlock_owner = 0;
		rwlp->rwlock_ownerpid = 0;
	} else if (readers > 0) {
		/*
		 * A readers lock is held; if we don't hold one, bail out.
		 */
		readlock_t *readlockp;

		sigoff(self);
		readlockp = rwl_entry(rwlp);
		if (readlockp->rd_count == 0) {
			sigon(self);
			if (self->ul_error_detection)
				rwlock_error(rwlp, "rwlock_unlock",
				    "readers lock held, "
				    "but not by the calling thread");
			return (EPERM);
		}
		/*
		 * If we hold more than one readers lock on this rwlock,
		 * just decrement our reference count and return.
		 */
		if (--readlockp->rd_count != 0) {
			sigon(self);
			goto out;
		}
		sigon(self);
	} else {
		/*
		 * This is a usage error.
		 * No thread should release an unowned lock.
		 */
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_unlock", "lock not owned");
		return (EPERM);
	}

	if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) {
		/* EMPTY */;
	} else if (rwlp->rwlock_type == USYNC_PROCESS) {
		(void) mutex_lock(&rwlp->mutex);
		(void) __lwp_rwlock_unlock(rwlp);
		(void) mutex_unlock(&rwlp->mutex);
	} else {
		rw_queue_release(rwlp);
	}

out:
	DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr);
	return (0);
}

void
lrw_unlock(rwlock_t *rwlp)
{
	(void) rw_unlock(rwlp);
	exit_critical(curthread);
}