/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include "lint.h"
#include "thr_uberdata.h"
#include <sys/sdt.h>

/*
 * Lock request codes passed as the rd_wr argument of the common lock
 * routines in this file.  TRY_FLAG is or'ed into READ_LOCK or WRITE_LOCK
 * to request a trylock; the lock routines mask it off again before use.
 * READ_LOCK and WRITE_LOCK must stay 0 and 1 because rwlock_lock()
 * stores the value directly into ul_writer.
 */
#define TRY_FLAG 0x10
#define READ_LOCK 0
#define WRITE_LOCK 1
#define READ_LOCK_TRY (READ_LOCK | TRY_FLAG)
#define WRITE_LOCK_TRY (WRITE_LOCK | TRY_FLAG)

#define NLOCKS 4 /* initial number of readlock_t structs allocated */

/*
 * Sanity check on a snapshot of the rwlock state word:
 * if URW_WRITE_LOCKED is set, then the only other bit that
 * may be set along with it is URW_HAS_WAITERS.
 */
#define ASSERT_CONSISTENT_STATE(readers) \
	ASSERT(!((readers) & URW_WRITE_LOCKED) || \
	    ((readers) & ~URW_HAS_WAITERS) == URW_WRITE_LOCKED)

/*
 * Find/allocate an entry for rwlp in our array of rwlocks held for reading.
 * We must be deferring signals for this to be safe.
 * Else if we are returning an entry with ul_rdlocks == 0,
 * it could be reassigned behind our back in a signal handler.
500Sstevel@tonic-gate */ 510Sstevel@tonic-gate static readlock_t * 520Sstevel@tonic-gate rwl_entry(rwlock_t *rwlp) 530Sstevel@tonic-gate { 540Sstevel@tonic-gate ulwp_t *self = curthread; 550Sstevel@tonic-gate readlock_t *remembered = NULL; 560Sstevel@tonic-gate readlock_t *readlockp; 570Sstevel@tonic-gate uint_t nlocks; 580Sstevel@tonic-gate 59*4570Sraf /* we must be deferring signals */ 60*4570Sraf ASSERT((self->ul_critical + self->ul_sigdefer) != 0); 61*4570Sraf 620Sstevel@tonic-gate if ((nlocks = self->ul_rdlocks) != 0) 630Sstevel@tonic-gate readlockp = self->ul_readlock.array; 640Sstevel@tonic-gate else { 650Sstevel@tonic-gate nlocks = 1; 660Sstevel@tonic-gate readlockp = &self->ul_readlock.single; 670Sstevel@tonic-gate } 680Sstevel@tonic-gate 690Sstevel@tonic-gate for (; nlocks; nlocks--, readlockp++) { 700Sstevel@tonic-gate if (readlockp->rd_rwlock == rwlp) 710Sstevel@tonic-gate return (readlockp); 720Sstevel@tonic-gate if (readlockp->rd_count == 0 && remembered == NULL) 730Sstevel@tonic-gate remembered = readlockp; 740Sstevel@tonic-gate } 750Sstevel@tonic-gate if (remembered != NULL) { 760Sstevel@tonic-gate remembered->rd_rwlock = rwlp; 770Sstevel@tonic-gate return (remembered); 780Sstevel@tonic-gate } 790Sstevel@tonic-gate 800Sstevel@tonic-gate /* 810Sstevel@tonic-gate * No entry available. Allocate more space, converting the single 820Sstevel@tonic-gate * readlock_t entry into an array of readlock_t entries if necessary. 830Sstevel@tonic-gate */ 840Sstevel@tonic-gate if ((nlocks = self->ul_rdlocks) == 0) { 850Sstevel@tonic-gate /* 860Sstevel@tonic-gate * Initial allocation of the readlock_t array. 870Sstevel@tonic-gate * Convert the single entry into an array. 880Sstevel@tonic-gate */ 890Sstevel@tonic-gate self->ul_rdlocks = nlocks = NLOCKS; 900Sstevel@tonic-gate readlockp = lmalloc(nlocks * sizeof (readlock_t)); 910Sstevel@tonic-gate /* 920Sstevel@tonic-gate * The single readlock_t becomes the first entry in the array. 
930Sstevel@tonic-gate */ 940Sstevel@tonic-gate *readlockp = self->ul_readlock.single; 950Sstevel@tonic-gate self->ul_readlock.single.rd_count = 0; 960Sstevel@tonic-gate self->ul_readlock.array = readlockp; 970Sstevel@tonic-gate /* 980Sstevel@tonic-gate * Return the next available entry in the array. 990Sstevel@tonic-gate */ 1000Sstevel@tonic-gate (++readlockp)->rd_rwlock = rwlp; 1010Sstevel@tonic-gate return (readlockp); 1020Sstevel@tonic-gate } 1030Sstevel@tonic-gate /* 1040Sstevel@tonic-gate * Reallocate the array, double the size each time. 1050Sstevel@tonic-gate */ 1060Sstevel@tonic-gate readlockp = lmalloc(nlocks * 2 * sizeof (readlock_t)); 1070Sstevel@tonic-gate (void) _memcpy(readlockp, self->ul_readlock.array, 1080Sstevel@tonic-gate nlocks * sizeof (readlock_t)); 1090Sstevel@tonic-gate lfree(self->ul_readlock.array, nlocks * sizeof (readlock_t)); 1100Sstevel@tonic-gate self->ul_readlock.array = readlockp; 1110Sstevel@tonic-gate self->ul_rdlocks *= 2; 1120Sstevel@tonic-gate /* 1130Sstevel@tonic-gate * Return the next available entry in the newly allocated array. 1140Sstevel@tonic-gate */ 1150Sstevel@tonic-gate (readlockp += nlocks)->rd_rwlock = rwlp; 1160Sstevel@tonic-gate return (readlockp); 1170Sstevel@tonic-gate } 1180Sstevel@tonic-gate 1190Sstevel@tonic-gate /* 1200Sstevel@tonic-gate * Free the array of rwlocks held for reading. 
1210Sstevel@tonic-gate */ 1220Sstevel@tonic-gate void 1230Sstevel@tonic-gate rwl_free(ulwp_t *ulwp) 1240Sstevel@tonic-gate { 1250Sstevel@tonic-gate uint_t nlocks; 1260Sstevel@tonic-gate 1270Sstevel@tonic-gate if ((nlocks = ulwp->ul_rdlocks) != 0) 1280Sstevel@tonic-gate lfree(ulwp->ul_readlock.array, nlocks * sizeof (readlock_t)); 1290Sstevel@tonic-gate ulwp->ul_rdlocks = 0; 1300Sstevel@tonic-gate ulwp->ul_readlock.single.rd_rwlock = NULL; 1310Sstevel@tonic-gate ulwp->ul_readlock.single.rd_count = 0; 1320Sstevel@tonic-gate } 1330Sstevel@tonic-gate 1340Sstevel@tonic-gate /* 1350Sstevel@tonic-gate * Check if a reader version of the lock is held by the current thread. 1360Sstevel@tonic-gate * rw_read_is_held() is private to libc. 1370Sstevel@tonic-gate */ 1380Sstevel@tonic-gate #pragma weak rw_read_is_held = _rw_read_held 1390Sstevel@tonic-gate #pragma weak rw_read_held = _rw_read_held 1400Sstevel@tonic-gate int 1410Sstevel@tonic-gate _rw_read_held(rwlock_t *rwlp) 1420Sstevel@tonic-gate { 143*4570Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 144*4570Sraf uint32_t readers; 145*4570Sraf ulwp_t *self = curthread; 1460Sstevel@tonic-gate readlock_t *readlockp; 1470Sstevel@tonic-gate uint_t nlocks; 148*4570Sraf int rval = 0; 1490Sstevel@tonic-gate 150*4570Sraf no_preempt(self); 151*4570Sraf 152*4570Sraf readers = *rwstate; 153*4570Sraf ASSERT_CONSISTENT_STATE(readers); 154*4570Sraf if (!(readers & URW_WRITE_LOCKED) && 155*4570Sraf (readers & URW_READERS_MASK) != 0) { 156*4570Sraf /* 157*4570Sraf * The lock is held for reading by some thread. 158*4570Sraf * Search our array of rwlocks held for reading for a match. 
159*4570Sraf */ 160*4570Sraf if ((nlocks = self->ul_rdlocks) != 0) 161*4570Sraf readlockp = self->ul_readlock.array; 162*4570Sraf else { 163*4570Sraf nlocks = 1; 164*4570Sraf readlockp = &self->ul_readlock.single; 165*4570Sraf } 166*4570Sraf for (; nlocks; nlocks--, readlockp++) { 167*4570Sraf if (readlockp->rd_rwlock == rwlp) { 168*4570Sraf if (readlockp->rd_count) 169*4570Sraf rval = 1; 170*4570Sraf break; 171*4570Sraf } 172*4570Sraf } 1730Sstevel@tonic-gate } 1740Sstevel@tonic-gate 175*4570Sraf preempt(self); 176*4570Sraf return (rval); 1770Sstevel@tonic-gate } 1780Sstevel@tonic-gate 1790Sstevel@tonic-gate /* 1800Sstevel@tonic-gate * Check if a writer version of the lock is held by the current thread. 1810Sstevel@tonic-gate * rw_write_is_held() is private to libc. 1820Sstevel@tonic-gate */ 1830Sstevel@tonic-gate #pragma weak rw_write_is_held = _rw_write_held 1840Sstevel@tonic-gate #pragma weak rw_write_held = _rw_write_held 1850Sstevel@tonic-gate int 1860Sstevel@tonic-gate _rw_write_held(rwlock_t *rwlp) 1870Sstevel@tonic-gate { 188*4570Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 189*4570Sraf uint32_t readers; 1900Sstevel@tonic-gate ulwp_t *self = curthread; 191*4570Sraf int rval; 192*4570Sraf 193*4570Sraf no_preempt(self); 1940Sstevel@tonic-gate 195*4570Sraf readers = *rwstate; 196*4570Sraf ASSERT_CONSISTENT_STATE(readers); 197*4570Sraf rval = ((readers & URW_WRITE_LOCKED) && 198*4570Sraf rwlp->rwlock_owner == (uintptr_t)self && 199*4570Sraf (rwlp->rwlock_type == USYNC_THREAD || 200*4570Sraf rwlp->rwlock_ownerpid == self->ul_uberdata->pid)); 2010Sstevel@tonic-gate 202*4570Sraf preempt(self); 203*4570Sraf return (rval); 2040Sstevel@tonic-gate } 2050Sstevel@tonic-gate 2060Sstevel@tonic-gate #pragma weak rwlock_init = __rwlock_init 2070Sstevel@tonic-gate #pragma weak _rwlock_init = __rwlock_init 2080Sstevel@tonic-gate /* ARGSUSED2 */ 2090Sstevel@tonic-gate int 2100Sstevel@tonic-gate __rwlock_init(rwlock_t *rwlp, int type, void 
*arg) 2110Sstevel@tonic-gate { 2120Sstevel@tonic-gate if (type != USYNC_THREAD && type != USYNC_PROCESS) 2130Sstevel@tonic-gate return (EINVAL); 2140Sstevel@tonic-gate /* 2150Sstevel@tonic-gate * Once reinitialized, we can no longer be holding a read or write lock. 2160Sstevel@tonic-gate * We can do nothing about other threads that are holding read locks. 2170Sstevel@tonic-gate */ 218*4570Sraf sigoff(curthread); 219*4570Sraf rwl_entry(rwlp)->rd_count = 0; 220*4570Sraf sigon(curthread); 2210Sstevel@tonic-gate (void) _memset(rwlp, 0, sizeof (*rwlp)); 2220Sstevel@tonic-gate rwlp->rwlock_type = (uint16_t)type; 2230Sstevel@tonic-gate rwlp->rwlock_magic = RWL_MAGIC; 2240Sstevel@tonic-gate rwlp->mutex.mutex_type = (uint8_t)type; 2250Sstevel@tonic-gate rwlp->mutex.mutex_flag = LOCK_INITED; 2260Sstevel@tonic-gate rwlp->mutex.mutex_magic = MUTEX_MAGIC; 2270Sstevel@tonic-gate return (0); 2280Sstevel@tonic-gate } 2290Sstevel@tonic-gate 2300Sstevel@tonic-gate #pragma weak rwlock_destroy = __rwlock_destroy 2310Sstevel@tonic-gate #pragma weak _rwlock_destroy = __rwlock_destroy 2320Sstevel@tonic-gate #pragma weak pthread_rwlock_destroy = __rwlock_destroy 2330Sstevel@tonic-gate #pragma weak _pthread_rwlock_destroy = __rwlock_destroy 2340Sstevel@tonic-gate int 2350Sstevel@tonic-gate __rwlock_destroy(rwlock_t *rwlp) 2360Sstevel@tonic-gate { 2370Sstevel@tonic-gate /* 2380Sstevel@tonic-gate * Once destroyed, we can no longer be holding a read or write lock. 2390Sstevel@tonic-gate * We can do nothing about other threads that are holding read locks. 2400Sstevel@tonic-gate */ 241*4570Sraf sigoff(curthread); 242*4570Sraf rwl_entry(rwlp)->rd_count = 0; 243*4570Sraf sigon(curthread); 2440Sstevel@tonic-gate rwlp->rwlock_magic = 0; 2450Sstevel@tonic-gate tdb_sync_obj_deregister(rwlp); 2460Sstevel@tonic-gate return (0); 2470Sstevel@tonic-gate } 2480Sstevel@tonic-gate 2490Sstevel@tonic-gate /* 250*4570Sraf * Attempt to acquire a readers lock. Return true on success. 
251*4570Sraf */ 252*4570Sraf static int 253*4570Sraf read_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) 254*4570Sraf { 255*4570Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 256*4570Sraf uint32_t mask = ignore_waiters_flag? 257*4570Sraf URW_WRITE_LOCKED : (URW_HAS_WAITERS | URW_WRITE_LOCKED); 258*4570Sraf uint32_t readers; 259*4570Sraf ulwp_t *self = curthread; 260*4570Sraf 261*4570Sraf no_preempt(self); 262*4570Sraf while (((readers = *rwstate) & mask) == 0) { 263*4570Sraf if (atomic_cas_32(rwstate, readers, readers + 1) == readers) { 264*4570Sraf preempt(self); 265*4570Sraf return (1); 266*4570Sraf } 267*4570Sraf } 268*4570Sraf preempt(self); 269*4570Sraf return (0); 270*4570Sraf } 271*4570Sraf 272*4570Sraf /* 273*4570Sraf * Attempt to release a reader lock. Return true on success. 274*4570Sraf */ 275*4570Sraf static int 276*4570Sraf read_unlock_try(rwlock_t *rwlp) 277*4570Sraf { 278*4570Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 279*4570Sraf uint32_t readers; 280*4570Sraf ulwp_t *self = curthread; 281*4570Sraf 282*4570Sraf no_preempt(self); 283*4570Sraf while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) { 284*4570Sraf if (atomic_cas_32(rwstate, readers, readers - 1) == readers) { 285*4570Sraf preempt(self); 286*4570Sraf return (1); 287*4570Sraf } 288*4570Sraf } 289*4570Sraf preempt(self); 290*4570Sraf return (0); 291*4570Sraf } 292*4570Sraf 293*4570Sraf /* 294*4570Sraf * Attempt to acquire a writer lock. Return true on success. 295*4570Sraf */ 296*4570Sraf static int 297*4570Sraf write_lock_try(rwlock_t *rwlp, int ignore_waiters_flag) 298*4570Sraf { 299*4570Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 300*4570Sraf uint32_t mask = ignore_waiters_flag? 
301*4570Sraf (URW_WRITE_LOCKED | URW_READERS_MASK) : 302*4570Sraf (URW_HAS_WAITERS | URW_WRITE_LOCKED | URW_READERS_MASK); 303*4570Sraf ulwp_t *self = curthread; 304*4570Sraf uint32_t readers; 305*4570Sraf 306*4570Sraf no_preempt(self); 307*4570Sraf while (((readers = *rwstate) & mask) == 0) { 308*4570Sraf if (atomic_cas_32(rwstate, readers, readers | URW_WRITE_LOCKED) 309*4570Sraf == readers) { 310*4570Sraf preempt(self); 311*4570Sraf return (1); 312*4570Sraf } 313*4570Sraf } 314*4570Sraf preempt(self); 315*4570Sraf return (0); 316*4570Sraf } 317*4570Sraf 318*4570Sraf /* 319*4570Sraf * Attempt to release a writer lock. Return true on success. 320*4570Sraf */ 321*4570Sraf static int 322*4570Sraf write_unlock_try(rwlock_t *rwlp) 323*4570Sraf { 324*4570Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 325*4570Sraf uint32_t readers; 326*4570Sraf ulwp_t *self = curthread; 327*4570Sraf 328*4570Sraf no_preempt(self); 329*4570Sraf while (((readers = *rwstate) & URW_HAS_WAITERS) == 0) { 330*4570Sraf if (atomic_cas_32(rwstate, readers, 0) == readers) { 331*4570Sraf preempt(self); 332*4570Sraf return (1); 333*4570Sraf } 334*4570Sraf } 335*4570Sraf preempt(self); 336*4570Sraf return (0); 337*4570Sraf } 338*4570Sraf 339*4570Sraf /* 340*4570Sraf * Wake up thread(s) sleeping on the rwlock queue and then 3410Sstevel@tonic-gate * drop the queue lock. Return non-zero if we wake up someone. 342*4570Sraf * This is called when a thread releases a lock that appears to have waiters. 
 */
static int
rw_queue_release(queue_head_t *qp, rwlock_t *rwlp)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	uint32_t writers;
	int nlwpid = 0;
	int maxlwps = MAXLWPS;
	ulwp_t *self;
	ulwp_t **ulwpp;
	ulwp_t *ulwp;
	ulwp_t *prev = NULL;
	lwpid_t buffer[MAXLWPS];
	lwpid_t *lwpid = buffer;

	/* nothing to do unless the waiters bit is set */
	readers = *rwstate;
	ASSERT_CONSISTENT_STATE(readers);
	if (!(readers & URW_HAS_WAITERS)) {
		queue_unlock(qp);
		return (0);
	}
	/*
	 * From here on, 'readers' is the reader-count field of the state
	 * word plus the number of queued readers selected for wakeup,
	 * and 'writers' is the number of queued writers selected.
	 */
	readers &= URW_READERS_MASK;
	writers = 0;

	/*
	 * Walk the list of waiters and prepare to wake up as
	 * many readers as we encounter before encountering
	 * a writer.  If the first thread on the list is a
	 * writer, stop there and wake it up.
	 *
	 * We keep track of lwpids that are to be unparked in lwpid[].
	 * __lwp_unpark_all() is called to unpark all of them after
	 * they have been removed from the sleep queue and the sleep
	 * queue lock has been dropped.  If we run out of space in our
	 * on-stack buffer, we need to allocate more but we can't call
	 * lmalloc() because we are holding a queue lock when the overflow
	 * occurs and lmalloc() acquires a lock.  We can't use alloca()
	 * either because the application may have allocated a small
	 * stack and we don't want to overrun the stack.  So we call
	 * alloc_lwpids() to allocate a bigger buffer using the mmap()
	 * system call directly since that path acquires no locks.
	 */
	ulwpp = &qp->qh_head;
	while ((ulwp = *ulwpp) != NULL) {
		/* skip queue entries that are waiting on something else */
		if (ulwp->ul_wchan != rwlp) {
			prev = ulwp;
			ulwpp = &ulwp->ul_link;
			continue;
		}
		if (ulwp->ul_writer) {
			/*
			 * A writer is selected only if nothing has been
			 * selected so far and the reader count is zero.
			 */
			if (writers != 0 || readers != 0)
				break;
			/* one writer to wake */
			writers++;
		} else {
			/* a reader cannot follow a selected writer */
			if (writers != 0)
				break;
			/* at least one reader to wake */
			readers++;
			if (nlwpid == maxlwps)
				lwpid = alloc_lwpids(lwpid, &nlwpid, &maxlwps);
		}
		/*
		 * Unlink the selected thread.  ulwpp and prev are left
		 * alone, so *ulwpp is re-read at the top of the loop.
		 */
		(void) queue_unlink(qp, ulwpp, prev);
		lwpid[nlwpid++] = ulwp->ul_lwpid;
	}
	/* we ran off the end of the queue: no waiter for rwlp stopped us */
	if (ulwp == NULL)
		atomic_and_32(rwstate, ~URW_HAS_WAITERS);
	if (nlwpid == 0) {
		queue_unlock(qp);
	} else {
		/* unpark the selected threads after dropping the queue lock */
		self = curthread;
		no_preempt(self);
		queue_unlock(qp);
		if (nlwpid == 1)
			(void) __lwp_unpark(lwpid[0]);
		else
			(void) __lwp_unpark_all(lwpid, nlwpid);
		preempt(self);
	}
	/* release the overflow buffer if alloc_lwpids() replaced ours */
	if (lwpid != buffer)
		(void) _private_munmap(lwpid, maxlwps * sizeof (lwpid_t));
	return (nlwpid != 0);
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-shared (USYNC_PROCESS) rwlocks.
 *
 * Note: if the lock appears to be contended we call __lwp_rwlock_rdlock()
 * or __lwp_rwlock_wrlock() holding the mutex. These return with the mutex
 * released, and if they need to sleep will release the mutex first.
 * In the event of a spurious wakeup, these will return EAGAIN (because
 * it is much easier for us to re-acquire the mutex here).
 *
 * rd_wr is READ_LOCK or WRITE_LOCK, optionally or'ed with TRY_FLAG.
 * tsp is handed to the blocking __lwp_rwlock_*() calls.
 * Returns 0 on success, EBUSY for a trylock that finds the lock
 * write-locked, or the error from the mutex or kernel lock call.
 */
int
shared_rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	mutex_t *mp = &rwlp->mutex;
	/* LINTED set but not used */
	uint32_t readers;
	int try_flag;
	int error;

	/* separate the trylock request from the lock kind */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	do {
		/* a trylock gives up at once if the lock is write-locked */
		if (try_flag && (*rwstate & URW_WRITE_LOCKED)) {
			error = EBUSY;
			break;
		}
		if ((error = _private_mutex_lock(mp)) != 0)
			break;
		/*
		 * Try the user-level fast path while holding the mutex.
		 * On success we leave the loop with error == 0.
		 */
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0)) {
				(void) _private_mutex_unlock(mp);
				break;
			}
		} else {
			if (write_lock_try(rwlp, 0)) {
				(void) _private_mutex_unlock(mp);
				break;
			}
		}
		/* record that a waiter exists before calling the kernel */
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		/*
		 * The calls to __lwp_rwlock_*() below will release the mutex,
		 * so we need a dtrace probe here.
		 */
		mp->mutex_owner = 0;
		DTRACE_PROBE2(plockstat, mutex__release, mp, 0);
		/*
		 * The waiters bit may be inaccurate.
		 * Only the kernel knows for sure.
		 */
		if (rd_wr == READ_LOCK) {
			if (try_flag)
				error = __lwp_rwlock_tryrdlock(rwlp);
			else
				error = __lwp_rwlock_rdlock(rwlp, tsp);
		} else {
			if (try_flag)
				error = __lwp_rwlock_trywrlock(rwlp);
			else
				error = __lwp_rwlock_wrlock(rwlp, tsp);
		}
	} while (error == EAGAIN || error == EINTR);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Common code for rdlock, timedrdlock, wrlock, timedwrlock, tryrdlock,
 * and trywrlock for process-private (USYNC_THREAD) rwlocks.
 *
 * rd_wr is READ_LOCK or WRITE_LOCK, optionally or'ed with TRY_FLAG.
 * tsp is handed to __lwp_park() when we have to sleep.
 * Returns 0 on success, EBUSY for a trylock that would have to block,
 * or the non-zero, non-EINTR error returned by __lwp_park().
 */
int
rwlock_lock(rwlock_t *rwlp, timespec_t *tsp, int rd_wr)
{
	volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers;
	uint32_t readers;
	ulwp_t *self = curthread;
	queue_head_t *qp;
	ulwp_t *ulwp;
	int try_flag;
	int error = 0;

	/* separate the trylock request from the lock kind */
	try_flag = (rd_wr & TRY_FLAG);
	rd_wr &= ~TRY_FLAG;
	ASSERT(rd_wr == READ_LOCK || rd_wr == WRITE_LOCK);

	if (!try_flag) {
		DTRACE_PROBE2(plockstat, rw__block, rwlp, rd_wr);
	}

	/* the queue lock is held for everything below except while parked */
	qp = queue_lock(rwlp, MX);
retry:
	while (error == 0) {
		/* fast path: succeeds only if no waiters bit is set */
		if (rd_wr == READ_LOCK) {
			if (read_lock_try(rwlp, 0))
				goto out;
		} else {
			if (write_lock_try(rwlp, 0))
				goto out;
		}
		/* set the waiters bit, then look at the resulting state */
		atomic_or_32(rwstate, URW_HAS_WAITERS);
		readers = *rwstate;
		ASSERT_CONSISTENT_STATE(readers);
		if ((readers & URW_WRITE_LOCKED) ||
		    (rd_wr == WRITE_LOCK &&
		    (readers & URW_READERS_MASK) != 0))
			/* EMPTY */;	/* somebody holds the lock */
		else if ((ulwp = queue_waiter(qp, rwlp)) == NULL) {
			/* the lock is available and nobody is queued for it */
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
			break;		/* no queued waiters */
		} else {
			/*
			 * The lock is available but a thread is queued.
			 * Breaking out of the loop here means we go on to
			 * take the lock ahead of that thread; falling
			 * through means we defer to it and block.
			 */
			int our_pri = real_priority(self);
			int his_pri = real_priority(ulwp);

			if (rd_wr == WRITE_LOCK) {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours.
				 */
				if (his_pri <= our_pri)
					break;
			} else {
				/*
				 * We defer to a queued thread that has
				 * a higher priority than ours or that
				 * is a writer whose priority equals ours.
				 */
				if (his_pri < our_pri ||
				    (his_pri == our_pri && !ulwp->ul_writer))
					break;
			}
		}
		/*
		 * We are about to block.
		 * If we're doing a trylock, return EBUSY instead.
		 */
		if (try_flag) {
			error = EBUSY;
			break;
		}
		/*
		 * Enqueue writers ahead of readers of the
		 * same priority.
		 */
		self->ul_writer = rd_wr;	/* *must* be 0 or 1 */
		enqueue(qp, self, rwlp, MX);
		set_parking_flag(self, 1);
		queue_unlock(qp);
		/* EINTR from __lwp_park() is treated as an ordinary wakeup */
		if ((error = __lwp_park(tsp, 0)) == EINTR)
			error = 0;
		self->ul_writer = 0;
		set_parking_flag(self, 0);
		qp = queue_lock(rwlp, MX);
		/*
		 * If we are still on the sleep queue, take ourself off.
		 * NOTE(review): a zero return from dequeue_self() is taken
		 * here to mean no other waiters remain for rwlp -- confirm
		 * against dequeue_self().
		 */
		if (self->ul_sleepq && dequeue_self(qp, rwlp) == 0)
			atomic_and_32(rwstate, ~URW_HAS_WAITERS);
	}

	/*
	 * We left the loop without an error and without the lock.
	 * Try once more, this time ignoring the waiters bit, and
	 * go back into the loop if the attempt fails.
	 */
	if (error == 0) {
		if (rd_wr == READ_LOCK) {
			if (!read_lock_try(rwlp, 1))
				goto retry;
		} else {
			if (!write_lock_try(rwlp, 1))
				goto retry;
		}
	}

out:
	queue_unlock(qp);

	if (!try_flag) {
		DTRACE_PROBE3(plockstat, rw__blocked, rwlp, rd_wr, error == 0);
	}

	return (error);
}

/*
 * Acquire rwlp for reading on behalf of the calling thread.
 * tsp is passed through to the blocking lock routines.
 *
 * Returns 0 on success, EAGAIN if this thread's recorded read count
 * for rwlp is already READ_LOCK_MAX, EDEADLK if this thread holds
 * the writer lock, or else the error returned by shared_rwlock_lock()
 * or rwlock_lock().  On success the thread's read count for rwlp
 * is incremented.
 */
int
rw_rdlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	readlock_t *readlockp;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
	 */
	sigoff(self);
	readlockp = rwl_entry(rwlp);
	if (readlockp->rd_count != 0) {
		if (readlockp->rd_count == READ_LOCK_MAX) {
			sigon(self);
			error = EAGAIN;
			goto out;
		}
		/* the count itself is incremented at 'out' below */
		sigon(self);
		error = 0;
		goto out;
	}
	sigon(self);

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_is_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_rdlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	/* fast path first, then the slow path for this type of lock */
	if (read_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, READ_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, READ_LOCK);

out:
	if (error == 0) {
		/* look the entry up again; signals were enabled in between */
		sigoff(self);
		rwl_entry(rwlp)->rd_count++;
		sigon(self);
		if (rwsp)
			tdb_incr(rwsp->rw_rdlock);
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, error);
	}

	return (error);
}

#pragma weak rw_rdlock = __rw_rdlock
#pragma weak _rw_rdlock = __rw_rdlock
#pragma weak pthread_rwlock_rdlock = __rw_rdlock
#pragma weak _pthread_rwlock_rdlock = __rw_rdlock
/*
 * Acquire rwlp for reading with no timeout.
 * Returns the result of rw_rdlock_impl().
 */
int
__rw_rdlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_rdlock_impl(rwlp, NULL));
}

/*
 * Enter a critical section and then acquire rwlp for reading with
 * no timeout.  The result of rw_rdlock_impl() is discarded.
 * NOTE(review): presumably paired with an unlock routine that leaves
 * the critical section; that routine is not visible here.
 */
void
lrw_rdlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_rdlock_impl(rwlp, NULL);
}

/*
 * Acquire rwlp for reading, passing a private copy of the relative
 * timeout *reltime to rw_rdlock_impl().  An ETIME result is returned
 * to the caller as ETIMEDOUT; any other result is returned unchanged.
 */
#pragma weak pthread_rwlock_reltimedrdlock_np = \
	_pthread_rwlock_reltimedrdlock_np
int
_pthread_rwlock_reltimedrdlock_np(rwlock_t *rwlp, const timespec_t *reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_rdlock_impl(rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Acquire rwlp for reading with an absolute timeout.  *abstime is
 * converted to a relative time with abstime_to_reltime() using
 * CLOCK_REALTIME before calling rw_rdlock_impl().  An ETIME result is
 * returned to the caller as ETIMEDOUT.
 */
#pragma weak pthread_rwlock_timedrdlock = _pthread_rwlock_timedrdlock
int
_pthread_rwlock_timedrdlock(rwlock_t *rwlp, const timespec_t *abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_rdlock_impl(rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Acquire rwlp for writing on behalf of the calling thread.
 * tsp is passed through to the blocking lock routines.
 *
 * Returns 0 on success, EDEADLK if this thread already holds rwlp
 * for reading or for writing, or else the error returned by
 * shared_rwlock_lock() or rwlock_lock().  On success the calling
 * thread is recorded as the owner of the lock.
 */
int
rw_wrlock_impl(rwlock_t *rwlp, timespec_t *tsp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	int error;

	/*
	 * If we hold a readers lock on this rwlock, bail out.
	 */
	if (rw_read_is_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the readers lock");
		error = EDEADLK;
		goto out;
	}

	/*
	 * If we hold the writer lock, bail out.
	 */
	if (rw_write_is_held(rwlp)) {
		if (self->ul_error_detection)
			rwlock_error(rwlp, "rwlock_wrlock",
			    "calling thread owns the writer lock");
		error = EDEADLK;
		goto out;
	}

	/* fast path first, then the slow path for this type of lock */
	if (write_lock_try(rwlp, 0))
		error = 0;
	else if (rwlp->rwlock_type == USYNC_PROCESS)	/* kernel-level */
		error = shared_rwlock_lock(rwlp, tsp, WRITE_LOCK);
	else						/* user-level */
		error = rwlock_lock(rwlp, tsp, WRITE_LOCK);

out:
	if (error == 0) {
		/* record ownership; the pid is set only for USYNC_PROCESS */
		rwlp->rwlock_owner = (uintptr_t)self;
		if (rwlp->rwlock_type == USYNC_PROCESS)
			rwlp->rwlock_ownerpid = udp->pid;
		if (rwsp) {
			tdb_incr(rwsp->rw_wrlock);
			rwsp->rw_wrlock_begin_hold = gethrtime();
		}
		DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK);
	} else {
		DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, error);
	}
	return (error);
}

/*
 * Acquire rwlp for writing with no timeout.
 * Returns the result of rw_wrlock_impl().
 */
#pragma weak rw_wrlock = __rw_wrlock
#pragma weak _rw_wrlock = __rw_wrlock
#pragma weak pthread_rwlock_wrlock = __rw_wrlock
#pragma weak _pthread_rwlock_wrlock = __rw_wrlock
int
__rw_wrlock(rwlock_t *rwlp)
{
	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	return (rw_wrlock_impl(rwlp, NULL));
}

/*
 * Enter a critical section and then acquire rwlp for writing with
 * no timeout.  The result of rw_wrlock_impl() is discarded.
 * NOTE(review): presumably paired with an unlock routine that leaves
 * the critical section; that routine is not visible here.
 */
void
lrw_wrlock(rwlock_t *rwlp)
{
	enter_critical(curthread);
	(void) rw_wrlock_impl(rwlp, NULL);
}

/*
 * Acquire rwlp for writing, passing a private copy of the relative
 * timeout *reltime to rw_wrlock_impl().  An ETIME result is returned
 * to the caller as ETIMEDOUT; any other result is returned unchanged.
 */
#pragma weak pthread_rwlock_reltimedwrlock_np = \
	_pthread_rwlock_reltimedwrlock_np
int
_pthread_rwlock_reltimedwrlock_np(rwlock_t *rwlp, const timespec_t *reltime)
{
	timespec_t tslocal = *reltime;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	error = rw_wrlock_impl(rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

/*
 * Acquire rwlp for writing with an absolute timeout.  *abstime is
 * converted to a relative time with abstime_to_reltime() using
 * CLOCK_REALTIME before calling rw_wrlock_impl().  An ETIME result is
 * returned to the caller as ETIMEDOUT.
 */
#pragma weak pthread_rwlock_timedwrlock = _pthread_rwlock_timedwrlock
int
_pthread_rwlock_timedwrlock(rwlock_t *rwlp, const timespec_t *abstime)
{
	timespec_t tslocal;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);
	abstime_to_reltime(CLOCK_REALTIME, abstime, &tslocal);
	error = rw_wrlock_impl(rwlp, &tslocal);
	if (error == ETIME)
		error = ETIMEDOUT;
	return (error);
}

#pragma weak rw_tryrdlock = __rw_tryrdlock
#pragma weak _rw_tryrdlock = __rw_tryrdlock
#pragma weak pthread_rwlock_tryrdlock = __rw_tryrdlock
#pragma weak _pthread_rwlock_tryrdlock = __rw_tryrdlock
int
__rw_tryrdlock(rwlock_t *rwlp)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp);
	readlock_t *readlockp;
	int error;

	ASSERT(!curthread->ul_critical || curthread->ul_bindflags);

	if (rwsp)
		tdb_incr(rwsp->rw_rdlock_try);

	/*
	 * If we already hold a readers lock on this rwlock,
	 * just increment our reference count and return.
8470Sstevel@tonic-gate */ 848*4570Sraf sigoff(self); 8490Sstevel@tonic-gate readlockp = rwl_entry(rwlp); 8500Sstevel@tonic-gate if (readlockp->rd_count != 0) { 851*4570Sraf if (readlockp->rd_count == READ_LOCK_MAX) { 852*4570Sraf sigon(self); 853*4570Sraf error = EAGAIN; 854*4570Sraf goto out; 855*4570Sraf } 856*4570Sraf sigon(self); 857*4570Sraf error = 0; 858*4570Sraf goto out; 8590Sstevel@tonic-gate } 860*4570Sraf sigon(self); 8610Sstevel@tonic-gate 862*4570Sraf if (read_lock_try(rwlp, 0)) 863*4570Sraf error = 0; 864*4570Sraf else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 8650Sstevel@tonic-gate error = shared_rwlock_lock(rwlp, NULL, READ_LOCK_TRY); 8660Sstevel@tonic-gate else /* user-level */ 8670Sstevel@tonic-gate error = rwlock_lock(rwlp, NULL, READ_LOCK_TRY); 8680Sstevel@tonic-gate 869*4570Sraf out: 870*4570Sraf if (error == 0) { 871*4570Sraf sigoff(self); 872*4570Sraf rwl_entry(rwlp)->rd_count++; 873*4570Sraf sigon(self); 874*4570Sraf DTRACE_PROBE2(plockstat, rw__acquire, rwlp, READ_LOCK); 875*4570Sraf } else { 876*4570Sraf if (rwsp) 877*4570Sraf tdb_incr(rwsp->rw_rdlock_try_fail); 878*4570Sraf if (error != EBUSY) { 879*4570Sraf DTRACE_PROBE3(plockstat, rw__error, rwlp, READ_LOCK, 880*4570Sraf error); 881*4570Sraf } 882*4570Sraf } 8830Sstevel@tonic-gate 8840Sstevel@tonic-gate return (error); 8850Sstevel@tonic-gate } 8860Sstevel@tonic-gate 8870Sstevel@tonic-gate #pragma weak rw_trywrlock = __rw_trywrlock 8880Sstevel@tonic-gate #pragma weak _rw_trywrlock = __rw_trywrlock 8890Sstevel@tonic-gate #pragma weak pthread_rwlock_trywrlock = __rw_trywrlock 8900Sstevel@tonic-gate #pragma weak _pthread_rwlock_trywrlock = __rw_trywrlock 8910Sstevel@tonic-gate int 8920Sstevel@tonic-gate __rw_trywrlock(rwlock_t *rwlp) 8930Sstevel@tonic-gate { 8940Sstevel@tonic-gate ulwp_t *self = curthread; 8950Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 8960Sstevel@tonic-gate tdb_rwlock_stats_t *rwsp = RWLOCK_STATS(rwlp, udp); 8970Sstevel@tonic-gate int error; 
8980Sstevel@tonic-gate 899*4570Sraf ASSERT(!self->ul_critical || self->ul_bindflags); 9000Sstevel@tonic-gate 9010Sstevel@tonic-gate if (rwsp) 9020Sstevel@tonic-gate tdb_incr(rwsp->rw_wrlock_try); 9030Sstevel@tonic-gate 904*4570Sraf if (write_lock_try(rwlp, 0)) 905*4570Sraf error = 0; 906*4570Sraf else if (rwlp->rwlock_type == USYNC_PROCESS) /* kernel-level */ 9070Sstevel@tonic-gate error = shared_rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY); 908*4570Sraf else /* user-level */ 9090Sstevel@tonic-gate error = rwlock_lock(rwlp, NULL, WRITE_LOCK_TRY); 910*4570Sraf 911*4570Sraf if (error == 0) { 912*4570Sraf rwlp->rwlock_owner = (uintptr_t)self; 913*4570Sraf if (rwlp->rwlock_type == USYNC_PROCESS) 914*4570Sraf rwlp->rwlock_ownerpid = udp->pid; 915*4570Sraf if (rwsp) 916*4570Sraf rwsp->rw_wrlock_begin_hold = gethrtime(); 917*4570Sraf DTRACE_PROBE2(plockstat, rw__acquire, rwlp, WRITE_LOCK); 918*4570Sraf } else { 919*4570Sraf if (rwsp) 9200Sstevel@tonic-gate tdb_incr(rwsp->rw_wrlock_try_fail); 921*4570Sraf if (error != EBUSY) { 922*4570Sraf DTRACE_PROBE3(plockstat, rw__error, rwlp, WRITE_LOCK, 923*4570Sraf error); 924*4570Sraf } 9250Sstevel@tonic-gate } 9260Sstevel@tonic-gate return (error); 9270Sstevel@tonic-gate } 9280Sstevel@tonic-gate 9290Sstevel@tonic-gate #pragma weak rw_unlock = __rw_unlock 9300Sstevel@tonic-gate #pragma weak _rw_unlock = __rw_unlock 9310Sstevel@tonic-gate #pragma weak pthread_rwlock_unlock = __rw_unlock 9320Sstevel@tonic-gate #pragma weak _pthread_rwlock_unlock = __rw_unlock 9330Sstevel@tonic-gate int 9340Sstevel@tonic-gate __rw_unlock(rwlock_t *rwlp) 9350Sstevel@tonic-gate { 936*4570Sraf volatile uint32_t *rwstate = (volatile uint32_t *)&rwlp->rwlock_readers; 937*4570Sraf uint32_t readers; 9380Sstevel@tonic-gate ulwp_t *self = curthread; 9390Sstevel@tonic-gate uberdata_t *udp = self->ul_uberdata; 9400Sstevel@tonic-gate tdb_rwlock_stats_t *rwsp; 941*4570Sraf queue_head_t *qp; 942*4570Sraf int rd_wr; 943*4570Sraf int waked = 0; 9440Sstevel@tonic-gate 
945*4570Sraf readers = *rwstate; 946*4570Sraf ASSERT_CONSISTENT_STATE(readers); 947*4570Sraf if (readers & URW_WRITE_LOCKED) { 948*4570Sraf rd_wr = WRITE_LOCK; 949*4570Sraf readers = 0; 950*4570Sraf } else { 951*4570Sraf rd_wr = READ_LOCK; 952*4570Sraf readers &= URW_READERS_MASK; 9530Sstevel@tonic-gate } 9540Sstevel@tonic-gate 955*4570Sraf if (rd_wr == WRITE_LOCK) { 9560Sstevel@tonic-gate /* 9570Sstevel@tonic-gate * Since the writer lock is held, we'd better be 9580Sstevel@tonic-gate * holding it, else we cannot legitimately be here. 9590Sstevel@tonic-gate */ 9600Sstevel@tonic-gate if (!rw_write_is_held(rwlp)) { 9610Sstevel@tonic-gate if (self->ul_error_detection) 9620Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_unlock", 9630Sstevel@tonic-gate "writer lock held, " 9640Sstevel@tonic-gate "but not by the calling thread"); 9650Sstevel@tonic-gate return (EPERM); 9660Sstevel@tonic-gate } 9670Sstevel@tonic-gate if ((rwsp = RWLOCK_STATS(rwlp, udp)) != NULL) { 9680Sstevel@tonic-gate if (rwsp->rw_wrlock_begin_hold) 9690Sstevel@tonic-gate rwsp->rw_wrlock_hold_time += 9700Sstevel@tonic-gate gethrtime() - rwsp->rw_wrlock_begin_hold; 9710Sstevel@tonic-gate rwsp->rw_wrlock_begin_hold = 0; 9720Sstevel@tonic-gate } 973*4570Sraf rwlp->rwlock_owner = 0; 974*4570Sraf rwlp->rwlock_ownerpid = 0; 975*4570Sraf } else if (readers > 0) { 9760Sstevel@tonic-gate /* 9770Sstevel@tonic-gate * A readers lock is held; if we don't hold one, bail out. 
9780Sstevel@tonic-gate */ 979*4570Sraf readlock_t *readlockp; 980*4570Sraf 981*4570Sraf sigoff(self); 982*4570Sraf readlockp = rwl_entry(rwlp); 9830Sstevel@tonic-gate if (readlockp->rd_count == 0) { 984*4570Sraf sigon(self); 9850Sstevel@tonic-gate if (self->ul_error_detection) 9860Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_unlock", 9870Sstevel@tonic-gate "readers lock held, " 9880Sstevel@tonic-gate "but not by the calling thread"); 9890Sstevel@tonic-gate return (EPERM); 9900Sstevel@tonic-gate } 9910Sstevel@tonic-gate /* 9920Sstevel@tonic-gate * If we hold more than one readers lock on this rwlock, 9930Sstevel@tonic-gate * just decrement our reference count and return. 9940Sstevel@tonic-gate */ 9950Sstevel@tonic-gate if (--readlockp->rd_count != 0) { 996*4570Sraf sigon(self); 997*4570Sraf goto out; 9980Sstevel@tonic-gate } 999*4570Sraf sigon(self); 10000Sstevel@tonic-gate } else { 10010Sstevel@tonic-gate /* 10020Sstevel@tonic-gate * This is a usage error. 10030Sstevel@tonic-gate * No thread should release an unowned lock. 
10040Sstevel@tonic-gate */ 10050Sstevel@tonic-gate if (self->ul_error_detection) 10060Sstevel@tonic-gate rwlock_error(rwlp, "rwlock_unlock", "lock not owned"); 10070Sstevel@tonic-gate return (EPERM); 10080Sstevel@tonic-gate } 10090Sstevel@tonic-gate 1010*4570Sraf if (rd_wr == WRITE_LOCK && write_unlock_try(rwlp)) { 1011*4570Sraf /* EMPTY */; 1012*4570Sraf } else if (rd_wr == READ_LOCK && read_unlock_try(rwlp)) { 1013*4570Sraf /* EMPTY */; 1014*4570Sraf } else if (rwlp->rwlock_type == USYNC_PROCESS) { 1015*4570Sraf (void) _private_mutex_lock(&rwlp->mutex); 1016*4570Sraf (void) __lwp_rwlock_unlock(rwlp); 1017*4570Sraf (void) _private_mutex_unlock(&rwlp->mutex); 1018*4570Sraf waked = 1; 1019*4570Sraf } else { 10200Sstevel@tonic-gate qp = queue_lock(rwlp, MX); 1021*4570Sraf if (rd_wr == READ_LOCK) 1022*4570Sraf atomic_dec_32(rwstate); 1023*4570Sraf else 1024*4570Sraf atomic_and_32(rwstate, ~URW_WRITE_LOCKED); 10250Sstevel@tonic-gate waked = rw_queue_release(qp, rwlp); 10260Sstevel@tonic-gate } 10270Sstevel@tonic-gate 1028*4570Sraf out: 1029*4570Sraf DTRACE_PROBE2(plockstat, rw__release, rwlp, rd_wr); 1030*4570Sraf 10310Sstevel@tonic-gate /* 10320Sstevel@tonic-gate * Yield to the thread we just waked up, just in case we might 10330Sstevel@tonic-gate * be about to grab the rwlock again immediately upon return. 10340Sstevel@tonic-gate * This is pretty weak but it helps on a uniprocessor and also 10350Sstevel@tonic-gate * when cpu affinity has assigned both ourself and the other 10360Sstevel@tonic-gate * thread to the same CPU. Note that lwp_yield() will yield 10370Sstevel@tonic-gate * the processor only if the writer is at the same or higher 10380Sstevel@tonic-gate * priority than ourself. This provides more balanced program 10390Sstevel@tonic-gate * behavior; it doesn't guarantee acquisition of the lock by 10400Sstevel@tonic-gate * the pending writer. 
10410Sstevel@tonic-gate */ 10420Sstevel@tonic-gate if (waked) 10430Sstevel@tonic-gate lwp_yield(); 10440Sstevel@tonic-gate return (0); 10450Sstevel@tonic-gate } 10460Sstevel@tonic-gate 10470Sstevel@tonic-gate void 10480Sstevel@tonic-gate lrw_unlock(rwlock_t *rwlp) 10490Sstevel@tonic-gate { 10500Sstevel@tonic-gate (void) __rw_unlock(rwlp); 10510Sstevel@tonic-gate exit_critical(curthread); 10520Sstevel@tonic-gate } 1053