1*0Sstevel@tonic-gate /* 2*0Sstevel@tonic-gate * CDDL HEADER START 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*0Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*0Sstevel@tonic-gate * with the License. 8*0Sstevel@tonic-gate * 9*0Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*0Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*0Sstevel@tonic-gate * See the License for the specific language governing permissions 12*0Sstevel@tonic-gate * and limitations under the License. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*0Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*0Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*0Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*0Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*0Sstevel@tonic-gate * 20*0Sstevel@tonic-gate * CDDL HEADER END 21*0Sstevel@tonic-gate */ 22*0Sstevel@tonic-gate /* 23*0Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*0Sstevel@tonic-gate * Use is subject to license terms. 25*0Sstevel@tonic-gate */ 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*0Sstevel@tonic-gate 29*0Sstevel@tonic-gate #include <sys/param.h> 30*0Sstevel@tonic-gate #include <sys/thread.h> 31*0Sstevel@tonic-gate #include <sys/cmn_err.h> 32*0Sstevel@tonic-gate #include <sys/debug.h> 33*0Sstevel@tonic-gate #include <sys/cpuvar.h> 34*0Sstevel@tonic-gate #include <sys/sobject.h> 35*0Sstevel@tonic-gate #include <sys/turnstile.h> 36*0Sstevel@tonic-gate #include <sys/rwlock.h> 37*0Sstevel@tonic-gate #include <sys/rwlock_impl.h> 38*0Sstevel@tonic-gate #include <sys/atomic.h> 39*0Sstevel@tonic-gate #include <sys/lockstat.h> 40*0Sstevel@tonic-gate 41*0Sstevel@tonic-gate /* 42*0Sstevel@tonic-gate * Big Theory Statement for readers/writer locking primitives. 43*0Sstevel@tonic-gate * 44*0Sstevel@tonic-gate * An rwlock provides exclusive access to a single thread ("writer") or 45*0Sstevel@tonic-gate * concurrent access to multiple threads ("readers"). See rwlock(9F) 46*0Sstevel@tonic-gate * for a full description of the interfaces and programming model. 47*0Sstevel@tonic-gate * The rest of this comment describes the implementation. 48*0Sstevel@tonic-gate * 49*0Sstevel@tonic-gate * An rwlock is a single word with the following structure: 50*0Sstevel@tonic-gate * 51*0Sstevel@tonic-gate * --------------------------------------------------------------------- 52*0Sstevel@tonic-gate * | OWNER (writer) or HOLD COUNT (readers) | WRLOCK | WRWANT | WAIT | 53*0Sstevel@tonic-gate * --------------------------------------------------------------------- 54*0Sstevel@tonic-gate * 63 / 31 .. 3 2 1 0 55*0Sstevel@tonic-gate * 56*0Sstevel@tonic-gate * The waiters bit (0) indicates whether any threads are blocked waiting 57*0Sstevel@tonic-gate * for the lock. The write-wanted bit (1) indicates whether any threads 58*0Sstevel@tonic-gate * are blocked waiting for write access. The write-locked bit (2) indicates 59*0Sstevel@tonic-gate * whether the lock is held by a writer, which determines whether the upper 60*0Sstevel@tonic-gate * bits (3..31 in ILP32, 3..63 in LP64) should be interpreted as the owner 61*0Sstevel@tonic-gate * (thread pointer) or the hold count (number of readers). 62*0Sstevel@tonic-gate * 63*0Sstevel@tonic-gate * In the absence of any contention, a writer gets the lock by setting 64*0Sstevel@tonic-gate * this word to (curthread | RW_WRITE_LOCKED); a reader gets the lock 65*0Sstevel@tonic-gate * by incrementing the hold count (i.e. adding 8, aka RW_READ_LOCK). 66*0Sstevel@tonic-gate * 67*0Sstevel@tonic-gate * A writer will fail to acquire the lock if any other thread owns it. 68*0Sstevel@tonic-gate * A reader will fail if the lock is either owned or wanted by a writer. 69*0Sstevel@tonic-gate * rw_tryenter() returns 0 in these cases; rw_enter() blocks until the 70*0Sstevel@tonic-gate * lock becomes available. 71*0Sstevel@tonic-gate * 72*0Sstevel@tonic-gate * When a thread blocks it acquires the rwlock's hashed turnstile lock and 73*0Sstevel@tonic-gate * attempts to set RW_HAS_WAITERS (and RW_WRITE_WANTED in the writer case) 74*0Sstevel@tonic-gate * atomically *only if the lock still appears busy*. A thread must never 75*0Sstevel@tonic-gate * accidentally block for an available lock since there would be no owner 76*0Sstevel@tonic-gate * to awaken it. casip() provides the required atomicity. Once casip() 77*0Sstevel@tonic-gate * succeeds, the decision to block becomes final and irreversible. The 78*0Sstevel@tonic-gate * thread will not become runnable again until it has been granted ownership 79*0Sstevel@tonic-gate * of the lock via direct handoff from a former owner as described below. 80*0Sstevel@tonic-gate * 81*0Sstevel@tonic-gate * In the absence of any waiters, rw_exit() just clears the lock (if it 82*0Sstevel@tonic-gate * is write-locked) or decrements the hold count (if it is read-locked). 83*0Sstevel@tonic-gate * Note that even if waiters are present, decrementing the hold count 84*0Sstevel@tonic-gate * to a non-zero value requires no special action since the lock is still 85*0Sstevel@tonic-gate * held by at least one other thread. 86*0Sstevel@tonic-gate * 87*0Sstevel@tonic-gate * On the "final exit" (transition to unheld state) of a lock with waiters, 88*0Sstevel@tonic-gate * rw_exit_wakeup() grabs the turnstile lock and transfers ownership directly 89*0Sstevel@tonic-gate * to the next writer or set of readers. There are several advantages to this 90*0Sstevel@tonic-gate * approach: (1) it closes all windows for priority inversion (when a new 91*0Sstevel@tonic-gate * writer has grabbed the lock but has not yet inherited from blocked readers); 92*0Sstevel@tonic-gate * (2) it prevents starvation of equal-priority threads by granting the lock 93*0Sstevel@tonic-gate * in FIFO order; (3) it eliminates the need for a write-wanted count -- a 94*0Sstevel@tonic-gate * single bit suffices because the lock remains held until all waiting 95*0Sstevel@tonic-gate * writers are gone; (4) when we awaken N readers we can perform a single 96*0Sstevel@tonic-gate * "atomic_add(&x, N)" to set the total hold count rather than having all N 97*0Sstevel@tonic-gate * threads fight for the cache to perform an "atomic_add(&x, 1)" upon wakeup. 98*0Sstevel@tonic-gate * 99*0Sstevel@tonic-gate * The most interesting policy decision in rw_exit_wakeup() is which thread 100*0Sstevel@tonic-gate * to wake. Starvation is always possible with priority-based scheduling, 101*0Sstevel@tonic-gate * but any sane wakeup policy should at least satisfy these requirements: 102*0Sstevel@tonic-gate * 103*0Sstevel@tonic-gate * (1) The highest-priority thread in the system should not starve. 104*0Sstevel@tonic-gate * (2) The highest-priority writer should not starve. 105*0Sstevel@tonic-gate * (3) No writer should starve due to lower-priority threads. 106*0Sstevel@tonic-gate * (4) No reader should starve due to lower-priority writers. 107*0Sstevel@tonic-gate * (5) If all threads have equal priority, none of them should starve. 108*0Sstevel@tonic-gate * 109*0Sstevel@tonic-gate * We used to employ a writers-always-win policy, which doesn't even 110*0Sstevel@tonic-gate * satisfy (1): a steady stream of low-priority writers can starve out 111*0Sstevel@tonic-gate * a real-time reader! This is clearly a broken policy -- it violates 112*0Sstevel@tonic-gate * (1), (4), and (5) -- but it's how rwlocks always used to behave. 113*0Sstevel@tonic-gate * 114*0Sstevel@tonic-gate * A round-robin policy (exiting readers grant the lock to blocked writers 115*0Sstevel@tonic-gate * and vice versa) satisfies all but (3): a single high-priority writer 116*0Sstevel@tonic-gate * and many low-priority readers can starve out medium-priority writers. 117*0Sstevel@tonic-gate * 118*0Sstevel@tonic-gate * A strict priority policy (grant the lock to the highest priority blocked 119*0Sstevel@tonic-gate * thread) satisfies everything but (2): a steady stream of high-priority 120*0Sstevel@tonic-gate * readers can permanently starve the highest-priority writer. 121*0Sstevel@tonic-gate * 122*0Sstevel@tonic-gate * The reason we care about (2) is that it's important to process writers 123*0Sstevel@tonic-gate * reasonably quickly -- even if they're low priority -- because their very 124*0Sstevel@tonic-gate * presence causes all readers to take the slow (blocking) path through this 125*0Sstevel@tonic-gate * code. There is also a general sense that writers deserve some degree of 126*0Sstevel@tonic-gate * deference because they're updating the data upon which all readers act. 127*0Sstevel@tonic-gate * Presumably this data should not be allowed to become arbitrarily stale 128*0Sstevel@tonic-gate * due to writer starvation. Finally, it seems reasonable to level the 129*0Sstevel@tonic-gate * playing field a bit to compensate for the fact that it's so much harder 130*0Sstevel@tonic-gate * for a writer to get in when there are already many readers present. 131*0Sstevel@tonic-gate * 132*0Sstevel@tonic-gate * A hybrid of round-robin and strict priority can be made to satisfy 133*0Sstevel@tonic-gate * all five criteria. In this "writer priority policy" exiting readers 134*0Sstevel@tonic-gate * always grant the lock to waiting writers, but exiting writers only 135*0Sstevel@tonic-gate * grant the lock to readers of the same or higher priority than the 136*0Sstevel@tonic-gate * highest-priority blocked writer. Thus requirement (2) is satisfied, 137*0Sstevel@tonic-gate * necessarily, by a willful act of priority inversion: an exiting reader 138*0Sstevel@tonic-gate * will grant the lock to a blocked writer even if there are blocked 139*0Sstevel@tonic-gate * readers of higher priority. The situation is mitigated by the fact 140*0Sstevel@tonic-gate * that writers always inherit priority from blocked readers, and the 141*0Sstevel@tonic-gate * writer will awaken those readers as soon as it exits the lock. 142*0Sstevel@tonic-gate * 143*0Sstevel@tonic-gate * rw_downgrade() follows the same wakeup policy as an exiting writer. 144*0Sstevel@tonic-gate * 145*0Sstevel@tonic-gate * rw_tryupgrade() has the same failure mode as rw_tryenter() for a 146*0Sstevel@tonic-gate * write lock. Both honor the WRITE_WANTED bit by specification. 147*0Sstevel@tonic-gate * 148*0Sstevel@tonic-gate * The following rules apply to manipulation of rwlock internal state: 149*0Sstevel@tonic-gate * 150*0Sstevel@tonic-gate * (1) The rwlock is only modified via the atomic primitives casip() 151*0Sstevel@tonic-gate * and atomic_add_ip(). 152*0Sstevel@tonic-gate * 153*0Sstevel@tonic-gate * (2) The waiters bit and write-wanted bit are only modified under 154*0Sstevel@tonic-gate * turnstile_lookup(). This ensures that the turnstile is consistent 155*0Sstevel@tonic-gate * with the rwlock. 156*0Sstevel@tonic-gate * 157*0Sstevel@tonic-gate * (3) Waiters receive the lock by direct handoff from the previous 158*0Sstevel@tonic-gate * owner. Therefore, waiters *always* wake up holding the lock. 159*0Sstevel@tonic-gate */ 160*0Sstevel@tonic-gate 161*0Sstevel@tonic-gate /* 162*0Sstevel@tonic-gate * The sobj_ops vector exports a set of functions needed when a thread 163*0Sstevel@tonic-gate * is asleep on a synchronization object of a given type. 164*0Sstevel@tonic-gate */ 165*0Sstevel@tonic-gate static sobj_ops_t rw_sobj_ops = { 166*0Sstevel@tonic-gate SOBJ_RWLOCK, rw_owner, turnstile_stay_asleep, turnstile_change_pri 167*0Sstevel@tonic-gate }; 168*0Sstevel@tonic-gate 169*0Sstevel@tonic-gate /* 170*0Sstevel@tonic-gate * If the system panics on an rwlock, save the address of the offending 171*0Sstevel@tonic-gate * rwlock in panic_rwlock_addr, and save the contents in panic_rwlock. 172*0Sstevel@tonic-gate */ 173*0Sstevel@tonic-gate static rwlock_impl_t panic_rwlock; 174*0Sstevel@tonic-gate static rwlock_impl_t *panic_rwlock_addr; 175*0Sstevel@tonic-gate 176*0Sstevel@tonic-gate static void 177*0Sstevel@tonic-gate rw_panic(char *msg, rwlock_impl_t *lp) 178*0Sstevel@tonic-gate { 179*0Sstevel@tonic-gate if (panicstr) 180*0Sstevel@tonic-gate return; 181*0Sstevel@tonic-gate 182*0Sstevel@tonic-gate if (casptr(&panic_rwlock_addr, NULL, lp) == NULL) 183*0Sstevel@tonic-gate panic_rwlock = *lp; 184*0Sstevel@tonic-gate 185*0Sstevel@tonic-gate panic("%s, lp=%p wwwh=%lx thread=%p", 186*0Sstevel@tonic-gate msg, lp, panic_rwlock.rw_wwwh, curthread); 187*0Sstevel@tonic-gate } 188*0Sstevel@tonic-gate 189*0Sstevel@tonic-gate /* ARGSUSED */ 190*0Sstevel@tonic-gate void 191*0Sstevel@tonic-gate rw_init(krwlock_t *rwlp, char *name, krw_type_t type, void *arg) 192*0Sstevel@tonic-gate { 193*0Sstevel@tonic-gate ((rwlock_impl_t *)rwlp)->rw_wwwh = 0; 194*0Sstevel@tonic-gate } 195*0Sstevel@tonic-gate 196*0Sstevel@tonic-gate void 197*0Sstevel@tonic-gate rw_destroy(krwlock_t *rwlp) 198*0Sstevel@tonic-gate { 199*0Sstevel@tonic-gate rwlock_impl_t *lp = (rwlock_impl_t *)rwlp; 200*0Sstevel@tonic-gate 201*0Sstevel@tonic-gate if (lp->rw_wwwh != 0) { 202*0Sstevel@tonic-gate if ((lp->rw_wwwh & RW_DOUBLE_LOCK) == RW_DOUBLE_LOCK) 203*0Sstevel@tonic-gate rw_panic("rw_destroy: lock already destroyed", lp); 204*0Sstevel@tonic-gate else 205*0Sstevel@tonic-gate rw_panic("rw_destroy: lock still active", lp); 206*0Sstevel@tonic-gate } 207*0Sstevel@tonic-gate 208*0Sstevel@tonic-gate lp->rw_wwwh = RW_DOUBLE_LOCK; 209*0Sstevel@tonic-gate } 210*0Sstevel@tonic-gate 211*0Sstevel@tonic-gate /* 212*0Sstevel@tonic-gate * Verify that an rwlock is held correctly. 213*0Sstevel@tonic-gate */ 214*0Sstevel@tonic-gate static int 215*0Sstevel@tonic-gate rw_locked(rwlock_impl_t *lp, krw_t rw) 216*0Sstevel@tonic-gate { 217*0Sstevel@tonic-gate uintptr_t old = lp->rw_wwwh; 218*0Sstevel@tonic-gate 219*0Sstevel@tonic-gate if (rw == RW_READER) 220*0Sstevel@tonic-gate return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED)); 221*0Sstevel@tonic-gate 222*0Sstevel@tonic-gate if (rw == RW_WRITER) 223*0Sstevel@tonic-gate return ((old & RW_OWNER) == (uintptr_t)curthread); 224*0Sstevel@tonic-gate 225*0Sstevel@tonic-gate return (0); 226*0Sstevel@tonic-gate } 227*0Sstevel@tonic-gate 228*0Sstevel@tonic-gate /* 229*0Sstevel@tonic-gate * Full-service implementation of rw_enter() to handle all the hard cases. 230*0Sstevel@tonic-gate * Called from the assembly version if anything complicated is going on. 231*0Sstevel@tonic-gate * The only semantic difference between calling rw_enter() and calling 232*0Sstevel@tonic-gate * rw_enter_sleep() directly is that we assume the caller has already done 233*0Sstevel@tonic-gate * a THREAD_KPRI_REQUEST() in the RW_READER case. 234*0Sstevel@tonic-gate */ 235*0Sstevel@tonic-gate void 236*0Sstevel@tonic-gate rw_enter_sleep(rwlock_impl_t *lp, krw_t rw) 237*0Sstevel@tonic-gate { 238*0Sstevel@tonic-gate uintptr_t old, new, lock_value, lock_busy, lock_wait; 239*0Sstevel@tonic-gate hrtime_t sleep_time; 240*0Sstevel@tonic-gate turnstile_t *ts; 241*0Sstevel@tonic-gate 242*0Sstevel@tonic-gate if (rw == RW_READER) { 243*0Sstevel@tonic-gate lock_value = RW_READ_LOCK; 244*0Sstevel@tonic-gate lock_busy = RW_WRITE_CLAIMED; 245*0Sstevel@tonic-gate lock_wait = RW_HAS_WAITERS; 246*0Sstevel@tonic-gate } else { 247*0Sstevel@tonic-gate lock_value = RW_WRITE_LOCK(curthread); 248*0Sstevel@tonic-gate lock_busy = (uintptr_t)RW_LOCKED; 249*0Sstevel@tonic-gate lock_wait = RW_HAS_WAITERS | RW_WRITE_WANTED; 250*0Sstevel@tonic-gate } 251*0Sstevel@tonic-gate 252*0Sstevel@tonic-gate for (;;) { 253*0Sstevel@tonic-gate if (((old = lp->rw_wwwh) & lock_busy) == 0) { 254*0Sstevel@tonic-gate if (casip(&lp->rw_wwwh, old, old + lock_value) != old) 255*0Sstevel@tonic-gate continue; 256*0Sstevel@tonic-gate break; 257*0Sstevel@tonic-gate } 258*0Sstevel@tonic-gate 259*0Sstevel@tonic-gate if (panicstr) 260*0Sstevel@tonic-gate return; 261*0Sstevel@tonic-gate 262*0Sstevel@tonic-gate if ((old & RW_DOUBLE_LOCK) == RW_DOUBLE_LOCK) { 263*0Sstevel@tonic-gate rw_panic("rw_enter: bad rwlock", lp); 264*0Sstevel@tonic-gate return; 265*0Sstevel@tonic-gate } 266*0Sstevel@tonic-gate 267*0Sstevel@tonic-gate if ((old & RW_OWNER) == (uintptr_t)curthread) { 268*0Sstevel@tonic-gate rw_panic("recursive rw_enter", lp); 269*0Sstevel@tonic-gate return; 270*0Sstevel@tonic-gate } 271*0Sstevel@tonic-gate 272*0Sstevel@tonic-gate ts = turnstile_lookup(lp); 273*0Sstevel@tonic-gate 274*0Sstevel@tonic-gate do { 275*0Sstevel@tonic-gate if (((old = lp->rw_wwwh) & lock_busy) == 0) 276*0Sstevel@tonic-gate break; 277*0Sstevel@tonic-gate new = old | lock_wait; 278*0Sstevel@tonic-gate } while (old != new && casip(&lp->rw_wwwh, old, new) != old); 279*0Sstevel@tonic-gate 280*0Sstevel@tonic-gate if ((old & lock_busy) == 0) { 281*0Sstevel@tonic-gate /* 282*0Sstevel@tonic-gate * The lock appears free now; try the dance again 283*0Sstevel@tonic-gate */ 284*0Sstevel@tonic-gate turnstile_exit(lp); 285*0Sstevel@tonic-gate continue; 286*0Sstevel@tonic-gate } 287*0Sstevel@tonic-gate 288*0Sstevel@tonic-gate /* 289*0Sstevel@tonic-gate * We really are going to block. Bump the stats, and drop 290*0Sstevel@tonic-gate * kpri if we're a reader. 291*0Sstevel@tonic-gate */ 292*0Sstevel@tonic-gate ASSERT(lp->rw_wwwh & lock_wait); 293*0Sstevel@tonic-gate ASSERT(lp->rw_wwwh & RW_LOCKED); 294*0Sstevel@tonic-gate 295*0Sstevel@tonic-gate sleep_time = -gethrtime(); 296*0Sstevel@tonic-gate if (rw == RW_READER) { 297*0Sstevel@tonic-gate THREAD_KPRI_RELEASE(); 298*0Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, rw_rdfails, 1); 299*0Sstevel@tonic-gate (void) turnstile_block(ts, TS_READER_Q, lp, 300*0Sstevel@tonic-gate &rw_sobj_ops, NULL, NULL); 301*0Sstevel@tonic-gate } else { 302*0Sstevel@tonic-gate CPU_STATS_ADDQ(CPU, sys, rw_wrfails, 1); 303*0Sstevel@tonic-gate (void) turnstile_block(ts, TS_WRITER_Q, lp, 304*0Sstevel@tonic-gate &rw_sobj_ops, NULL, NULL); 305*0Sstevel@tonic-gate } 306*0Sstevel@tonic-gate sleep_time += gethrtime(); 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate LOCKSTAT_RECORD4(LS_RW_ENTER_BLOCK, lp, sleep_time, rw, 309*0Sstevel@tonic-gate (old & RW_WRITE_LOCKED) ? 1 : 0, 310*0Sstevel@tonic-gate old >> RW_HOLD_COUNT_SHIFT); 311*0Sstevel@tonic-gate 312*0Sstevel@tonic-gate /* 313*0Sstevel@tonic-gate * We wake up holding the lock (and having kpri if we're 314*0Sstevel@tonic-gate * a reader) via direct handoff from the previous owner. 315*0Sstevel@tonic-gate */ 316*0Sstevel@tonic-gate break; 317*0Sstevel@tonic-gate } 318*0Sstevel@tonic-gate 319*0Sstevel@tonic-gate ASSERT(rw_locked(lp, rw)); 320*0Sstevel@tonic-gate 321*0Sstevel@tonic-gate membar_enter(); 322*0Sstevel@tonic-gate 323*0Sstevel@tonic-gate LOCKSTAT_RECORD(LS_RW_ENTER_ACQUIRE, lp, rw); 324*0Sstevel@tonic-gate } 325*0Sstevel@tonic-gate 326*0Sstevel@tonic-gate /* 327*0Sstevel@tonic-gate * Return the number of readers to wake, or zero if we should wake a writer. 328*0Sstevel@tonic-gate * Called only by exiting/downgrading writers (readers don't wake readers). 329*0Sstevel@tonic-gate */ 330*0Sstevel@tonic-gate static int 331*0Sstevel@tonic-gate rw_readers_to_wake(turnstile_t *ts) 332*0Sstevel@tonic-gate { 333*0Sstevel@tonic-gate kthread_t *next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first; 334*0Sstevel@tonic-gate kthread_t *next_reader = ts->ts_sleepq[TS_READER_Q].sq_first; 335*0Sstevel@tonic-gate pri_t wpri = (next_writer != NULL) ? DISP_PRIO(next_writer) : -1; 336*0Sstevel@tonic-gate int count = 0; 337*0Sstevel@tonic-gate 338*0Sstevel@tonic-gate while (next_reader != NULL) { 339*0Sstevel@tonic-gate if (DISP_PRIO(next_reader) < wpri) 340*0Sstevel@tonic-gate break; 341*0Sstevel@tonic-gate next_reader->t_kpri_req++; 342*0Sstevel@tonic-gate next_reader = next_reader->t_link; 343*0Sstevel@tonic-gate count++; 344*0Sstevel@tonic-gate } 345*0Sstevel@tonic-gate return (count); 346*0Sstevel@tonic-gate } 347*0Sstevel@tonic-gate 348*0Sstevel@tonic-gate /* 349*0Sstevel@tonic-gate * Full-service implementation of rw_exit() to handle all the hard cases. 350*0Sstevel@tonic-gate * Called from the assembly version if anything complicated is going on. 351*0Sstevel@tonic-gate * There is no semantic difference between calling rw_exit() and calling 352*0Sstevel@tonic-gate * rw_exit_wakeup() directly. 353*0Sstevel@tonic-gate */ 354*0Sstevel@tonic-gate void 355*0Sstevel@tonic-gate rw_exit_wakeup(rwlock_impl_t *lp) 356*0Sstevel@tonic-gate { 357*0Sstevel@tonic-gate turnstile_t *ts; 358*0Sstevel@tonic-gate uintptr_t old, new, lock_value; 359*0Sstevel@tonic-gate kthread_t *next_writer; 360*0Sstevel@tonic-gate int nreaders; 361*0Sstevel@tonic-gate 362*0Sstevel@tonic-gate membar_exit(); 363*0Sstevel@tonic-gate 364*0Sstevel@tonic-gate old = lp->rw_wwwh; 365*0Sstevel@tonic-gate if (old & RW_WRITE_LOCKED) { 366*0Sstevel@tonic-gate if ((old & RW_OWNER) != (uintptr_t)curthread) { 367*0Sstevel@tonic-gate rw_panic("rw_exit: not owner", lp); 368*0Sstevel@tonic-gate lp->rw_wwwh = 0; 369*0Sstevel@tonic-gate return; 370*0Sstevel@tonic-gate } 371*0Sstevel@tonic-gate lock_value = RW_WRITE_LOCK(curthread); 372*0Sstevel@tonic-gate } else { 373*0Sstevel@tonic-gate if ((old & RW_LOCKED) == 0) { 374*0Sstevel@tonic-gate rw_panic("rw_exit: lock not held", lp); 375*0Sstevel@tonic-gate return; 376*0Sstevel@tonic-gate } 377*0Sstevel@tonic-gate lock_value = RW_READ_LOCK; 378*0Sstevel@tonic-gate } 379*0Sstevel@tonic-gate 380*0Sstevel@tonic-gate for (;;) { 381*0Sstevel@tonic-gate /* 382*0Sstevel@tonic-gate * If this is *not* the final exit of a lock with waiters, 383*0Sstevel@tonic-gate * just drop the lock -- there's nothing tricky going on. 384*0Sstevel@tonic-gate */ 385*0Sstevel@tonic-gate old = lp->rw_wwwh; 386*0Sstevel@tonic-gate new = old - lock_value; 387*0Sstevel@tonic-gate if ((new & (RW_LOCKED | RW_HAS_WAITERS)) != RW_HAS_WAITERS) { 388*0Sstevel@tonic-gate if (casip(&lp->rw_wwwh, old, new) != old) 389*0Sstevel@tonic-gate continue; 390*0Sstevel@tonic-gate break; 391*0Sstevel@tonic-gate } 392*0Sstevel@tonic-gate 393*0Sstevel@tonic-gate /* 394*0Sstevel@tonic-gate * Perform the final exit of a lock that has waiters. 395*0Sstevel@tonic-gate */ 396*0Sstevel@tonic-gate ts = turnstile_lookup(lp); 397*0Sstevel@tonic-gate 398*0Sstevel@tonic-gate next_writer = ts->ts_sleepq[TS_WRITER_Q].sq_first; 399*0Sstevel@tonic-gate 400*0Sstevel@tonic-gate if ((old & RW_WRITE_LOCKED) && 401*0Sstevel@tonic-gate (nreaders = rw_readers_to_wake(ts)) > 0) { 402*0Sstevel@tonic-gate /* 403*0Sstevel@tonic-gate * Don't drop the lock -- just set the hold count 404*0Sstevel@tonic-gate * such that we grant the lock to all readers at once. 405*0Sstevel@tonic-gate */ 406*0Sstevel@tonic-gate new = nreaders * RW_READ_LOCK; 407*0Sstevel@tonic-gate if (ts->ts_waiters > nreaders) 408*0Sstevel@tonic-gate new |= RW_HAS_WAITERS; 409*0Sstevel@tonic-gate if (next_writer) 410*0Sstevel@tonic-gate new |= RW_WRITE_WANTED; 411*0Sstevel@tonic-gate lp->rw_wwwh = new; 412*0Sstevel@tonic-gate membar_enter(); 413*0Sstevel@tonic-gate turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL); 414*0Sstevel@tonic-gate } else { 415*0Sstevel@tonic-gate /* 416*0Sstevel@tonic-gate * Don't drop the lock -- just transfer ownership 417*0Sstevel@tonic-gate * directly to next_writer. Note that there must 418*0Sstevel@tonic-gate * be at least one waiting writer, because we get 419*0Sstevel@tonic-gate * here only if (A) the lock is read-locked or 420*0Sstevel@tonic-gate * (B) there are no waiting readers. In case (A), 421*0Sstevel@tonic-gate * since the lock is read-locked there would be no 422*0Sstevel@tonic-gate * reason for other readers to have blocked unless 423*0Sstevel@tonic-gate * the RW_WRITE_WANTED bit was set. In case (B), 424*0Sstevel@tonic-gate * since there are waiters but no waiting readers, 425*0Sstevel@tonic-gate * they must all be waiting writers. 426*0Sstevel@tonic-gate */ 427*0Sstevel@tonic-gate ASSERT(lp->rw_wwwh & RW_WRITE_WANTED); 428*0Sstevel@tonic-gate new = RW_WRITE_LOCK(next_writer); 429*0Sstevel@tonic-gate if (ts->ts_waiters > 1) 430*0Sstevel@tonic-gate new |= RW_HAS_WAITERS; 431*0Sstevel@tonic-gate if (next_writer->t_link) 432*0Sstevel@tonic-gate new |= RW_WRITE_WANTED; 433*0Sstevel@tonic-gate lp->rw_wwwh = new; 434*0Sstevel@tonic-gate membar_enter(); 435*0Sstevel@tonic-gate turnstile_wakeup(ts, TS_WRITER_Q, 1, next_writer); 436*0Sstevel@tonic-gate } 437*0Sstevel@tonic-gate break; 438*0Sstevel@tonic-gate } 439*0Sstevel@tonic-gate 440*0Sstevel@tonic-gate if (lock_value == RW_READ_LOCK) { 441*0Sstevel@tonic-gate THREAD_KPRI_RELEASE(); 442*0Sstevel@tonic-gate LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_READER); 443*0Sstevel@tonic-gate } else { 444*0Sstevel@tonic-gate LOCKSTAT_RECORD(LS_RW_EXIT_RELEASE, lp, RW_WRITER); 445*0Sstevel@tonic-gate } 446*0Sstevel@tonic-gate } 447*0Sstevel@tonic-gate 448*0Sstevel@tonic-gate int 449*0Sstevel@tonic-gate rw_tryenter(krwlock_t *rwlp, krw_t rw) 450*0Sstevel@tonic-gate { 451*0Sstevel@tonic-gate rwlock_impl_t *lp = (rwlock_impl_t *)rwlp; 452*0Sstevel@tonic-gate uintptr_t old; 453*0Sstevel@tonic-gate 454*0Sstevel@tonic-gate if (rw == RW_READER) { 455*0Sstevel@tonic-gate THREAD_KPRI_REQUEST(); 456*0Sstevel@tonic-gate do { 457*0Sstevel@tonic-gate if ((old = lp->rw_wwwh) & RW_WRITE_CLAIMED) { 458*0Sstevel@tonic-gate THREAD_KPRI_RELEASE(); 459*0Sstevel@tonic-gate return (0); 460*0Sstevel@tonic-gate } 461*0Sstevel@tonic-gate } while (casip(&lp->rw_wwwh, old, old + RW_READ_LOCK) != old); 462*0Sstevel@tonic-gate LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw); 463*0Sstevel@tonic-gate } else { 464*0Sstevel@tonic-gate if (casip(&lp->rw_wwwh, 0, RW_WRITE_LOCK(curthread)) != 0) 465*0Sstevel@tonic-gate return (0); 466*0Sstevel@tonic-gate LOCKSTAT_RECORD(LS_RW_TRYENTER_ACQUIRE, lp, rw); 467*0Sstevel@tonic-gate } 468*0Sstevel@tonic-gate ASSERT(rw_locked(lp, rw)); 469*0Sstevel@tonic-gate membar_enter(); 470*0Sstevel@tonic-gate return (1); 471*0Sstevel@tonic-gate } 472*0Sstevel@tonic-gate 473*0Sstevel@tonic-gate void 474*0Sstevel@tonic-gate rw_downgrade(krwlock_t *rwlp) 475*0Sstevel@tonic-gate { 476*0Sstevel@tonic-gate rwlock_impl_t *lp = (rwlock_impl_t *)rwlp; 477*0Sstevel@tonic-gate 478*0Sstevel@tonic-gate THREAD_KPRI_REQUEST(); 479*0Sstevel@tonic-gate membar_exit(); 480*0Sstevel@tonic-gate 481*0Sstevel@tonic-gate if ((lp->rw_wwwh & RW_OWNER) != (uintptr_t)curthread) { 482*0Sstevel@tonic-gate rw_panic("rw_downgrade: not owner", lp); 483*0Sstevel@tonic-gate return; 484*0Sstevel@tonic-gate } 485*0Sstevel@tonic-gate 486*0Sstevel@tonic-gate if (atomic_add_ip_nv(&lp->rw_wwwh, 487*0Sstevel@tonic-gate RW_READ_LOCK - RW_WRITE_LOCK(curthread)) & RW_HAS_WAITERS) { 488*0Sstevel@tonic-gate turnstile_t *ts = turnstile_lookup(lp); 489*0Sstevel@tonic-gate int nreaders = rw_readers_to_wake(ts); 490*0Sstevel@tonic-gate if (nreaders > 0) { 491*0Sstevel@tonic-gate uintptr_t delta = nreaders * RW_READ_LOCK; 492*0Sstevel@tonic-gate if (ts->ts_waiters == nreaders) 493*0Sstevel@tonic-gate delta -= RW_HAS_WAITERS; 494*0Sstevel@tonic-gate atomic_add_ip(&lp->rw_wwwh, delta); 495*0Sstevel@tonic-gate } 496*0Sstevel@tonic-gate turnstile_wakeup(ts, TS_READER_Q, nreaders, NULL); 497*0Sstevel@tonic-gate } 498*0Sstevel@tonic-gate ASSERT(rw_locked(lp, RW_READER)); 499*0Sstevel@tonic-gate LOCKSTAT_RECORD0(LS_RW_DOWNGRADE_DOWNGRADE, lp); 500*0Sstevel@tonic-gate } 501*0Sstevel@tonic-gate 502*0Sstevel@tonic-gate int 503*0Sstevel@tonic-gate rw_tryupgrade(krwlock_t *rwlp) 504*0Sstevel@tonic-gate { 505*0Sstevel@tonic-gate rwlock_impl_t *lp = (rwlock_impl_t *)rwlp; 506*0Sstevel@tonic-gate uintptr_t old, new; 507*0Sstevel@tonic-gate 508*0Sstevel@tonic-gate ASSERT(rw_locked(lp, RW_READER)); 509*0Sstevel@tonic-gate 510*0Sstevel@tonic-gate do { 511*0Sstevel@tonic-gate if (((old = lp->rw_wwwh) & ~RW_HAS_WAITERS) != RW_READ_LOCK) 512*0Sstevel@tonic-gate return (0); 513*0Sstevel@tonic-gate new = old + RW_WRITE_LOCK(curthread) - RW_READ_LOCK; 514*0Sstevel@tonic-gate } while (casip(&lp->rw_wwwh, old, new) != old); 515*0Sstevel@tonic-gate 516*0Sstevel@tonic-gate membar_enter(); 517*0Sstevel@tonic-gate THREAD_KPRI_RELEASE(); 518*0Sstevel@tonic-gate LOCKSTAT_RECORD0(LS_RW_TRYUPGRADE_UPGRADE, lp); 519*0Sstevel@tonic-gate ASSERT(rw_locked(lp, RW_WRITER)); 520*0Sstevel@tonic-gate return (1); 521*0Sstevel@tonic-gate } 522*0Sstevel@tonic-gate 523*0Sstevel@tonic-gate int 524*0Sstevel@tonic-gate rw_read_held(krwlock_t *rwlp) 525*0Sstevel@tonic-gate { 526*0Sstevel@tonic-gate uintptr_t tmp; 527*0Sstevel@tonic-gate 528*0Sstevel@tonic-gate return (_RW_READ_HELD(rwlp, tmp)); 529*0Sstevel@tonic-gate } 530*0Sstevel@tonic-gate 531*0Sstevel@tonic-gate int 532*0Sstevel@tonic-gate rw_write_held(krwlock_t *rwlp) 533*0Sstevel@tonic-gate { 534*0Sstevel@tonic-gate return (_RW_WRITE_HELD(rwlp)); 535*0Sstevel@tonic-gate } 536*0Sstevel@tonic-gate 537*0Sstevel@tonic-gate int 538*0Sstevel@tonic-gate rw_lock_held(krwlock_t *rwlp) 539*0Sstevel@tonic-gate { 540*0Sstevel@tonic-gate return (_RW_LOCK_HELD(rwlp)); 541*0Sstevel@tonic-gate } 542*0Sstevel@tonic-gate 543*0Sstevel@tonic-gate /* 544*0Sstevel@tonic-gate * Like rw_read_held(), but ASSERTs that the lock is currently held 545*0Sstevel@tonic-gate */ 546*0Sstevel@tonic-gate int 547*0Sstevel@tonic-gate rw_read_locked(krwlock_t *rwlp) 548*0Sstevel@tonic-gate { 549*0Sstevel@tonic-gate uintptr_t old = ((rwlock_impl_t *)rwlp)->rw_wwwh; 550*0Sstevel@tonic-gate 551*0Sstevel@tonic-gate ASSERT(old & RW_LOCKED); 552*0Sstevel@tonic-gate return ((old & RW_LOCKED) && !(old & RW_WRITE_LOCKED)); 553*0Sstevel@tonic-gate } 554*0Sstevel@tonic-gate 555*0Sstevel@tonic-gate /* 556*0Sstevel@tonic-gate * Returns non-zero if the lock is either held or desired by a writer 557*0Sstevel@tonic-gate */ 558*0Sstevel@tonic-gate int 559*0Sstevel@tonic-gate rw_iswriter(krwlock_t *rwlp) 560*0Sstevel@tonic-gate { 561*0Sstevel@tonic-gate return (_RW_ISWRITER(rwlp)); 562*0Sstevel@tonic-gate } 563*0Sstevel@tonic-gate 564*0Sstevel@tonic-gate kthread_t * 565*0Sstevel@tonic-gate rw_owner(krwlock_t *rwlp) 566*0Sstevel@tonic-gate { 567*0Sstevel@tonic-gate uintptr_t old = ((rwlock_impl_t *)rwlp)->rw_wwwh; 568*0Sstevel@tonic-gate 569*0Sstevel@tonic-gate return ((old & RW_WRITE_LOCKED) ? (kthread_t *)(old & RW_OWNER) : NULL); 570*0Sstevel@tonic-gate } 571