/*	$NetBSD: kern_turnstile.c,v 1.25 2009/09/13 14:38:20 bouyer Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Turnstiles are described in detail in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 *
 * Turnstiles are kept in a hash table.  There are likely to be many more
 * synchronisation objects than there are threads.  Since a thread can block
 * on only one lock at a time, we only need one turnstile per thread, and
 * so they are allocated at thread creation time.
 *
 * When a thread decides it needs to block on a lock, it looks up the
 * active turnstile for that lock.  If no active turnstile exists, then
 * the thread lends its turnstile to the lock.  If there is already an
 * active turnstile for the lock, the thread places its turnstile on a
 * list of free turnstiles, and references the active one instead.
 *
 * The act of looking up the turnstile acquires an interlock on the sleep
 * queue.  If a thread decides it doesn't need to block after all, then this
 * interlock must be released by explicitly aborting the turnstile
 * operation.
 *
 * When a thread is awakened, it needs to get its turnstile back.  If there
 * are still other threads waiting in the active turnstile, the thread
 * grabs a free turnstile off the free list.  Otherwise, it can take back
 * the active turnstile from the lock (thus deactivating the turnstile).
 *
 * Turnstiles are the place to do priority inheritance.
 */
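
/*
 * A rough sketch of the blocking-side protocol described above, as a
 * hypothetical lock's slow path might drive it.  The "lock" object, the
 * lock_owned_by_other() predicate and lock_syncobj are invented for
 * illustration; the turnstile_* calls and TS_WRITER_Q are the real
 * interface:
 *
 *	ts = turnstile_lookup(lock);	(also takes the chain lock)
 *	if (lock_owned_by_other(lock)) {
 *		(still contended: queue as a writer and go to sleep;
 *		a NULL ts is fine, turnstile_block() then lends the
 *		caller's own turnstile to the lock)
 *		turnstile_block(ts, TS_WRITER_Q, lock, &lock_syncobj);
 *	} else {
 *		(no need to block after all: abort the operation,
 *		releasing the sleep queue interlock)
 *		turnstile_exit(lock);
 *	}
 */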

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.25 2009/09/13 14:38:20 bouyer Exp $");

#include <sys/param.h>
#include <sys/lockdebug.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sleepq.h>
#include <sys/systm.h>

#include <uvm/uvm_extern.h>

#define TS_HASH_SIZE    64
#define TS_HASH_MASK    (TS_HASH_SIZE - 1)
#define TS_HASH(obj)    (((uintptr_t)(obj) >> 3) & TS_HASH_MASK)

tschain_t       turnstile_tab[TS_HASH_SIZE];
pool_cache_t    turnstile_cache;

int     turnstile_ctor(void *, void *, int);

extern turnstile_t turnstile0;

/*
 * turnstile_init:
 *
 *	Initialize the turnstile mechanism.
 */
void
turnstile_init(void)
{
        tschain_t *tc;
        int i;

        for (i = 0; i < TS_HASH_SIZE; i++) {
                tc = &turnstile_tab[i];
                LIST_INIT(&tc->tc_chain);
                tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
        }

        turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0,
            "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL);
        KASSERT(turnstile_cache != NULL);

        (void)turnstile_ctor(NULL, &turnstile0, 0);
}

/*
 * turnstile_ctor:
 *
 *	Constructor for turnstiles.
 */
int
turnstile_ctor(void *arg, void *obj, int flags)
{
        turnstile_t *ts = obj;

        memset(ts, 0, sizeof(*ts));
        sleepq_init(&ts->ts_sleepq[TS_READER_Q]);
        sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]);
        return (0);
}

/*
 * turnstile_remove:
 *
 *	Remove an LWP from a turnstile sleep queue and wake it.
 */
static inline void
turnstile_remove(turnstile_t *ts, lwp_t *l, int q)
{
        turnstile_t *nts;

        KASSERT(l->l_ts == ts);

        /*
         * This thread is no longer using the active turnstile.
         * Find an inactive one on the free list to give to it.
         */
        if ((nts = ts->ts_free) != NULL) {
                KASSERT(TS_ALL_WAITERS(ts) > 1);
                l->l_ts = nts;
                ts->ts_free = nts->ts_free;
                nts->ts_free = NULL;
        } else {
                /*
                 * If the free list is empty, this is the last
                 * waiter.
                 */
                KASSERT(TS_ALL_WAITERS(ts) == 1);
                LIST_REMOVE(ts, ts_chain);
        }

        ts->ts_waiters[q]--;
        (void)sleepq_remove(&ts->ts_sleepq[q], l);
}

/*
 * turnstile_lookup:
 *
 *	Look up the turnstile for the specified lock.  This acquires and
 *	holds the turnstile chain lock (sleep queue interlock).
 */
turnstile_t *
turnstile_lookup(wchan_t obj)
{
        turnstile_t *ts;
        tschain_t *tc;

        tc = &turnstile_tab[TS_HASH(obj)];
        mutex_spin_enter(tc->tc_mutex);

        LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
                if (ts->ts_obj == obj)
                        return (ts);

        /*
         * No turnstile yet for this lock.  No problem, turnstile_block()
         * handles this by fetching the turnstile from the blocking thread.
         */
        return (NULL);
}

/*
 * turnstile_exit:
 *
 *	Abort a turnstile operation.
 */
void
turnstile_exit(wchan_t obj)
{
        tschain_t *tc;

        tc = &turnstile_tab[TS_HASH(obj)];
        mutex_spin_exit(tc->tc_mutex);
}
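
/*
 * turnstile_block() below walks the chain of blocked lwps by asking each
 * sync object who owns it, through the sobj_owner hook of its syncobj_t.
 * A minimal sketch of such a hook, for a hypothetical lock that keeps an
 * lwp_t pointer in an lk_owner field (the lock type and field name are
 * invented; the signature matches the
 * (*l->l_syncobj->sobj_owner)(l->l_wchan) call below):
 *
 *	static lwp_t *
 *	hypolock_owner(wchan_t obj)
 *	{
 *		const struct hypolock *lk = obj;
 *
 *		return lk->lk_owner;
 *	}
 */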

/*
 * turnstile_block:
 *
 *	Enter an object into the turnstile chain and prepare the current
 *	LWP for sleep.
 */
void
turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj)
{
        lwp_t *l;
        lwp_t *cur;     /* cached curlwp */
        lwp_t *owner;
        turnstile_t *ots;
        tschain_t *tc;
        sleepq_t *sq;
        pri_t prio, obase;

        tc = &turnstile_tab[TS_HASH(obj)];
        l = cur = curlwp;

        KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
        KASSERT(mutex_owned(tc->tc_mutex));
        KASSERT(l != NULL && l->l_ts != NULL);

        if (ts == NULL) {
                /*
                 * We are the first thread to wait for this object;
                 * lend our turnstile to it.
                 */
                ts = l->l_ts;
                KASSERT(TS_ALL_WAITERS(ts) == 0);
                KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) &&
                    TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
                ts->ts_obj = obj;
                ts->ts_inheritor = NULL;
                LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain);
        } else {
                /*
                 * Object already has a turnstile.  Put our turnstile
                 * onto the free list, and reference the existing
                 * turnstile instead.
                 */
                ots = l->l_ts;
                KASSERT(ots->ts_free == NULL);
                ots->ts_free = ts->ts_free;
                ts->ts_free = ots;
                l->l_ts = ts;

                KASSERT(ts->ts_obj == obj);
                KASSERT(TS_ALL_WAITERS(ts) != 0);
                KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) ||
                    !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q]));
        }

        sq = &ts->ts_sleepq[q];
        ts->ts_waiters[q]++;
        sleepq_enter(sq, l, tc->tc_mutex);
        /* now tc->tc_mutex is also cur->l_mutex and l->l_mutex */
        LOCKDEBUG_BARRIER(tc->tc_mutex, 1);
        l->l_kpriority = true;
        obase = l->l_kpribase;
        if (obase < PRI_KTHREAD)
                l->l_kpribase = PRI_KTHREAD;
        sleepq_enqueue(sq, obj, "tstile", sobj);

        /*
         * Disable preemption across this entire block, as we may drop
         * scheduler locks (allowing preemption), and would prefer not
         * to be interrupted while in a state of flux.
         */
        KPREEMPT_DISABLE(l);

        /*
         * Lend our priority to lwps on the blocking chain.
         *
         * NOTE: if you get a panic in this code block, it is likely that
         * a lock has been destroyed or corrupted while still in use.  Try
         * compiling a kernel with LOCKDEBUG to pinpoint the problem.
         */
        prio = lwp_eprio(l);

        for (;;) {
                bool dolock;

                if (l->l_wchan == NULL)
                        break;

                owner = (*l->l_syncobj->sobj_owner)(l->l_wchan);
                if (owner == NULL)
                        break;

                /* The owner may have changed as we have dropped the tc lock */
                if (cur == owner) {
                        /*
                         * We own the lock: stop here, sleepq_block()
                         * should wake up immediately.
                         */
                        break;
                }

                if (l == owner) {
                        /* owner has changed, restart from curlwp */
                        lwp_unlock(l);
                        l = cur;
                        lwp_lock(l);
                        prio = lwp_eprio(l);
                        continue;
                }

                if (l->l_mutex != owner->l_mutex)
                        dolock = true;
                else
                        dolock = false;
                if (dolock && !lwp_trylock(owner)) {
                        /*
                         * Restart from curlwp.
                         * Note that there may be a livelock here:
                         * the owner may try grabbing cur's lock (which is
                         * the tc lock) while we're trying to grab
                         * the owner's lock.
                         */
                        lwp_unlock(l);
                        l = cur;
                        lwp_lock(l);
                        prio = lwp_eprio(l);
                        continue;
                }
                if (prio <= lwp_eprio(owner)) {
                        if (dolock)
                                lwp_unlock(owner);
                        break;
                }
                ts = l->l_ts;
                KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL);
                if (ts->ts_inheritor == NULL) {
                        ts->ts_inheritor = owner;
                        ts->ts_eprio = prio;
                        SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain);
                        lwp_lendpri(owner, prio);
                } else if (prio > ts->ts_eprio) {
                        ts->ts_eprio = prio;
                        lwp_lendpri(owner, prio);
                }
                if (dolock)
                        lwp_unlock(l);
                l = owner;
        }
        LOCKDEBUG_BARRIER(l->l_mutex, 1);
        if (cur->l_mutex != l->l_mutex) {
                lwp_unlock(l);
                lwp_lock(cur);
        }
        LOCKDEBUG_BARRIER(cur->l_mutex, 1);

        sleepq_block(0, false);
        cur->l_kpribase = obase;
        KPREEMPT_ENABLE(cur);
}
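
/*
 * A worked example of the lending loop above (thread names and priority
 * values are invented for illustration):  thread L (priority 10) holds
 * mutex A; thread M (priority 50) holds mutex B and is blocked on A;
 * thread H (priority 90) now blocks on B.  Starting from H, the loop
 * follows l_wchan and sobj_owner to M, records 90 in B's turnstile
 * (ts_eprio), links that turnstile onto M's l_pi_lenders list and lends
 * M priority 90 via lwp_lendpri(); it then steps to M's own wait channel
 * and does the same for A's turnstile and L.  L thus runs at priority 90
 * until it releases A, at which point turnstile_wakeup() below restores
 * its original priority.
 */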

/*
 * turnstile_wakeup:
 *
 *	Wake up the specified number of threads that are blocked
 *	in a turnstile.
 */
void
turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl)
{
        sleepq_t *sq;
        tschain_t *tc;
        lwp_t *l;

        tc = &turnstile_tab[TS_HASH(ts->ts_obj)];
        sq = &ts->ts_sleepq[q];

        KASSERT(q == TS_READER_Q || q == TS_WRITER_Q);
        KASSERT(count > 0 && count <= TS_WAITERS(ts, q));
        KASSERT(mutex_owned(tc->tc_mutex));
        KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL);

        /*
         * Restore inherited priority, if necessary.
         */

        if (ts->ts_inheritor != NULL) {
                turnstile_t *iter;
                turnstile_t *next;
                turnstile_t *prev = NULL;
                pri_t prio;
                bool dolock;

                ts->ts_inheritor = NULL;
                l = curlwp;

                dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock;
                if (dolock) {
                        lwp_lock(l);
                }

                /*
                 * The following loop does two things:
                 *
                 * - remove ts from the list.
                 *
                 * - from the rest of the list, find the highest priority.
                 */

                prio = -1;
                KASSERT(!SLIST_EMPTY(&l->l_pi_lenders));
                for (iter = SLIST_FIRST(&l->l_pi_lenders);
                    iter != NULL; iter = next) {
                        KASSERT(lwp_eprio(l) >= ts->ts_eprio);
                        next = SLIST_NEXT(iter, ts_pichain);
                        if (iter == ts) {
                                if (prev == NULL) {
                                        SLIST_REMOVE_HEAD(&l->l_pi_lenders,
                                            ts_pichain);
                                } else {
                                        SLIST_REMOVE_AFTER(prev, ts_pichain);
                                }
                        } else if (prio < iter->ts_eprio) {
                                prio = iter->ts_eprio;
                        }
                        prev = iter;
                }

                lwp_lendpri(l, prio);

                if (dolock) {
                        lwp_unlock(l);
                }
        }

        if (nl != NULL) {
#if defined(DEBUG) || defined(LOCKDEBUG)
                TAILQ_FOREACH(l, sq, l_sleepchain) {
                        if (l == nl)
                                break;
                }
                if (l == NULL)
                        panic("turnstile_wakeup: nl not on sleepq");
#endif
                turnstile_remove(ts, nl, q);
        } else {
                while (count-- > 0) {
                        l = TAILQ_FIRST(sq);
                        KASSERT(l != NULL);
                        turnstile_remove(ts, l, q);
                }
        }
        mutex_spin_exit(tc->tc_mutex);
}
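
/*
 * A rough sketch of the release-side protocol, as a hypothetical lock's
 * unlock slow path might drive it (lock_release() is invented; the
 * turnstile_* calls, TS_WRITER_Q and TS_WAITERS() are real):
 *
 *	ts = turnstile_lookup(lock);	(takes the chain lock)
 *	lock_release(lock);		(mark the lock free)
 *	if (ts != NULL) {
 *		(wake all blocked writers; this also drops the
 *		chain lock)
 *		turnstile_wakeup(ts, TS_WRITER_Q,
 *		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
 *	} else {
 *		(nobody is blocked; just drop the chain lock)
 *		turnstile_exit(lock);
 *	}
 */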

/*
 * turnstile_unsleep:
 *
 *	Remove an LWP from the turnstile.  This is called when the LWP has
 *	not been awoken normally but instead interrupted: for example, if it
 *	has received a signal.  It's not a valid action for turnstiles,
 *	since LWPs blocking on a turnstile are not interruptible.
 */
u_int
turnstile_unsleep(lwp_t *l, bool cleanup)
{

        lwp_unlock(l);
        panic("turnstile_unsleep");
}

/*
 * turnstile_changepri:
 *
 *	Adjust the priority of an LWP residing on a turnstile.
 */
void
turnstile_changepri(lwp_t *l, pri_t pri)
{

        /* XXX priority inheritance */
        sleepq_changepri(l, pri);
}

#if defined(LOCKDEBUG)
/*
 * turnstile_print:
 *
 *	Given the address of a lock object, print the contents of a
 *	turnstile.
 */
void
turnstile_print(volatile void *obj, void (*pr)(const char *, ...))
{
        turnstile_t *ts;
        tschain_t *tc;
        sleepq_t *rsq, *wsq;
        lwp_t *l;

        tc = &turnstile_tab[TS_HASH(obj)];

        LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
                if (ts->ts_obj == obj)
                        break;

        (*pr)("Turnstile chain at %p.\n", tc);
        if (ts == NULL) {
                (*pr)("=> No active turnstile for this lock.\n");
                return;
        }

        rsq = &ts->ts_sleepq[TS_READER_Q];
        wsq = &ts->ts_sleepq[TS_WRITER_Q];

        (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, wsq, rsq);

        (*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q));
        TAILQ_FOREACH(l, rsq, l_sleepchain) {
                (*pr)(" %p", l);
        }
        (*pr)("\n");

        (*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q));
        TAILQ_FOREACH(l, wsq, l_sleepchain) {
                (*pr)(" %p", l);
        }
        (*pr)("\n");
}
#endif  /* LOCKDEBUG */
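
/*
 * For reference, turnstile_print() output is shaped like the following,
 * per the format strings above (the pointer values here are invented):
 *
 *	Turnstile chain at 0xffff8000104a3d40.
 *	=> Turnstile at 0xffff80001268a500 (wrq=0xffff80001268a538,
 *	   rdq=0xffff80001268a510).
 *	=> 0 waiting readers:
 *	=> 2 waiting writers: 0xffff800012a33080 0xffff800012a40040
 */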