1 /* $NetBSD: kern_turnstile.c,v 1.24 2009/03/21 13:11:14 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2002, 2006, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe and Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Turnstiles are described in detail in: 34 * 35 * Solaris Internals: Core Kernel Architecture, Jim Mauro and 36 * Richard McDougall. 37 * 38 * Turnstiles are kept in a hash table. There are likely to be many more 39 * synchronisation objects than there are threads. Since a thread can block 40 * on only one lock at a time, we only need one turnstile per thread, and 41 * so they are allocated at thread creation time. 42 * 43 * When a thread decides it needs to block on a lock, it looks up the 44 * active turnstile for that lock. If no active turnstile exists, then 45 * the process lends its turnstile to the lock. If there is already an 46 * active turnstile for the lock, the thread places its turnstile on a 47 * list of free turnstiles, and references the active one instead. 48 * 49 * The act of looking up the turnstile acquires an interlock on the sleep 50 * queue. If a thread decides it doesn't need to block after all, then this 51 * interlock must be released by explicitly aborting the turnstile 52 * operation. 53 * 54 * When a thread is awakened, it needs to get its turnstile back. If there 55 * are still other threads waiting in the active turnstile, the thread 56 * grabs a free turnstile off the free list. Otherwise, it can take back 57 * the active turnstile from the lock (thus deactivating the turnstile). 58 * 59 * Turnstiles are the place to do priority inheritence. 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.24 2009/03/21 13:11:14 ad Exp $"); 64 65 #include <sys/param.h> 66 #include <sys/lockdebug.h> 67 #include <sys/pool.h> 68 #include <sys/proc.h> 69 #include <sys/sleepq.h> 70 #include <sys/systm.h> 71 72 #include <uvm/uvm_extern.h> 73 74 #define TS_HASH_SIZE 64 75 #define TS_HASH_MASK (TS_HASH_SIZE - 1) 76 #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) 77 78 tschain_t turnstile_tab[TS_HASH_SIZE]; 79 pool_cache_t turnstile_cache; 80 81 int turnstile_ctor(void *, void *, int); 82 83 extern turnstile_t turnstile0; 84 85 /* 86 * turnstile_init: 87 * 88 * Initialize the turnstile mechanism. 89 */ 90 void 91 turnstile_init(void) 92 { 93 tschain_t *tc; 94 int i; 95 96 for (i = 0; i < TS_HASH_SIZE; i++) { 97 tc = &turnstile_tab[i]; 98 LIST_INIT(&tc->tc_chain); 99 tc->tc_mutex = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED); 100 } 101 102 turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, 103 "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); 104 KASSERT(turnstile_cache != NULL); 105 106 (void)turnstile_ctor(NULL, &turnstile0, 0); 107 } 108 109 /* 110 * turnstile_ctor: 111 * 112 * Constructor for turnstiles. 113 */ 114 int 115 turnstile_ctor(void *arg, void *obj, int flags) 116 { 117 turnstile_t *ts = obj; 118 119 memset(ts, 0, sizeof(*ts)); 120 sleepq_init(&ts->ts_sleepq[TS_READER_Q]); 121 sleepq_init(&ts->ts_sleepq[TS_WRITER_Q]); 122 return (0); 123 } 124 125 /* 126 * turnstile_remove: 127 * 128 * Remove an LWP from a turnstile sleep queue and wake it. 129 */ 130 static inline void 131 turnstile_remove(turnstile_t *ts, lwp_t *l, int q) 132 { 133 turnstile_t *nts; 134 135 KASSERT(l->l_ts == ts); 136 137 /* 138 * This process is no longer using the active turnstile. 139 * Find an inactive one on the free list to give to it. 140 */ 141 if ((nts = ts->ts_free) != NULL) { 142 KASSERT(TS_ALL_WAITERS(ts) > 1); 143 l->l_ts = nts; 144 ts->ts_free = nts->ts_free; 145 nts->ts_free = NULL; 146 } else { 147 /* 148 * If the free list is empty, this is the last 149 * waiter. 150 */ 151 KASSERT(TS_ALL_WAITERS(ts) == 1); 152 LIST_REMOVE(ts, ts_chain); 153 } 154 155 ts->ts_waiters[q]--; 156 (void)sleepq_remove(&ts->ts_sleepq[q], l); 157 } 158 159 /* 160 * turnstile_lookup: 161 * 162 * Look up the turnstile for the specified lock. This acquires and 163 * holds the turnstile chain lock (sleep queue interlock). 164 */ 165 turnstile_t * 166 turnstile_lookup(wchan_t obj) 167 { 168 turnstile_t *ts; 169 tschain_t *tc; 170 171 tc = &turnstile_tab[TS_HASH(obj)]; 172 mutex_spin_enter(tc->tc_mutex); 173 174 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 175 if (ts->ts_obj == obj) 176 return (ts); 177 178 /* 179 * No turnstile yet for this lock. No problem, turnstile_block() 180 * handles this by fetching the turnstile from the blocking thread. 181 */ 182 return (NULL); 183 } 184 185 /* 186 * turnstile_exit: 187 * 188 * Abort a turnstile operation. 189 */ 190 void 191 turnstile_exit(wchan_t obj) 192 { 193 tschain_t *tc; 194 195 tc = &turnstile_tab[TS_HASH(obj)]; 196 mutex_spin_exit(tc->tc_mutex); 197 } 198 199 /* 200 * turnstile_block: 201 * 202 * Enter an object into the turnstile chain and prepare the current 203 * LWP for sleep. 204 */ 205 void 206 turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) 207 { 208 lwp_t *l; 209 lwp_t *cur; /* cached curlwp */ 210 lwp_t *owner; 211 turnstile_t *ots; 212 tschain_t *tc; 213 sleepq_t *sq; 214 pri_t prio, obase; 215 216 tc = &turnstile_tab[TS_HASH(obj)]; 217 l = cur = curlwp; 218 219 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 220 KASSERT(mutex_owned(tc->tc_mutex)); 221 KASSERT(l != NULL && l->l_ts != NULL); 222 223 if (ts == NULL) { 224 /* 225 * We are the first thread to wait for this object; 226 * lend our turnstile to it. 227 */ 228 ts = l->l_ts; 229 KASSERT(TS_ALL_WAITERS(ts) == 0); 230 KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) && 231 TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); 232 ts->ts_obj = obj; 233 ts->ts_inheritor = NULL; 234 LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); 235 } else { 236 /* 237 * Object already has a turnstile. Put our turnstile 238 * onto the free list, and reference the existing 239 * turnstile instead. 240 */ 241 ots = l->l_ts; 242 KASSERT(ots->ts_free == NULL); 243 ots->ts_free = ts->ts_free; 244 ts->ts_free = ots; 245 l->l_ts = ts; 246 247 KASSERT(ts->ts_obj == obj); 248 KASSERT(TS_ALL_WAITERS(ts) != 0); 249 KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q]) || 250 !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q])); 251 } 252 253 sq = &ts->ts_sleepq[q]; 254 ts->ts_waiters[q]++; 255 sleepq_enter(sq, l, tc->tc_mutex); 256 LOCKDEBUG_BARRIER(tc->tc_mutex, 1); 257 l->l_kpriority = true; 258 obase = l->l_kpribase; 259 if (obase < PRI_KTHREAD) 260 l->l_kpribase = PRI_KTHREAD; 261 sleepq_enqueue(sq, obj, "tstile", sobj); 262 263 /* 264 * Disable preemption across this entire block, as we may drop 265 * scheduler locks (allowing preemption), and would prefer not 266 * to be interrupted while in a state of flux. 267 */ 268 KPREEMPT_DISABLE(l); 269 270 /* 271 * Lend our priority to lwps on the blocking chain. 272 * 273 * NOTE: if you get a panic in this code block, it is likely that 274 * a lock has been destroyed or corrupted while still in use. Try 275 * compiling a kernel with LOCKDEBUG to pinpoint the problem. 276 */ 277 prio = lwp_eprio(l); 278 for (;;) { 279 bool dolock; 280 281 if (l->l_wchan == NULL) 282 break; 283 284 owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); 285 if (owner == NULL) 286 break; 287 288 KASSERT(l != owner); 289 KASSERT(cur != owner); 290 291 if (l->l_mutex != owner->l_mutex) 292 dolock = true; 293 else 294 dolock = false; 295 if (dolock && !lwp_trylock(owner)) { 296 /* 297 * restart from curlwp. 298 */ 299 lwp_unlock(l); 300 l = cur; 301 lwp_lock(l); 302 prio = lwp_eprio(l); 303 continue; 304 } 305 if (prio <= lwp_eprio(owner)) { 306 if (dolock) 307 lwp_unlock(owner); 308 break; 309 } 310 ts = l->l_ts; 311 KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL); 312 if (ts->ts_inheritor == NULL) { 313 ts->ts_inheritor = owner; 314 ts->ts_eprio = prio; 315 SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain); 316 lwp_lendpri(owner, prio); 317 } else if (prio > ts->ts_eprio) { 318 ts->ts_eprio = prio; 319 lwp_lendpri(owner, prio); 320 } 321 if (dolock) 322 lwp_unlock(l); 323 l = owner; 324 } 325 LOCKDEBUG_BARRIER(l->l_mutex, 1); 326 if (cur->l_mutex != l->l_mutex) { 327 lwp_unlock(l); 328 lwp_lock(cur); 329 } 330 LOCKDEBUG_BARRIER(cur->l_mutex, 1); 331 332 sleepq_block(0, false); 333 cur->l_kpribase = obase; 334 KPREEMPT_ENABLE(cur); 335 } 336 337 /* 338 * turnstile_wakeup: 339 * 340 * Wake up the specified number of threads that are blocked 341 * in a turnstile. 342 */ 343 void 344 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) 345 { 346 sleepq_t *sq; 347 tschain_t *tc; 348 lwp_t *l; 349 350 tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; 351 sq = &ts->ts_sleepq[q]; 352 353 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 354 KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); 355 KASSERT(mutex_owned(tc->tc_mutex)); 356 KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); 357 358 /* 359 * restore inherited priority if necessary. 360 */ 361 362 if (ts->ts_inheritor != NULL) { 363 turnstile_t *iter; 364 turnstile_t *next; 365 turnstile_t *prev = NULL; 366 pri_t prio; 367 bool dolock; 368 369 ts->ts_inheritor = NULL; 370 l = curlwp; 371 372 dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; 373 if (dolock) { 374 lwp_lock(l); 375 } 376 377 /* 378 * the following loop does two things. 379 * 380 * - remove ts from the list. 381 * 382 * - from the rest of the list, find the highest priority. 383 */ 384 385 prio = -1; 386 KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); 387 for (iter = SLIST_FIRST(&l->l_pi_lenders); 388 iter != NULL; iter = next) { 389 KASSERT(lwp_eprio(l) >= ts->ts_eprio); 390 next = SLIST_NEXT(iter, ts_pichain); 391 if (iter == ts) { 392 if (prev == NULL) { 393 SLIST_REMOVE_HEAD(&l->l_pi_lenders, 394 ts_pichain); 395 } else { 396 SLIST_REMOVE_AFTER(prev, ts_pichain); 397 } 398 } else if (prio < iter->ts_eprio) { 399 prio = iter->ts_eprio; 400 } 401 prev = iter; 402 } 403 404 lwp_lendpri(l, prio); 405 406 if (dolock) { 407 lwp_unlock(l); 408 } 409 } 410 411 if (nl != NULL) { 412 #if defined(DEBUG) || defined(LOCKDEBUG) 413 TAILQ_FOREACH(l, sq, l_sleepchain) { 414 if (l == nl) 415 break; 416 } 417 if (l == NULL) 418 panic("turnstile_wakeup: nl not on sleepq"); 419 #endif 420 turnstile_remove(ts, nl, q); 421 } else { 422 while (count-- > 0) { 423 l = TAILQ_FIRST(sq); 424 KASSERT(l != NULL); 425 turnstile_remove(ts, l, q); 426 } 427 } 428 mutex_spin_exit(tc->tc_mutex); 429 } 430 431 /* 432 * turnstile_unsleep: 433 * 434 * Remove an LWP from the turnstile. This is called when the LWP has 435 * not been awoken normally but instead interrupted: for example, if it 436 * has received a signal. It's not a valid action for turnstiles, 437 * since LWPs blocking on a turnstile are not interruptable. 438 */ 439 u_int 440 turnstile_unsleep(lwp_t *l, bool cleanup) 441 { 442 443 lwp_unlock(l); 444 panic("turnstile_unsleep"); 445 } 446 447 /* 448 * turnstile_changepri: 449 * 450 * Adjust the priority of an LWP residing on a turnstile. 451 */ 452 void 453 turnstile_changepri(lwp_t *l, pri_t pri) 454 { 455 456 /* XXX priority inheritance */ 457 sleepq_changepri(l, pri); 458 } 459 460 #if defined(LOCKDEBUG) 461 /* 462 * turnstile_print: 463 * 464 * Given the address of a lock object, print the contents of a 465 * turnstile. 466 */ 467 void 468 turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) 469 { 470 turnstile_t *ts; 471 tschain_t *tc; 472 sleepq_t *rsq, *wsq; 473 lwp_t *l; 474 475 tc = &turnstile_tab[TS_HASH(obj)]; 476 477 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 478 if (ts->ts_obj == obj) 479 break; 480 481 (*pr)("Turnstile chain at %p.\n", tc); 482 if (ts == NULL) { 483 (*pr)("=> No active turnstile for this lock.\n"); 484 return; 485 } 486 487 rsq = &ts->ts_sleepq[TS_READER_Q]; 488 wsq = &ts->ts_sleepq[TS_WRITER_Q]; 489 490 (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq); 491 492 (*pr)("=> %d waiting readers:", TS_WAITERS(ts, TS_READER_Q)); 493 TAILQ_FOREACH(l, rsq, l_sleepchain) { 494 (*pr)(" %p", l); 495 } 496 (*pr)("\n"); 497 498 (*pr)("=> %d waiting writers:", TS_WAITERS(ts, TS_WRITER_Q)); 499 TAILQ_FOREACH(l, wsq, l_sleepchain) { 500 (*pr)(" %p", l); 501 } 502 (*pr)("\n"); 503 } 504 #endif /* LOCKDEBUG */ 505