1 /* $NetBSD: kern_turnstile.c,v 1.20 2008/04/28 20:24:04 martin Exp $ */ 2 3 /*- 4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe and Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Turnstiles are described in detail in: 34 * 35 * Solaris Internals: Core Kernel Architecture, Jim Mauro and 36 * Richard McDougall. 37 * 38 * Turnstiles are kept in a hash table. There are likely to be many more 39 * synchronisation objects than there are threads. Since a thread can block 40 * on only one lock at a time, we only need one turnstile per thread, and 41 * so they are allocated at thread creation time. 42 * 43 * When a thread decides it needs to block on a lock, it looks up the 44 * active turnstile for that lock. If no active turnstile exists, then 45 * the process lends its turnstile to the lock. If there is already an 46 * active turnstile for the lock, the thread places its turnstile on a 47 * list of free turnstiles, and references the active one instead. 48 * 49 * The act of looking up the turnstile acquires an interlock on the sleep 50 * queue. If a thread decides it doesn't need to block after all, then this 51 * interlock must be released by explicitly aborting the turnstile 52 * operation. 53 * 54 * When a thread is awakened, it needs to get its turnstile back. If there 55 * are still other threads waiting in the active turnstile, the thread 56 * grabs a free turnstile off the free list. Otherwise, it can take back 57 * the active turnstile from the lock (thus deactivating the turnstile). 58 * 59 * Turnstiles are the place to do priority inheritence. 60 */ 61 62 #include <sys/cdefs.h> 63 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.20 2008/04/28 20:24:04 martin Exp $"); 64 65 #include <sys/param.h> 66 #include <sys/lockdebug.h> 67 #include <sys/pool.h> 68 #include <sys/proc.h> 69 #include <sys/sleepq.h> 70 #include <sys/systm.h> 71 72 #include <uvm/uvm_extern.h> 73 74 #define TS_HASH_SIZE 64 75 #define TS_HASH_MASK (TS_HASH_SIZE - 1) 76 #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) 77 78 tschain_t turnstile_tab[TS_HASH_SIZE]; 79 pool_cache_t turnstile_cache; 80 81 int turnstile_ctor(void *, void *, int); 82 83 extern turnstile_t turnstile0; 84 85 /* 86 * turnstile_init: 87 * 88 * Initialize the turnstile mechanism. 89 */ 90 void 91 turnstile_init(void) 92 { 93 tschain_t *tc; 94 int i; 95 96 for (i = 0; i < TS_HASH_SIZE; i++) { 97 tc = &turnstile_tab[i]; 98 LIST_INIT(&tc->tc_chain); 99 mutex_init(&tc->tc_mutex, MUTEX_DEFAULT, IPL_SCHED); 100 } 101 102 turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, 103 "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); 104 KASSERT(turnstile_cache != NULL); 105 106 (void)turnstile_ctor(NULL, &turnstile0, 0); 107 } 108 109 /* 110 * turnstile_ctor: 111 * 112 * Constructor for turnstiles. 113 */ 114 int 115 turnstile_ctor(void *arg, void *obj, int flags) 116 { 117 turnstile_t *ts = obj; 118 119 memset(ts, 0, sizeof(*ts)); 120 sleepq_init(&ts->ts_sleepq[TS_READER_Q], NULL); 121 sleepq_init(&ts->ts_sleepq[TS_WRITER_Q], NULL); 122 return (0); 123 } 124 125 /* 126 * turnstile_remove: 127 * 128 * Remove an LWP from a turnstile sleep queue and wake it. 129 */ 130 static inline void 131 turnstile_remove(turnstile_t *ts, lwp_t *l, sleepq_t *sq) 132 { 133 turnstile_t *nts; 134 135 KASSERT(l->l_ts == ts); 136 137 /* 138 * This process is no longer using the active turnstile. 139 * Find an inactive one on the free list to give to it. 140 */ 141 if ((nts = ts->ts_free) != NULL) { 142 KASSERT(TS_ALL_WAITERS(ts) > 1); 143 l->l_ts = nts; 144 ts->ts_free = nts->ts_free; 145 nts->ts_free = NULL; 146 } else { 147 /* 148 * If the free list is empty, this is the last 149 * waiter. 150 */ 151 KASSERT(TS_ALL_WAITERS(ts) == 1); 152 LIST_REMOVE(ts, ts_chain); 153 } 154 155 (void)sleepq_remove(sq, l); 156 } 157 158 /* 159 * turnstile_lookup: 160 * 161 * Look up the turnstile for the specified lock. This acquires and 162 * holds the turnstile chain lock (sleep queue interlock). 163 */ 164 turnstile_t * 165 turnstile_lookup(wchan_t obj) 166 { 167 turnstile_t *ts; 168 tschain_t *tc; 169 170 tc = &turnstile_tab[TS_HASH(obj)]; 171 mutex_spin_enter(&tc->tc_mutex); 172 173 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 174 if (ts->ts_obj == obj) 175 return (ts); 176 177 /* 178 * No turnstile yet for this lock. No problem, turnstile_block() 179 * handles this by fetching the turnstile from the blocking thread. 180 */ 181 return (NULL); 182 } 183 184 /* 185 * turnstile_exit: 186 * 187 * Abort a turnstile operation. 188 */ 189 void 190 turnstile_exit(wchan_t obj) 191 { 192 tschain_t *tc; 193 194 tc = &turnstile_tab[TS_HASH(obj)]; 195 mutex_spin_exit(&tc->tc_mutex); 196 } 197 198 /* 199 * turnstile_block: 200 * 201 * Enter an object into the turnstile chain and prepare the current 202 * LWP for sleep. 203 */ 204 void 205 turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) 206 { 207 lwp_t *l; 208 lwp_t *cur; /* cached curlwp */ 209 lwp_t *owner; 210 turnstile_t *ots; 211 tschain_t *tc; 212 sleepq_t *sq; 213 pri_t prio, obase; 214 215 tc = &turnstile_tab[TS_HASH(obj)]; 216 l = cur = curlwp; 217 218 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 219 KASSERT(mutex_owned(&tc->tc_mutex)); 220 KASSERT(l != NULL && l->l_ts != NULL); 221 222 if (ts == NULL) { 223 /* 224 * We are the first thread to wait for this object; 225 * lend our turnstile to it. 226 */ 227 ts = l->l_ts; 228 KASSERT(TS_ALL_WAITERS(ts) == 0); 229 KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q].sq_queue) && 230 TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q].sq_queue)); 231 ts->ts_obj = obj; 232 ts->ts_inheritor = NULL; 233 ts->ts_sleepq[TS_READER_Q].sq_mutex = &tc->tc_mutex; 234 ts->ts_sleepq[TS_WRITER_Q].sq_mutex = &tc->tc_mutex; 235 LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); 236 } else { 237 /* 238 * Object already has a turnstile. Put our turnstile 239 * onto the free list, and reference the existing 240 * turnstile instead. 241 */ 242 ots = l->l_ts; 243 ots->ts_free = ts->ts_free; 244 ts->ts_free = ots; 245 l->l_ts = ts; 246 247 KASSERT(ts->ts_obj == obj); 248 KASSERT(TS_ALL_WAITERS(ts) != 0); 249 KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q].sq_queue) || 250 !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q].sq_queue)); 251 } 252 253 sq = &ts->ts_sleepq[q]; 254 sleepq_enter(sq, l); 255 LOCKDEBUG_BARRIER(&tc->tc_mutex, 1); 256 l->l_kpriority = true; 257 obase = l->l_kpribase; 258 if (obase < PRI_KTHREAD) 259 l->l_kpribase = PRI_KTHREAD; 260 sleepq_enqueue(sq, obj, "tstile", sobj); 261 262 /* 263 * Disable preemption across this entire block, as we may drop 264 * scheduler locks (allowing preemption), and would prefer not 265 * to be interrupted while in a state of flux. 266 */ 267 KPREEMPT_DISABLE(l); 268 269 /* 270 * lend our priority to lwps on the blocking chain. 271 */ 272 prio = lwp_eprio(l); 273 for (;;) { 274 bool dolock; 275 276 if (l->l_wchan == NULL) 277 break; 278 279 owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); 280 if (owner == NULL) 281 break; 282 283 KASSERT(l != owner); 284 KASSERT(cur != owner); 285 286 if (l->l_mutex != owner->l_mutex) 287 dolock = true; 288 else 289 dolock = false; 290 if (dolock && !lwp_trylock(owner)) { 291 /* 292 * restart from curlwp. 293 */ 294 lwp_unlock(l); 295 l = cur; 296 lwp_lock(l); 297 prio = lwp_eprio(l); 298 continue; 299 } 300 if (prio <= lwp_eprio(owner)) { 301 if (dolock) 302 lwp_unlock(owner); 303 break; 304 } 305 ts = l->l_ts; 306 KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL); 307 if (ts->ts_inheritor == NULL) { 308 ts->ts_inheritor = owner; 309 ts->ts_eprio = prio; 310 SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain); 311 lwp_lendpri(owner, prio); 312 } else if (prio > ts->ts_eprio) { 313 ts->ts_eprio = prio; 314 lwp_lendpri(owner, prio); 315 } 316 if (dolock) 317 lwp_unlock(l); 318 l = owner; 319 } 320 LOCKDEBUG_BARRIER(l->l_mutex, 1); 321 if (cur->l_mutex != l->l_mutex) { 322 lwp_unlock(l); 323 lwp_lock(cur); 324 } 325 LOCKDEBUG_BARRIER(cur->l_mutex, 1); 326 327 sleepq_block(0, false); 328 cur->l_kpribase = obase; 329 KPREEMPT_ENABLE(cur); 330 } 331 332 /* 333 * turnstile_wakeup: 334 * 335 * Wake up the specified number of threads that are blocked 336 * in a turnstile. 337 */ 338 void 339 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) 340 { 341 sleepq_t *sq; 342 tschain_t *tc; 343 lwp_t *l; 344 345 tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; 346 sq = &ts->ts_sleepq[q]; 347 348 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 349 KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); 350 KASSERT(mutex_owned(&tc->tc_mutex) && sq->sq_mutex == &tc->tc_mutex); 351 KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); 352 353 /* 354 * restore inherited priority if necessary. 355 */ 356 357 if (ts->ts_inheritor != NULL) { 358 turnstile_t *iter; 359 turnstile_t *next; 360 turnstile_t *prev = NULL; 361 pri_t prio; 362 bool dolock; 363 364 ts->ts_inheritor = NULL; 365 l = curlwp; 366 367 dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; 368 if (dolock) { 369 lwp_lock(l); 370 } 371 372 /* 373 * the following loop does two things. 374 * 375 * - remove ts from the list. 376 * 377 * - from the rest of the list, find the highest priority. 378 */ 379 380 prio = -1; 381 KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); 382 for (iter = SLIST_FIRST(&l->l_pi_lenders); 383 iter != NULL; iter = next) { 384 KASSERT(lwp_eprio(l) >= ts->ts_eprio); 385 next = SLIST_NEXT(iter, ts_pichain); 386 if (iter == ts) { 387 if (prev == NULL) { 388 SLIST_REMOVE_HEAD(&l->l_pi_lenders, 389 ts_pichain); 390 } else { 391 SLIST_REMOVE_AFTER(prev, ts_pichain); 392 } 393 } else if (prio < iter->ts_eprio) { 394 prio = iter->ts_eprio; 395 } 396 prev = iter; 397 } 398 399 lwp_lendpri(l, prio); 400 401 if (dolock) { 402 lwp_unlock(l); 403 } 404 } 405 406 if (nl != NULL) { 407 #if defined(DEBUG) || defined(LOCKDEBUG) 408 TAILQ_FOREACH(l, &sq->sq_queue, l_sleepchain) { 409 if (l == nl) 410 break; 411 } 412 if (l == NULL) 413 panic("turnstile_wakeup: nl not on sleepq"); 414 #endif 415 turnstile_remove(ts, nl, sq); 416 } else { 417 while (count-- > 0) { 418 l = TAILQ_FIRST(&sq->sq_queue); 419 KASSERT(l != NULL); 420 turnstile_remove(ts, l, sq); 421 } 422 } 423 mutex_spin_exit(&tc->tc_mutex); 424 } 425 426 /* 427 * turnstile_unsleep: 428 * 429 * Remove an LWP from the turnstile. This is called when the LWP has 430 * not been awoken normally but instead interrupted: for example, if it 431 * has received a signal. It's not a valid action for turnstiles, 432 * since LWPs blocking on a turnstile are not interruptable. 433 */ 434 u_int 435 turnstile_unsleep(lwp_t *l, bool cleanup) 436 { 437 438 lwp_unlock(l); 439 panic("turnstile_unsleep"); 440 } 441 442 /* 443 * turnstile_changepri: 444 * 445 * Adjust the priority of an LWP residing on a turnstile. 446 */ 447 void 448 turnstile_changepri(lwp_t *l, pri_t pri) 449 { 450 451 /* XXX priority inheritance */ 452 sleepq_changepri(l, pri); 453 } 454 455 #if defined(LOCKDEBUG) 456 /* 457 * turnstile_print: 458 * 459 * Given the address of a lock object, print the contents of a 460 * turnstile. 461 */ 462 void 463 turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) 464 { 465 turnstile_t *ts; 466 tschain_t *tc; 467 sleepq_t *rsq, *wsq; 468 lwp_t *l; 469 470 tc = &turnstile_tab[TS_HASH(obj)]; 471 472 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 473 if (ts->ts_obj == obj) 474 break; 475 476 (*pr)("Turnstile chain at %p.\n", tc); 477 if (ts == NULL) { 478 (*pr)("=> No active turnstile for this lock.\n"); 479 return; 480 } 481 482 rsq = &ts->ts_sleepq[TS_READER_Q]; 483 wsq = &ts->ts_sleepq[TS_WRITER_Q]; 484 485 (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq); 486 487 (*pr)("=> %d waiting readers:", rsq->sq_waiters); 488 TAILQ_FOREACH(l, &rsq->sq_queue, l_sleepchain) { 489 (*pr)(" %p", l); 490 } 491 (*pr)("\n"); 492 493 (*pr)("=> %d waiting writers:", wsq->sq_waiters); 494 TAILQ_FOREACH(l, &wsq->sq_queue, l_sleepchain) { 495 (*pr)(" %p", l); 496 } 497 (*pr)("\n"); 498 } 499 #endif /* LOCKDEBUG */ 500