1 /* $NetBSD: kern_turnstile.c,v 1.18 2008/04/24 23:26:00 alc Exp $ */ 2 3 /*- 4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe and Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Turnstiles are described in detail in: 41 * 42 * Solaris Internals: Core Kernel Architecture, Jim Mauro and 43 * Richard McDougall. 44 * 45 * Turnstiles are kept in a hash table. There are likely to be many more 46 * synchronisation objects than there are threads. Since a thread can block 47 * on only one lock at a time, we only need one turnstile per thread, and 48 * so they are allocated at thread creation time. 49 * 50 * When a thread decides it needs to block on a lock, it looks up the 51 * active turnstile for that lock. If no active turnstile exists, then 52 * the process lends its turnstile to the lock. If there is already an 53 * active turnstile for the lock, the thread places its turnstile on a 54 * list of free turnstiles, and references the active one instead. 55 * 56 * The act of looking up the turnstile acquires an interlock on the sleep 57 * queue. If a thread decides it doesn't need to block after all, then this 58 * interlock must be released by explicitly aborting the turnstile 59 * operation. 60 * 61 * When a thread is awakened, it needs to get its turnstile back. If there 62 * are still other threads waiting in the active turnstile, the thread 63 * grabs a free turnstile off the free list. Otherwise, it can take back 64 * the active turnstile from the lock (thus deactivating the turnstile). 65 * 66 * Turnstiles are the place to do priority inheritence. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.18 2008/04/24 23:26:00 alc Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/lockdebug.h> 74 #include <sys/pool.h> 75 #include <sys/proc.h> 76 #include <sys/sleepq.h> 77 #include <sys/systm.h> 78 79 #include <uvm/uvm_extern.h> 80 81 #define TS_HASH_SIZE 64 82 #define TS_HASH_MASK (TS_HASH_SIZE - 1) 83 #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) 84 85 tschain_t turnstile_tab[TS_HASH_SIZE]; 86 pool_cache_t turnstile_cache; 87 88 int turnstile_ctor(void *, void *, int); 89 90 extern turnstile_t turnstile0; 91 92 /* 93 * turnstile_init: 94 * 95 * Initialize the turnstile mechanism. 96 */ 97 void 98 turnstile_init(void) 99 { 100 tschain_t *tc; 101 int i; 102 103 for (i = 0; i < TS_HASH_SIZE; i++) { 104 tc = &turnstile_tab[i]; 105 LIST_INIT(&tc->tc_chain); 106 mutex_init(&tc->tc_mutex, MUTEX_DEFAULT, IPL_SCHED); 107 } 108 109 turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, 110 "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); 111 KASSERT(turnstile_cache != NULL); 112 113 (void)turnstile_ctor(NULL, &turnstile0, 0); 114 } 115 116 /* 117 * turnstile_ctor: 118 * 119 * Constructor for turnstiles. 120 */ 121 int 122 turnstile_ctor(void *arg, void *obj, int flags) 123 { 124 turnstile_t *ts = obj; 125 126 memset(ts, 0, sizeof(*ts)); 127 sleepq_init(&ts->ts_sleepq[TS_READER_Q], NULL); 128 sleepq_init(&ts->ts_sleepq[TS_WRITER_Q], NULL); 129 return (0); 130 } 131 132 /* 133 * turnstile_remove: 134 * 135 * Remove an LWP from a turnstile sleep queue and wake it. 136 */ 137 static inline void 138 turnstile_remove(turnstile_t *ts, lwp_t *l, sleepq_t *sq) 139 { 140 turnstile_t *nts; 141 142 KASSERT(l->l_ts == ts); 143 144 /* 145 * This process is no longer using the active turnstile. 146 * Find an inactive one on the free list to give to it. 147 */ 148 if ((nts = ts->ts_free) != NULL) { 149 KASSERT(TS_ALL_WAITERS(ts) > 1); 150 l->l_ts = nts; 151 ts->ts_free = nts->ts_free; 152 nts->ts_free = NULL; 153 } else { 154 /* 155 * If the free list is empty, this is the last 156 * waiter. 157 */ 158 KASSERT(TS_ALL_WAITERS(ts) == 1); 159 LIST_REMOVE(ts, ts_chain); 160 } 161 162 (void)sleepq_remove(sq, l); 163 } 164 165 /* 166 * turnstile_lookup: 167 * 168 * Look up the turnstile for the specified lock. This acquires and 169 * holds the turnstile chain lock (sleep queue interlock). 170 */ 171 turnstile_t * 172 turnstile_lookup(wchan_t obj) 173 { 174 turnstile_t *ts; 175 tschain_t *tc; 176 177 tc = &turnstile_tab[TS_HASH(obj)]; 178 mutex_spin_enter(&tc->tc_mutex); 179 180 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 181 if (ts->ts_obj == obj) 182 return (ts); 183 184 /* 185 * No turnstile yet for this lock. No problem, turnstile_block() 186 * handles this by fetching the turnstile from the blocking thread. 187 */ 188 return (NULL); 189 } 190 191 /* 192 * turnstile_exit: 193 * 194 * Abort a turnstile operation. 195 */ 196 void 197 turnstile_exit(wchan_t obj) 198 { 199 tschain_t *tc; 200 201 tc = &turnstile_tab[TS_HASH(obj)]; 202 mutex_spin_exit(&tc->tc_mutex); 203 } 204 205 /* 206 * turnstile_block: 207 * 208 * Enter an object into the turnstile chain and prepare the current 209 * LWP for sleep. 210 */ 211 void 212 turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) 213 { 214 lwp_t *l; 215 lwp_t *cur; /* cached curlwp */ 216 lwp_t *owner; 217 turnstile_t *ots; 218 tschain_t *tc; 219 sleepq_t *sq; 220 pri_t prio, obase; 221 222 tc = &turnstile_tab[TS_HASH(obj)]; 223 l = cur = curlwp; 224 225 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 226 KASSERT(mutex_owned(&tc->tc_mutex)); 227 KASSERT(l != NULL && l->l_ts != NULL); 228 229 if (ts == NULL) { 230 /* 231 * We are the first thread to wait for this object; 232 * lend our turnstile to it. 233 */ 234 ts = l->l_ts; 235 KASSERT(TS_ALL_WAITERS(ts) == 0); 236 KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q].sq_queue) && 237 TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q].sq_queue)); 238 ts->ts_obj = obj; 239 ts->ts_inheritor = NULL; 240 ts->ts_sleepq[TS_READER_Q].sq_mutex = &tc->tc_mutex; 241 ts->ts_sleepq[TS_WRITER_Q].sq_mutex = &tc->tc_mutex; 242 LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); 243 } else { 244 /* 245 * Object already has a turnstile. Put our turnstile 246 * onto the free list, and reference the existing 247 * turnstile instead. 248 */ 249 ots = l->l_ts; 250 ots->ts_free = ts->ts_free; 251 ts->ts_free = ots; 252 l->l_ts = ts; 253 254 KASSERT(ts->ts_obj == obj); 255 KASSERT(TS_ALL_WAITERS(ts) != 0); 256 KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q].sq_queue) || 257 !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q].sq_queue)); 258 } 259 260 sq = &ts->ts_sleepq[q]; 261 sleepq_enter(sq, l); 262 LOCKDEBUG_BARRIER(&tc->tc_mutex, 1); 263 l->l_kpriority = true; 264 obase = l->l_kpribase; 265 if (obase < PRI_KTHREAD) 266 l->l_kpribase = PRI_KTHREAD; 267 sleepq_enqueue(sq, obj, "tstile", sobj); 268 269 /* 270 * lend our priority to lwps on the blocking chain. 271 */ 272 prio = lwp_eprio(l); 273 for (;;) { 274 bool dolock; 275 276 if (l->l_wchan == NULL) 277 break; 278 279 owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); 280 if (owner == NULL) 281 break; 282 283 KASSERT(l != owner); 284 KASSERT(cur != owner); 285 286 if (l->l_mutex != owner->l_mutex) 287 dolock = true; 288 else 289 dolock = false; 290 if (dolock && !lwp_trylock(owner)) { 291 /* 292 * restart from curlwp. 293 */ 294 lwp_unlock(l); 295 l = cur; 296 lwp_lock(l); 297 prio = lwp_eprio(l); 298 continue; 299 } 300 if (prio <= lwp_eprio(owner)) { 301 if (dolock) 302 lwp_unlock(owner); 303 break; 304 } 305 ts = l->l_ts; 306 KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL); 307 if (ts->ts_inheritor == NULL) { 308 ts->ts_inheritor = owner; 309 ts->ts_eprio = prio; 310 SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain); 311 lwp_lendpri(owner, prio); 312 } else if (prio > ts->ts_eprio) { 313 ts->ts_eprio = prio; 314 lwp_lendpri(owner, prio); 315 } 316 if (dolock) 317 lwp_unlock(l); 318 l = owner; 319 } 320 LOCKDEBUG_BARRIER(l->l_mutex, 1); 321 if (cur->l_mutex != l->l_mutex) { 322 lwp_unlock(l); 323 lwp_lock(cur); 324 } 325 LOCKDEBUG_BARRIER(cur->l_mutex, 1); 326 327 sleepq_block(0, false); 328 cur->l_kpribase = obase; 329 } 330 331 /* 332 * turnstile_wakeup: 333 * 334 * Wake up the specified number of threads that are blocked 335 * in a turnstile. 336 */ 337 void 338 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) 339 { 340 sleepq_t *sq; 341 tschain_t *tc; 342 lwp_t *l; 343 344 tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; 345 sq = &ts->ts_sleepq[q]; 346 347 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 348 KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); 349 KASSERT(mutex_owned(&tc->tc_mutex) && sq->sq_mutex == &tc->tc_mutex); 350 KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); 351 352 /* 353 * restore inherited priority if necessary. 354 */ 355 356 if (ts->ts_inheritor != NULL) { 357 turnstile_t *iter; 358 turnstile_t *next; 359 turnstile_t *prev = NULL; 360 pri_t prio; 361 bool dolock; 362 363 ts->ts_inheritor = NULL; 364 l = curlwp; 365 366 dolock = l->l_mutex == l->l_cpu->ci_schedstate.spc_lwplock; 367 if (dolock) { 368 lwp_lock(l); 369 } 370 371 /* 372 * the following loop does two things. 373 * 374 * - remove ts from the list. 375 * 376 * - from the rest of the list, find the highest priority. 377 */ 378 379 prio = -1; 380 KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); 381 for (iter = SLIST_FIRST(&l->l_pi_lenders); 382 iter != NULL; iter = next) { 383 KASSERT(lwp_eprio(l) >= ts->ts_eprio); 384 next = SLIST_NEXT(iter, ts_pichain); 385 if (iter == ts) { 386 if (prev == NULL) { 387 SLIST_REMOVE_HEAD(&l->l_pi_lenders, 388 ts_pichain); 389 } else { 390 SLIST_REMOVE_AFTER(prev, ts_pichain); 391 } 392 } else if (prio < iter->ts_eprio) { 393 prio = iter->ts_eprio; 394 } 395 prev = iter; 396 } 397 398 lwp_lendpri(l, prio); 399 400 if (dolock) { 401 lwp_unlock(l); 402 } 403 } 404 405 if (nl != NULL) { 406 #if defined(DEBUG) || defined(LOCKDEBUG) 407 TAILQ_FOREACH(l, &sq->sq_queue, l_sleepchain) { 408 if (l == nl) 409 break; 410 } 411 if (l == NULL) 412 panic("turnstile_wakeup: nl not on sleepq"); 413 #endif 414 turnstile_remove(ts, nl, sq); 415 } else { 416 while (count-- > 0) { 417 l = TAILQ_FIRST(&sq->sq_queue); 418 KASSERT(l != NULL); 419 turnstile_remove(ts, l, sq); 420 } 421 } 422 mutex_spin_exit(&tc->tc_mutex); 423 } 424 425 /* 426 * turnstile_unsleep: 427 * 428 * Remove an LWP from the turnstile. This is called when the LWP has 429 * not been awoken normally but instead interrupted: for example, if it 430 * has received a signal. It's not a valid action for turnstiles, 431 * since LWPs blocking on a turnstile are not interruptable. 432 */ 433 u_int 434 turnstile_unsleep(lwp_t *l, bool cleanup) 435 { 436 437 lwp_unlock(l); 438 panic("turnstile_unsleep"); 439 } 440 441 /* 442 * turnstile_changepri: 443 * 444 * Adjust the priority of an LWP residing on a turnstile. 445 */ 446 void 447 turnstile_changepri(lwp_t *l, pri_t pri) 448 { 449 450 /* XXX priority inheritance */ 451 sleepq_changepri(l, pri); 452 } 453 454 #if defined(LOCKDEBUG) 455 /* 456 * turnstile_print: 457 * 458 * Given the address of a lock object, print the contents of a 459 * turnstile. 460 */ 461 void 462 turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) 463 { 464 turnstile_t *ts; 465 tschain_t *tc; 466 sleepq_t *rsq, *wsq; 467 lwp_t *l; 468 469 tc = &turnstile_tab[TS_HASH(obj)]; 470 471 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 472 if (ts->ts_obj == obj) 473 break; 474 475 (*pr)("Turnstile chain at %p.\n", tc); 476 if (ts == NULL) { 477 (*pr)("=> No active turnstile for this lock.\n"); 478 return; 479 } 480 481 rsq = &ts->ts_sleepq[TS_READER_Q]; 482 wsq = &ts->ts_sleepq[TS_WRITER_Q]; 483 484 (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq); 485 486 (*pr)("=> %d waiting readers:", rsq->sq_waiters); 487 TAILQ_FOREACH(l, &rsq->sq_queue, l_sleepchain) { 488 (*pr)(" %p", l); 489 } 490 (*pr)("\n"); 491 492 (*pr)("=> %d waiting writers:", wsq->sq_waiters); 493 TAILQ_FOREACH(l, &wsq->sq_queue, l_sleepchain) { 494 (*pr)(" %p", l); 495 } 496 (*pr)("\n"); 497 } 498 #endif /* LOCKDEBUG */ 499