1 /* $NetBSD: kern_turnstile.c,v 1.13 2007/12/05 07:06:54 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe and Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * Turnstiles are described in detail in: 41 * 42 * Solaris Internals: Core Kernel Architecture, Jim Mauro and 43 * Richard McDougall. 44 * 45 * Turnstiles are kept in a hash table. There are likely to be many more 46 * synchronisation objects than there are threads. Since a thread can block 47 * on only one lock at a time, we only need one turnstile per thread, and 48 * so they are allocated at thread creation time. 49 * 50 * When a thread decides it needs to block on a lock, it looks up the 51 * active turnstile for that lock. If no active turnstile exists, then 52 * the process lends its turnstile to the lock. If there is already an 53 * active turnstile for the lock, the thread places its turnstile on a 54 * list of free turnstiles, and references the active one instead. 55 * 56 * The act of looking up the turnstile acquires an interlock on the sleep 57 * queue. If a thread decides it doesn't need to block after all, then this 58 * interlock must be released by explicitly aborting the turnstile 59 * operation. 60 * 61 * When a thread is awakened, it needs to get its turnstile back. If there 62 * are still other threads waiting in the active turnstile, the the thread 63 * grabs a free turnstile off the free list. Otherwise, it can take back 64 * the active turnstile from the lock (thus deactivating the turnstile). 65 * 66 * Turnstiles are the place to do priority inheritence. 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.13 2007/12/05 07:06:54 ad Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/lock.h> 74 #include <sys/lockdebug.h> 75 #include <sys/pool.h> 76 #include <sys/proc.h> 77 #include <sys/sleepq.h> 78 #include <sys/systm.h> 79 80 #include <uvm/uvm_extern.h> 81 82 #define TS_HASH_SIZE 64 83 #define TS_HASH_MASK (TS_HASH_SIZE - 1) 84 #define TS_HASH(obj) (((uintptr_t)(obj) >> 3) & TS_HASH_MASK) 85 86 tschain_t turnstile_tab[TS_HASH_SIZE]; 87 pool_cache_t turnstile_cache; 88 89 int turnstile_ctor(void *, void *, int); 90 91 extern turnstile_t turnstile0; 92 93 /* 94 * turnstile_init: 95 * 96 * Initialize the turnstile mechanism. 97 */ 98 void 99 turnstile_init(void) 100 { 101 tschain_t *tc; 102 int i; 103 104 for (i = 0; i < TS_HASH_SIZE; i++) { 105 tc = &turnstile_tab[i]; 106 LIST_INIT(&tc->tc_chain); 107 mutex_init(&tc->tc_mutex, MUTEX_DEFAULT, IPL_SCHED); 108 } 109 110 turnstile_cache = pool_cache_init(sizeof(turnstile_t), 0, 0, 0, 111 "tstilepl", NULL, IPL_NONE, turnstile_ctor, NULL, NULL); 112 KASSERT(turnstile_cache != NULL); 113 114 (void)turnstile_ctor(NULL, &turnstile0, 0); 115 } 116 117 /* 118 * turnstile_ctor: 119 * 120 * Constructor for turnstiles. 121 */ 122 int 123 turnstile_ctor(void *arg, void *obj, int flags) 124 { 125 turnstile_t *ts = obj; 126 127 memset(ts, 0, sizeof(*ts)); 128 sleepq_init(&ts->ts_sleepq[TS_READER_Q], NULL); 129 sleepq_init(&ts->ts_sleepq[TS_WRITER_Q], NULL); 130 return (0); 131 } 132 133 /* 134 * turnstile_remove: 135 * 136 * Remove an LWP from a turnstile sleep queue and wake it. 137 */ 138 static inline void 139 turnstile_remove(turnstile_t *ts, lwp_t *l, sleepq_t *sq) 140 { 141 turnstile_t *nts; 142 143 KASSERT(l->l_ts == ts); 144 145 /* 146 * This process is no longer using the active turnstile. 147 * Find an inactive one on the free list to give to it. 148 */ 149 if ((nts = ts->ts_free) != NULL) { 150 KASSERT(TS_ALL_WAITERS(ts) > 1); 151 l->l_ts = nts; 152 ts->ts_free = nts->ts_free; 153 nts->ts_free = NULL; 154 } else { 155 /* 156 * If the free list is empty, this is the last 157 * waiter. 158 */ 159 KASSERT(TS_ALL_WAITERS(ts) == 1); 160 LIST_REMOVE(ts, ts_chain); 161 } 162 163 (void)sleepq_remove(sq, l); 164 } 165 166 /* 167 * turnstile_lookup: 168 * 169 * Look up the turnstile for the specified lock. This acquires and 170 * holds the turnstile chain lock (sleep queue interlock). 171 */ 172 turnstile_t * 173 turnstile_lookup(wchan_t obj) 174 { 175 turnstile_t *ts; 176 tschain_t *tc; 177 178 tc = &turnstile_tab[TS_HASH(obj)]; 179 mutex_spin_enter(&tc->tc_mutex); 180 181 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 182 if (ts->ts_obj == obj) 183 return (ts); 184 185 /* 186 * No turnstile yet for this lock. No problem, turnstile_block() 187 * handles this by fetching the turnstile from the blocking thread. 188 */ 189 return (NULL); 190 } 191 192 /* 193 * turnstile_exit: 194 * 195 * Abort a turnstile operation. 196 */ 197 void 198 turnstile_exit(wchan_t obj) 199 { 200 tschain_t *tc; 201 202 tc = &turnstile_tab[TS_HASH(obj)]; 203 mutex_spin_exit(&tc->tc_mutex); 204 } 205 206 /* 207 * turnstile_block: 208 * 209 * Enter an object into the turnstile chain and prepare the current 210 * LWP for sleep. 211 */ 212 void 213 turnstile_block(turnstile_t *ts, int q, wchan_t obj, syncobj_t *sobj) 214 { 215 lwp_t *l; 216 lwp_t *cur; /* cached curlwp */ 217 lwp_t *owner; 218 turnstile_t *ots; 219 tschain_t *tc; 220 sleepq_t *sq; 221 pri_t prio; 222 223 tc = &turnstile_tab[TS_HASH(obj)]; 224 l = cur = curlwp; 225 226 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 227 KASSERT(mutex_owned(&tc->tc_mutex)); 228 KASSERT(l != NULL && l->l_ts != NULL); 229 230 if (ts == NULL) { 231 /* 232 * We are the first thread to wait for this object; 233 * lend our turnstile to it. 234 */ 235 ts = l->l_ts; 236 KASSERT(TS_ALL_WAITERS(ts) == 0); 237 KASSERT(TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q].sq_queue) && 238 TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q].sq_queue)); 239 ts->ts_obj = obj; 240 ts->ts_inheritor = NULL; 241 ts->ts_sleepq[TS_READER_Q].sq_mutex = &tc->tc_mutex; 242 ts->ts_sleepq[TS_WRITER_Q].sq_mutex = &tc->tc_mutex; 243 LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain); 244 } else { 245 /* 246 * Object already has a turnstile. Put our turnstile 247 * onto the free list, and reference the existing 248 * turnstile instead. 249 */ 250 ots = l->l_ts; 251 ots->ts_free = ts->ts_free; 252 ts->ts_free = ots; 253 l->l_ts = ts; 254 255 KASSERT(ts->ts_obj == obj); 256 KASSERT(TS_ALL_WAITERS(ts) != 0); 257 KASSERT(!TAILQ_EMPTY(&ts->ts_sleepq[TS_READER_Q].sq_queue) || 258 !TAILQ_EMPTY(&ts->ts_sleepq[TS_WRITER_Q].sq_queue)); 259 } 260 261 sq = &ts->ts_sleepq[q]; 262 sleepq_enter(sq, l); 263 LOCKDEBUG_BARRIER(&tc->tc_mutex, 1); 264 l->l_kpriority = true; 265 sleepq_enqueue(sq, obj, "tstile", sobj); 266 267 /* 268 * lend our priority to lwps on the blocking chain. 269 */ 270 prio = lwp_eprio(l); 271 for (;;) { 272 bool dolock; 273 274 if (l->l_wchan == NULL) 275 break; 276 277 owner = (*l->l_syncobj->sobj_owner)(l->l_wchan); 278 if (owner == NULL) 279 break; 280 281 KASSERT(l != owner); 282 KASSERT(cur != owner); 283 284 if (l->l_mutex != owner->l_mutex) 285 dolock = true; 286 else 287 dolock = false; 288 if (dolock && !lwp_trylock(owner)) { 289 /* 290 * restart from curlwp. 291 */ 292 lwp_unlock(l); 293 l = cur; 294 lwp_lock(l); 295 prio = lwp_eprio(l); 296 continue; 297 } 298 if (prio <= lwp_eprio(owner)) { 299 if (dolock) 300 lwp_unlock(owner); 301 break; 302 } 303 ts = l->l_ts; 304 KASSERT(ts->ts_inheritor == owner || ts->ts_inheritor == NULL); 305 if (ts->ts_inheritor == NULL) { 306 ts->ts_inheritor = owner; 307 ts->ts_eprio = prio; 308 SLIST_INSERT_HEAD(&owner->l_pi_lenders, ts, ts_pichain); 309 lwp_lendpri(owner, prio); 310 } else if (prio > ts->ts_eprio) { 311 ts->ts_eprio = prio; 312 lwp_lendpri(owner, prio); 313 } 314 if (dolock) 315 lwp_unlock(l); 316 l = owner; 317 } 318 LOCKDEBUG_BARRIER(l->l_mutex, 1); 319 if (cur->l_mutex != l->l_mutex) { 320 lwp_unlock(l); 321 lwp_lock(cur); 322 } 323 LOCKDEBUG_BARRIER(cur->l_mutex, 1); 324 325 sleepq_block(0, false); 326 } 327 328 /* 329 * turnstile_wakeup: 330 * 331 * Wake up the specified number of threads that are blocked 332 * in a turnstile. 333 */ 334 void 335 turnstile_wakeup(turnstile_t *ts, int q, int count, lwp_t *nl) 336 { 337 sleepq_t *sq; 338 tschain_t *tc; 339 lwp_t *l; 340 341 tc = &turnstile_tab[TS_HASH(ts->ts_obj)]; 342 sq = &ts->ts_sleepq[q]; 343 344 KASSERT(q == TS_READER_Q || q == TS_WRITER_Q); 345 KASSERT(count > 0 && count <= TS_WAITERS(ts, q)); 346 KASSERT(mutex_owned(&tc->tc_mutex) && sq->sq_mutex == &tc->tc_mutex); 347 KASSERT(ts->ts_inheritor == curlwp || ts->ts_inheritor == NULL); 348 349 /* 350 * restore inherited priority if necessary. 351 */ 352 353 if (ts->ts_inheritor != NULL) { 354 turnstile_t *iter; 355 turnstile_t *next; 356 turnstile_t *prev = NULL; 357 pri_t prio; 358 bool dolock; 359 360 ts->ts_inheritor = NULL; 361 l = curlwp; 362 363 dolock = l->l_mutex == &l->l_cpu->ci_schedstate.spc_lwplock; 364 if (dolock) { 365 lwp_lock(l); 366 } 367 368 /* 369 * the following loop does two things. 370 * 371 * - remove ts from the list. 372 * 373 * - from the rest of the list, find the highest priority. 374 */ 375 376 prio = -1; 377 KASSERT(!SLIST_EMPTY(&l->l_pi_lenders)); 378 for (iter = SLIST_FIRST(&l->l_pi_lenders); 379 iter != NULL; iter = next) { 380 KASSERT(lwp_eprio(l) >= ts->ts_eprio); 381 next = SLIST_NEXT(iter, ts_pichain); 382 if (iter == ts) { 383 if (prev == NULL) { 384 SLIST_REMOVE_HEAD(&l->l_pi_lenders, 385 ts_pichain); 386 } else { 387 SLIST_REMOVE_AFTER(prev, ts_pichain); 388 } 389 } else if (prio < iter->ts_eprio) { 390 prio = iter->ts_eprio; 391 } 392 prev = iter; 393 } 394 395 lwp_lendpri(l, prio); 396 397 if (dolock) { 398 lwp_unlock(l); 399 } 400 } 401 402 if (nl != NULL) { 403 #if defined(DEBUG) || defined(LOCKDEBUG) 404 TAILQ_FOREACH(l, &sq->sq_queue, l_sleepchain) { 405 if (l == nl) 406 break; 407 } 408 if (l == NULL) 409 panic("turnstile_wakeup: nl not on sleepq"); 410 #endif 411 turnstile_remove(ts, nl, sq); 412 } else { 413 while (count-- > 0) { 414 l = TAILQ_FIRST(&sq->sq_queue); 415 KASSERT(l != NULL); 416 turnstile_remove(ts, l, sq); 417 } 418 } 419 mutex_spin_exit(&tc->tc_mutex); 420 } 421 422 /* 423 * turnstile_unsleep: 424 * 425 * Remove an LWP from the turnstile. This is called when the LWP has 426 * not been awoken normally but instead interrupted: for example, if it 427 * has received a signal. It's not a valid action for turnstiles, 428 * since LWPs blocking on a turnstile are not interruptable. 429 */ 430 void 431 turnstile_unsleep(lwp_t *l) 432 { 433 434 lwp_unlock(l); 435 panic("turnstile_unsleep"); 436 } 437 438 /* 439 * turnstile_changepri: 440 * 441 * Adjust the priority of an LWP residing on a turnstile. 442 */ 443 void 444 turnstile_changepri(lwp_t *l, pri_t pri) 445 { 446 447 /* XXX priority inheritance */ 448 sleepq_changepri(l, pri); 449 } 450 451 #if defined(LOCKDEBUG) 452 /* 453 * turnstile_print: 454 * 455 * Given the address of a lock object, print the contents of a 456 * turnstile. 457 */ 458 void 459 turnstile_print(volatile void *obj, void (*pr)(const char *, ...)) 460 { 461 turnstile_t *ts; 462 tschain_t *tc; 463 sleepq_t *rsq, *wsq; 464 lwp_t *l; 465 466 tc = &turnstile_tab[TS_HASH(obj)]; 467 468 LIST_FOREACH(ts, &tc->tc_chain, ts_chain) 469 if (ts->ts_obj == obj) 470 break; 471 472 (*pr)("Turnstile chain at %p.\n", tc); 473 if (ts == NULL) { 474 (*pr)("=> No active turnstile for this lock.\n"); 475 return; 476 } 477 478 rsq = &ts->ts_sleepq[TS_READER_Q]; 479 wsq = &ts->ts_sleepq[TS_WRITER_Q]; 480 481 (*pr)("=> Turnstile at %p (wrq=%p, rdq=%p).\n", ts, rsq, wsq); 482 483 (*pr)("=> %d waiting readers:", rsq->sq_waiters); 484 TAILQ_FOREACH(l, &rsq->sq_queue, l_sleepchain) { 485 (*pr)(" %p", l); 486 } 487 (*pr)("\n"); 488 489 (*pr)("=> %d waiting writers:", wsq->sq_waiters); 490 TAILQ_FOREACH(l, &wsq->sq_queue, l_sleepchain) { 491 (*pr)(" %p", l); 492 } 493 (*pr)("\n"); 494 } 495 #endif /* LOCKDEBUG */ 496