/*	$NetBSD: kern_timeout.c,v 1.73 2022/10/29 00:19:21 riastradh Exp $	*/

/*-
 * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.73 2022/10/29 00:19:21 riastradh Exp $");

/*
 * Timeouts are kept in a hierarchical timing wheel.  The c_time is the
 * value of c_cpu->cc_ticks when the timeout should be called.  There are
 * four levels with 256 buckets each.  See 'Scheme 7' in "Hashed and
 * Hierarchical Timing Wheels: Efficient Data Structures for Implementing
 * a Timer Facility" by George Varghese and Tony Lauck.
 *
 * Some of the "math" in here is a bit tricky.  We have to beware of
 * wrapping ints.
 *
 * We use the fact that any element added to the queue must be added with
 * a positive time.  That means that any element `to' on the queue cannot
 * be scheduled to timeout further in time than INT_MAX, but c->c_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the c->c_time value in any predictable way is
 * when we calculate how far in the future `to' will timeout - "c->c_time
 * - c->c_cpu->cc_ticks".  The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
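
/*
 * A worked example of the wrap-safe comparison (the values here are
 * hypothetical and only illustrate the rule above): suppose cc_ticks
 * has just wrapped around to INT_MIN + 5 = 0x80000005, and a callout
 * expired 10 ticks ago with c_time = INT_MAX - 4 = 0x7ffffffb.
 * Comparing the raw values would suggest the callout lies far in the
 * future, but the subtraction
 *
 *	(int)((unsigned)c->c_time - (unsigned)cc->cc_ticks)
 *	    = (int)(0x7ffffffb - 0x80000005) = (int)0xfffffff6 = -10
 *
 * correctly reports it as 10 ticks overdue.  callout_softclock() below
 * uses exactly this form of subtraction to classify timeouts.
 */
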
#define	_CALLOUT_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/evcnt.h>
#include <sys/intr.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/sdt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_access.h>
#include <ddb/db_cpu.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#define BUCKETS		1024
#define WHEELSIZE	256
#define WHEELMASK	255
#define WHEELBITS	8

#define MASKWHEEL(wheel, time)	(((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(cc, rel, abs)						\
    (((rel) <= (1 << (2*WHEELBITS)))					\
	? ((rel) <= (1 << WHEELBITS))					\
		? &(cc)->cc_wheel[MASKWHEEL(0, (abs))]			\
		: &(cc)->cc_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE]	\
	: ((rel) <= (1 << (3*WHEELBITS)))				\
		? &(cc)->cc_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE]	\
		: &(cc)->cc_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(cc, wheel, time)					\
    CIRCQ_APPEND(&(cc)->cc_todo,					\
	&(cc)->cc_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
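
/*
 * For example (illustrative only): a callout scheduled 1000 ticks in
 * the future satisfies 256 < 1000 <= 65536, so BUCKET() places it on
 * the second wheel, indexed by bits 8-15 of its absolute expiry time.
 * MOVEBUCKET() later dumps that bucket onto the todo list when the
 * low-order wheel wraps, and callout_softclock() re-inserts the entry
 * into a lower wheel (or runs it) as the expiry time approaches.
 */
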
/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(list)						\
do {									\
	(list)->cq_next_l = (list);					\
	(list)->cq_prev_l = (list);					\
} while (/*CONSTCOND*/0)

#define CIRCQ_INSERT(elem, list)					\
do {									\
	(elem)->cq_prev_e = (list)->cq_prev_e;				\
	(elem)->cq_next_l = (list);					\
	(list)->cq_prev_l->cq_next_l = (elem);				\
	(list)->cq_prev_l = (elem);					\
} while (/*CONSTCOND*/0)

#define CIRCQ_APPEND(fst, snd)						\
do {									\
	if (!CIRCQ_EMPTY(snd)) {					\
		(fst)->cq_prev_l->cq_next_l = (snd)->cq_next_l;		\
		(snd)->cq_next_l->cq_prev_l = (fst)->cq_prev_l;		\
		(snd)->cq_prev_l->cq_next_l = (fst);			\
		(fst)->cq_prev_l = (snd)->cq_prev_l;			\
		CIRCQ_INIT(snd);					\
	}								\
} while (/*CONSTCOND*/0)

#define CIRCQ_REMOVE(elem)						\
do {									\
	(elem)->cq_next_l->cq_prev_e = (elem)->cq_prev_e;		\
	(elem)->cq_prev_l->cq_next_e = (elem)->cq_next_e;		\
} while (/*CONSTCOND*/0)

#define CIRCQ_FIRST(list)	((list)->cq_next_e)
#define CIRCQ_NEXT(elem)	((elem)->cq_next_e)
#define CIRCQ_LAST(elem, list)	((elem)->cq_next_l == (list))
#define CIRCQ_EMPTY(list)	((list)->cq_next_l == (list))

struct callout_cpu {
	kmutex_t	*cc_lock;
	sleepq_t	cc_sleepq;
	u_int		cc_nwait;
	u_int		cc_ticks;
	lwp_t		*cc_lwp;
	callout_impl_t	*cc_active;
	callout_impl_t	*cc_cancel;
	struct evcnt	cc_ev_late;
	struct evcnt	cc_ev_block;
	struct callout_circq cc_todo;		/* Worklist */
	struct callout_circq cc_wheel[BUCKETS];	/* Queues of timeouts */
	char		cc_name1[12];
	char		cc_name2[12];
	struct cpu_info	*cc_cpu;
};

#ifdef DDB
static struct callout_cpu ccb;
#endif

#ifndef CRASH /* _KERNEL */
static void	callout_softclock(void *);
static void	callout_wait(callout_impl_t *, void *, kmutex_t *);

static struct callout_cpu callout_cpu0 __cacheline_aligned;
static void *callout_sih __read_mostly;

SDT_PROBE_DEFINE2(sdt, kernel, callout, init,
    "struct callout *"/*ch*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE1(sdt, kernel, callout, destroy,
    "struct callout *"/*ch*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, setfunc,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, schedule,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "int"/*ticks*/);
SDT_PROBE_DEFINE6(sdt, kernel, callout, migrate,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "struct cpu_info *"/*ocpu*/,
    "struct cpu_info *"/*ncpu*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, entry,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, return,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, stop,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, halt,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, halt__done,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);
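
/*
 * The probes above can be observed from userland with dtrace(1).  A
 * minimal sketch, assuming the usual provider:module:function:name
 * mapping of the SDT_PROBE_DEFINE arguments:
 *
 *	dtrace -n 'sdt:kernel:callout:entry { trace(arg1); }'
 *
 * which traces the handler function pointer each time a callout fires.
 */
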
static inline kmutex_t *
callout_lock(callout_impl_t *c)
{
	struct callout_cpu *cc;
	kmutex_t *lock;

	for (;;) {
		cc = c->c_cpu;
		lock = cc->cc_lock;
		mutex_spin_enter(lock);
		if (__predict_true(cc == c->c_cpu))
			return lock;
		mutex_spin_exit(lock);
	}
}

/*
 * callout_startup:
 *
 *	Initialize the callout facility, called at system startup time.
 *	Do just enough to allow callouts to be safely registered.
 */
void
callout_startup(void)
{
	struct callout_cpu *cc;
	int b;

	KASSERT(curcpu()->ci_data.cpu_callout == NULL);

	cc = &callout_cpu0;
	cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	CIRCQ_INIT(&cc->cc_todo);
	for (b = 0; b < BUCKETS; b++)
		CIRCQ_INIT(&cc->cc_wheel[b]);
	curcpu()->ci_data.cpu_callout = cc;
}

/*
 * callout_init_cpu:
 *
 *	Per-CPU initialization.
 */
CTASSERT(sizeof(callout_impl_t) <= sizeof(callout_t));

void
callout_init_cpu(struct cpu_info *ci)
{
	struct callout_cpu *cc;
	int b;

	if ((cc = ci->ci_data.cpu_callout) == NULL) {
		cc = kmem_zalloc(sizeof(*cc), KM_SLEEP);
		cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		CIRCQ_INIT(&cc->cc_todo);
		for (b = 0; b < BUCKETS; b++)
			CIRCQ_INIT(&cc->cc_wheel[b]);
	} else {
		/* Boot CPU, one time only. */
		callout_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
		    callout_softclock, NULL);
		if (callout_sih == NULL)
			panic("callout_init_cpu (2)");
	}

	sleepq_init(&cc->cc_sleepq);

	snprintf(cc->cc_name1, sizeof(cc->cc_name1), "late/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_late, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name1);

	snprintf(cc->cc_name2, sizeof(cc->cc_name2), "wait/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_block, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name2);

	cc->cc_cpu = ci;
	ci->ci_data.cpu_callout = cc;
}

/*
 * callout_init:
 *
 *	Initialize a callout structure.  This must be quick, so we fill
 *	only the minimum number of fields.
 */
void
callout_init(callout_t *cs, u_int flags)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;

	KASSERT((flags & ~CALLOUT_FLAGMASK) == 0);

	SDT_PROBE2(sdt, kernel, callout, init, cs, flags);

	cc = curcpu()->ci_data.cpu_callout;
	c->c_func = NULL;
	c->c_magic = CALLOUT_MAGIC;
	if (__predict_true((flags & CALLOUT_MPSAFE) != 0 && cc != NULL)) {
		c->c_flags = flags;
		c->c_cpu = cc;
		return;
	}
	c->c_flags = flags | CALLOUT_BOUND;
	c->c_cpu = &callout_cpu0;
}

/*
 * callout_destroy:
 *
 *	Destroy a callout structure.  The callout must be stopped.
 */
void
callout_destroy(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;

	SDT_PROBE1(sdt, kernel, callout, destroy, cs);

	KASSERTMSG(c->c_magic == CALLOUT_MAGIC,
	    "callout %p: c_magic (%#x) != CALLOUT_MAGIC (%#x)",
	    c, c->c_magic, CALLOUT_MAGIC);
	/*
	 * It's not necessary to lock in order to see the correct value
	 * of c->c_flags.  If the callout could potentially have been
	 * running, the current thread should have stopped it.
	 */
	KASSERTMSG((c->c_flags & CALLOUT_PENDING) == 0,
	    "pending callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	KASSERTMSG(c->c_cpu->cc_lwp == curlwp || c->c_cpu->cc_active != c,
	    "running callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	c->c_magic = 0;
}
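
/*
 * A minimal sketch of the usual life cycle as seen from a driver (the
 * softc, handler and channel names here are hypothetical, not part of
 * this file):
 *
 *	callout_init(&sc->sc_tick_ch, CALLOUT_MPSAFE);
 *	callout_setfunc(&sc->sc_tick_ch, xyz_tick, sc);
 *	callout_schedule(&sc->sc_tick_ch, hz);
 *	...
 *	callout_halt(&sc->sc_tick_ch, NULL);
 *	callout_destroy(&sc->sc_tick_ch);
 *
 * callout_destroy() requires that the callout is no longer pending and
 * no longer running, hence the callout_halt() before it.
 */
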
/*
 * callout_schedule_locked:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.  Must be called with
 *	callout_lock.
 */
static void
callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks)
{
	struct callout_cpu *cc, *occ;
	int old_time;

	SDT_PROBE5(sdt, kernel, callout, schedule,
	    c, c->c_func, c->c_arg, c->c_flags, to_ticks);

	KASSERT(to_ticks >= 0);
	KASSERT(c->c_func != NULL);

	/* Initialize the time here, it won't change. */
	occ = c->c_cpu;
	c->c_flags &= ~(CALLOUT_FIRED | CALLOUT_INVOKING);

	/*
	 * If this timeout is already scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if ((c->c_flags & CALLOUT_PENDING) != 0) {
		/* Leave on existing CPU. */
		old_time = c->c_time;
		c->c_time = to_ticks + occ->cc_ticks;
		if (c->c_time - old_time < 0) {
			CIRCQ_REMOVE(&c->c_list);
			CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
		}
		mutex_spin_exit(lock);
		return;
	}

	cc = curcpu()->ci_data.cpu_callout;
	if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ ||
	    !mutex_tryenter(cc->cc_lock)) {
		/* Leave on existing CPU. */
		c->c_time = to_ticks + occ->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
	} else {
		/* Move to this CPU. */
		c->c_cpu = cc;
		c->c_time = to_ticks + cc->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
		mutex_spin_exit(cc->cc_lock);
		SDT_PROBE6(sdt, kernel, callout, migrate,
		    c, c->c_func, c->c_arg, c->c_flags,
		    occ->cc_cpu, cc->cc_cpu);
	}
	mutex_spin_exit(lock);
}

/*
 * callout_reset:
 *
 *	Reset a callout structure with a new function and argument, and
 *	schedule it to run.
 */
void
callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_schedule:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.
 */
void
callout_schedule(callout_t *cs, int to_ticks)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	callout_schedule_locked(c, lock, to_ticks);
}
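
/*
 * callout_reset() behaves like callout_setfunc() followed by
 * callout_schedule().  A hypothetical one-shot watchdog, for example,
 * might be armed (and re-armed) with:
 *
 *	callout_reset(&sc->sc_wdog_ch, mstohz(5000), xyz_wdog_expire, sc);
 *
 * Scheduling a callout that is already pending simply moves its expiry
 * time, so repeated calls are fine.
 */
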
/*
 * callout_stop:
 *
 *	Try to cancel a pending callout.  It may be too late: the callout
 *	could be running on another CPU.  If called from interrupt context,
 *	the callout could already be in progress at a lower priority.
 */
bool
callout_stop(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	kmutex_t *lock;
	bool expired;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);

	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);

	cc = c->c_cpu;
	if (cc->cc_active == c) {
		/*
		 * This is for non-MPSAFE callouts only.  To synchronize
		 * effectively we must be called with kernel_lock held.
		 * It's also taken in callout_softclock.
		 */
		cc->cc_cancel = c;
	}

	SDT_PROBE5(sdt, kernel, callout, stop,
	    c, c->c_func, c->c_arg, c->c_flags, expired);

	mutex_spin_exit(lock);

	return expired;
}

/*
 * callout_halt:
 *
 *	Cancel a pending callout.  If in-flight, block until it completes.
 *	May not be called from a hard interrupt handler.  If the callout
 *	can take locks, the caller of callout_halt() must not hold any of
 *	those locks, otherwise the two could deadlock.  If 'interlock' is
 *	non-NULL and we must wait for the callout to complete, it will be
 *	released and re-acquired before returning.
 */
bool
callout_halt(callout_t *cs, void *interlock)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	int flags;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(!cpu_intr_p());
	KASSERT(interlock == NULL || mutex_owned(interlock));

	/* Fast path. */
	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, halt,
	    c, c->c_func, c->c_arg, c->c_flags);
	flags = c->c_flags;
	if ((flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	c->c_flags = flags & ~(CALLOUT_PENDING|CALLOUT_FIRED);
	if (__predict_false(flags & CALLOUT_FIRED)) {
		callout_wait(c, interlock, lock);
		return true;
	}
	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/false);
	mutex_spin_exit(lock);
	return false;
}
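
/*
 * A common use of the interlock argument, sketched with a hypothetical
 * driver lock: if the handler takes sc->sc_lock, the detach path must
 * not hold that lock while waiting or the two would deadlock.  Passing
 * it as the interlock lets callout_halt() drop and re-take it:
 *
 *	mutex_enter(&sc->sc_lock);
 *	sc->sc_dying = true;
 *	callout_halt(&sc->sc_tick_ch, &sc->sc_lock);
 *	mutex_exit(&sc->sc_lock);
 *
 * On return the callout is neither pending nor running.
 */
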
/*
 * callout_wait:
 *
 *	Slow path for callout_halt().  Deliberately marked __noinline to
 *	prevent unneeded overhead in the caller.
 */
static void __noinline
callout_wait(callout_impl_t *c, void *interlock, kmutex_t *lock)
{
	struct callout_cpu *cc;
	struct lwp *l;
	kmutex_t *relock;

	l = curlwp;
	relock = NULL;
	for (;;) {
		/*
		 * At this point we know the callout is not pending, but it
		 * could be running on a CPU somewhere.  That can be curcpu
		 * in a few cases:
		 *
		 * - curlwp is a higher priority soft interrupt
		 * - the callout blocked on a lock and is currently asleep
		 * - the callout itself has called callout_halt() (nice!)
		 */
		cc = c->c_cpu;
		if (__predict_true(cc->cc_active != c || cc->cc_lwp == l))
			break;

		/* It's running - need to wait for it to complete. */
		if (interlock != NULL) {
			/*
			 * Avoid potential scheduler lock order problems by
			 * dropping the interlock without the callout lock
			 * held; then retry.
			 */
			mutex_spin_exit(lock);
			mutex_exit(interlock);
			relock = interlock;
			interlock = NULL;
		} else {
			/* XXX Better to do priority inheritance. */
			KASSERT(l->l_wchan == NULL);
			cc->cc_nwait++;
			cc->cc_ev_block.ev_count++;
			l->l_kpriority = true;
			sleepq_enter(&cc->cc_sleepq, l, cc->cc_lock);
			sleepq_enqueue(&cc->cc_sleepq, cc, "callout",
			    &sleep_syncobj, false);
			sleepq_block(0, false, &sleep_syncobj);
		}

		/*
		 * Re-lock the callout and check the state of play again.
		 * It's a common design pattern for callouts to re-schedule
		 * themselves so put a stop to it again if needed.
		 */
		lock = callout_lock(c);
		if ((c->c_flags & CALLOUT_PENDING) != 0)
			CIRCQ_REMOVE(&c->c_list);
		c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	}

	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/true);

	mutex_spin_exit(lock);
	if (__predict_false(relock != NULL))
		mutex_enter(relock);
}

#ifdef notyet
/*
 * callout_bind:
 *
 *	Bind a callout so that it will only execute on one CPU.
 *	The callout must be stopped, and must be MPSAFE.
 *
 *	XXX Disabled for now until it is decided how to handle
 *	offlined CPUs.  We may want weak+strong binding.
 */
void
callout_bind(callout_t *cs, struct cpu_info *ci)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	kmutex_t *lock;

	KASSERT((c->c_flags & CALLOUT_PENDING) == 0);
	KASSERT(c->c_cpu->cc_active != c);
	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT((c->c_flags & CALLOUT_MPSAFE) != 0);

	lock = callout_lock(c);
	cc = ci->ci_data.cpu_callout;
	c->c_flags |= CALLOUT_BOUND;
	if (c->c_cpu != cc) {
		/*
		 * Assigning c_cpu effectively unlocks the callout
		 * structure, as we don't hold the new CPU's lock.
		 * Issue memory barrier to prevent accesses being
		 * reordered.
		 */
		membar_exit();
		c->c_cpu = cc;
	}
	mutex_spin_exit(lock);
}
#endif

void
callout_setfunc(callout_t *cs, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	mutex_spin_exit(lock);
}

bool
callout_expired(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_FIRED) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_active(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & (CALLOUT_PENDING|CALLOUT_FIRED)) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_pending(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_PENDING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_invoking(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_INVOKING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

void
callout_ack(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	c->c_flags &= ~CALLOUT_INVOKING;
	mutex_spin_exit(lock);
}

/*
 * callout_hardclock:
 *
 *	Called from hardclock() once every tick.  We schedule a soft
 *	interrupt if there is work to be done.
 */
void
callout_hardclock(void)
{
	struct callout_cpu *cc;
	int needsoftclock, ticks;

	cc = curcpu()->ci_data.cpu_callout;
	mutex_spin_enter(cc->cc_lock);

	ticks = ++cc->cc_ticks;

	MOVEBUCKET(cc, 0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(cc, 1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(cc, 2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(cc, 3, ticks);
		}
	}

	needsoftclock = !CIRCQ_EMPTY(&cc->cc_todo);
	mutex_spin_exit(cc->cc_lock);

	if (needsoftclock)
		softint_schedule(callout_sih);
}
/*
 * callout_softclock:
 *
 *	Soft interrupt handler, scheduled above if there is work to
 *	be done.  Callouts are made in soft interrupt context.
 */
static void
callout_softclock(void *v)
{
	callout_impl_t *c;
	struct callout_cpu *cc;
	void (*func)(void *);
	void *arg;
	int mpsafe, count, ticks, delta;
	u_int flags __unused;
	lwp_t *l;

	l = curlwp;
	KASSERT(l->l_cpu == curcpu());
	cc = l->l_cpu->ci_data.cpu_callout;

	mutex_spin_enter(cc->cc_lock);
	cc->cc_lwp = l;
	while (!CIRCQ_EMPTY(&cc->cc_todo)) {
		c = CIRCQ_FIRST(&cc->cc_todo);
		KASSERT(c->c_magic == CALLOUT_MAGIC);
		KASSERT(c->c_func != NULL);
		KASSERT(c->c_cpu == cc);
		KASSERT((c->c_flags & CALLOUT_PENDING) != 0);
		KASSERT((c->c_flags & CALLOUT_FIRED) == 0);
		CIRCQ_REMOVE(&c->c_list);

		/* If due, run it; otherwise insert it into the right bucket. */
		ticks = cc->cc_ticks;
		delta = (int)((unsigned)c->c_time - (unsigned)ticks);
		if (delta > 0) {
			CIRCQ_INSERT(&c->c_list, BUCKET(cc, delta, c->c_time));
			continue;
		}
		if (delta < 0)
			cc->cc_ev_late.ev_count++;

		c->c_flags = (c->c_flags & ~CALLOUT_PENDING) |
		    (CALLOUT_FIRED | CALLOUT_INVOKING);
		mpsafe = (c->c_flags & CALLOUT_MPSAFE);
		func = c->c_func;
		arg = c->c_arg;
		cc->cc_active = c;
		flags = c->c_flags;

		mutex_spin_exit(cc->cc_lock);
		KASSERT(func != NULL);
		SDT_PROBE4(sdt, kernel, callout, entry, c, func, arg, flags);
		if (__predict_false(!mpsafe)) {
			KERNEL_LOCK(1, NULL);
			(*func)(arg);
			KERNEL_UNLOCK_ONE(NULL);
		} else
			(*func)(arg);
		SDT_PROBE4(sdt, kernel, callout, return, c, func, arg, flags);
		KASSERTMSG(l->l_blcnt == 0,
		    "callout %p func %p leaked %d biglocks",
		    c, func, l->l_blcnt);
		mutex_spin_enter(cc->cc_lock);

		/*
		 * We can't touch 'c' here because it might be
		 * freed already.  If any LWPs are waiting for the
		 * callout to complete, awaken them.
		 */
		cc->cc_active = NULL;
		if ((count = cc->cc_nwait) != 0) {
			cc->cc_nwait = 0;
			/* sleepq_wake() drops the lock. */
			sleepq_wake(&cc->cc_sleepq, cc, count, cc->cc_lock);
			mutex_spin_enter(cc->cc_lock);
		}
	}
	cc->cc_lwp = NULL;
	mutex_spin_exit(cc->cc_lock);
}
#endif /* !CRASH */

#ifdef DDB
static void
db_show_callout_bucket(struct callout_cpu *cc, struct callout_circq *kbucket,
    struct callout_circq *bucket)
{
	callout_impl_t *c, ci;
	db_expr_t offset;
	const char *name;
	static char question[] = "?";
	int b;

	if (CIRCQ_LAST(bucket, kbucket))
		return;

	for (c = CIRCQ_FIRST(bucket); /*nothing*/; c = CIRCQ_NEXT(&c->c_list)) {
		db_read_bytes((db_addr_t)c, sizeof(ci), (char *)&ci);
		c = &ci;
		db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name,
		    &offset);
		name = name ? name : question;
		b = (bucket - cc->cc_wheel);
		if (b < 0)
			b = -WHEELSIZE;
		db_printf("%9d %2d/%-4d %16lx  %s\n",
		    c->c_time - cc->cc_ticks, b / WHEELSIZE, b,
		    (u_long)c->c_arg, name);
		if (CIRCQ_LAST(&c->c_list, kbucket))
			break;
	}
}

void
db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
{
	struct callout_cpu *cc;
	struct cpu_info *ci;
	int b;

#ifndef CRASH
	db_printf("hardclock_ticks now: %d\n", getticks());
#endif
	db_printf("    ticks  wheel               arg  func\n");

	/*
	 * Don't lock the callwheel; all the other CPUs are paused
	 * anyhow, and we might be called in a circumstance where
	 * some other CPU was paused while holding the lock.
	 */
	for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
		db_read_bytes((db_addr_t)ci +
		    offsetof(struct cpu_info, ci_data.cpu_callout),
		    sizeof(cc), (char *)&cc);
		db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
		db_show_callout_bucket(&ccb, &cc->cc_todo, &ccb.cc_todo);
	}
	for (b = 0; b < BUCKETS; b++) {
		for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
			db_read_bytes((db_addr_t)ci +
			    offsetof(struct cpu_info, ci_data.cpu_callout),
			    sizeof(cc), (char *)&cc);
			db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
			db_show_callout_bucket(&ccb, &cc->cc_wheel[b],
			    &ccb.cc_wheel[b]);
		}
	}
}
#endif /* DDB */