/*	$NetBSD: kern_timeout.c,v 1.79 2023/10/08 13:23:05 ad Exp $	*/

/*-
 * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.79 2023/10/08 13:23:05 ad Exp $");

/*
 * Timeouts are kept in a hierarchical timing wheel.  The c_time is the
 * value of c_cpu->cc_ticks when the timeout should be called.  There are
 * four levels with 256 buckets each.  See 'Scheme 7' in "Hashed and
 * Hierarchical Timing Wheels: Efficient Data Structures for Implementing
 * a Timer Facility" by George Varghese and Tony Lauck.
 *
 * Some of the "math" in here is a bit tricky.  We have to beware of
 * wrapping ints.
 *
 * We use the fact that any element added to the queue must be added with
 * a positive time.  That means that any element `to' on the queue cannot
 * be scheduled to timeout further in time than INT_MAX, but c->c_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the c->c_time value in any predictable way is
 * when we calculate how far in the future `to' will timeout -
 * "c->c_time - c->c_cpu->cc_ticks".  The result will always be positive
 * for future timeouts and 0 or negative for due timeouts.
 */
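
/*
 * Worked example (added commentary, not part of the original comment):
 * a callout scheduled with to_ticks = 10 while cc_ticks == INT_MAX - 2
 * gets c_time = INT_MAX + 8, which wraps to a negative value.  Two
 * ticks later (cc_ticks == INT_MAX) the wrap-safe difference
 *
 *	(int)((unsigned)c_time - (unsigned)cc_ticks) == 8
 *
 * still correctly reports 8 ticks to go, and it reaches 0 exactly when
 * the callout is due.  This is the computation callout_softclock()
 * performs, on unsigned copies of the values, to decide whether to run
 * a callout or re-bucket it.
 */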

#define	_CALLOUT_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/evcnt.h>
#include <sys/intr.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/sdt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_access.h>
#include <ddb/db_cpu.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#define BUCKETS		1024
#define WHEELSIZE	256
#define WHEELMASK	255
#define WHEELBITS	8

#define MASKWHEEL(wheel, time)	(((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(cc, rel, abs)						\
    (((rel) <= (1 << (2*WHEELBITS)))					\
	? ((rel) <= (1 << WHEELBITS))					\
	    ? &(cc)->cc_wheel[MASKWHEEL(0, (abs))]			\
	    : &(cc)->cc_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE]		\
	: ((rel) <= (1 << (3*WHEELBITS)))				\
	    ? &(cc)->cc_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE]	\
	    : &(cc)->cc_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(cc, wheel, time)					\
    CIRCQ_APPEND(&(cc)->cc_todo,					\
	&(cc)->cc_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
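
/*
 * Illustrative example (added commentary): BUCKET() picks the wheel
 * level from the relative time and the slot within that level from the
 * absolute expiry time.  A callout due in rel = 1000 ticks at absolute
 * time abs = 0x12345 satisfies 256 < rel <= 65536, so it lands on
 * level 1 in slot MASKWHEEL(1, abs) = (0x12345 >> 8) & 255 = 35,
 * i.e. cc_wheel[WHEELSIZE + 35].  Each time the low-order wheel rolls
 * over a 256-tick boundary, callout_hardclock() uses MOVEBUCKET() to
 * dump the matching higher-level slot onto cc_todo, where
 * callout_softclock() either runs the entries that are due or
 * re-buckets them at a finer level.  Four levels of 256 slots account
 * for the BUCKETS (4 * 256 = 1024) entries in cc_wheel[].
 */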

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(list)						\
do {									\
	(list)->cq_next_l = (list);					\
	(list)->cq_prev_l = (list);					\
} while (/*CONSTCOND*/0)

#define CIRCQ_INSERT(elem, list)					\
do {									\
	(elem)->cq_prev_e = (list)->cq_prev_e;				\
	(elem)->cq_next_l = (list);					\
	(list)->cq_prev_l->cq_next_l = (elem);				\
	(list)->cq_prev_l = (elem);					\
} while (/*CONSTCOND*/0)

#define CIRCQ_APPEND(fst, snd)						\
do {									\
	if (!CIRCQ_EMPTY(snd)) {					\
		(fst)->cq_prev_l->cq_next_l = (snd)->cq_next_l;		\
		(snd)->cq_next_l->cq_prev_l = (fst)->cq_prev_l;		\
		(snd)->cq_prev_l->cq_next_l = (fst);			\
		(fst)->cq_prev_l = (snd)->cq_prev_l;			\
		CIRCQ_INIT(snd);					\
	}								\
} while (/*CONSTCOND*/0)

#define CIRCQ_REMOVE(elem)						\
do {									\
	(elem)->cq_next_l->cq_prev_e = (elem)->cq_prev_e;		\
	(elem)->cq_prev_l->cq_next_e = (elem)->cq_next_e;		\
} while (/*CONSTCOND*/0)

#define CIRCQ_FIRST(list)	((list)->cq_next_e)
#define CIRCQ_NEXT(elem)	((elem)->cq_next_e)
#define CIRCQ_LAST(elem,list)	((elem)->cq_next_l == (list))
#define CIRCQ_EMPTY(list)	((list)->cq_next_l == (list))

struct callout_cpu {
	kmutex_t	*cc_lock;
	sleepq_t	cc_sleepq;
	u_int		cc_nwait;
	u_int		cc_ticks;
	lwp_t		*cc_lwp;
	callout_impl_t	*cc_active;
	struct evcnt	cc_ev_late;
	struct evcnt	cc_ev_block;
	struct callout_circq cc_todo;		/* Worklist */
	struct callout_circq cc_wheel[BUCKETS];	/* Queues of timeouts */
	char		cc_name1[12];
	char		cc_name2[12];
	struct cpu_info	*cc_cpu;
};

#ifdef DDB
static struct callout_cpu ccb;
#endif

#ifndef CRASH	/* _KERNEL */
static void	callout_softclock(void *);
static void	callout_wait(callout_impl_t *, void *, kmutex_t *);

static struct callout_cpu callout_cpu0 __cacheline_aligned;
static void *callout_sih __read_mostly;

SDT_PROBE_DEFINE2(sdt, kernel, callout, init,
    "struct callout *"/*ch*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE1(sdt, kernel, callout, destroy,
    "struct callout *"/*ch*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, setfunc,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, schedule,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "int"/*ticks*/);
SDT_PROBE_DEFINE6(sdt, kernel, callout, migrate,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "struct cpu_info *"/*ocpu*/,
    "struct cpu_info *"/*ncpu*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, entry,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, return,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, stop,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, halt,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, halt__done,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);

syncobj_t callout_syncobj = {
	.sobj_name	= "callout",
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_boostpri	= PRI_KERNEL,
	.sobj_unsleep	= sleepq_unsleep,
	.sobj_changepri	= sleepq_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= syncobj_noowner,
};

static inline kmutex_t *
callout_lock(callout_impl_t *c)
{
	struct callout_cpu *cc;
	kmutex_t *lock;

	for (;;) {
		cc = c->c_cpu;
		lock = cc->cc_lock;
		mutex_spin_enter(lock);
		if (__predict_true(cc == c->c_cpu))
			return lock;
		mutex_spin_exit(lock);
	}
}

/*
 * Check if the callout is currently running on an LWP that isn't curlwp.
 */
static inline bool
callout_running_somewhere_else(callout_impl_t *c, struct callout_cpu *cc)
{
	KASSERT(c->c_cpu == cc);

	return cc->cc_active == c && cc->cc_lwp != curlwp;
}

/*
 * callout_startup:
 *
 *	Initialize the callout facility, called at system startup time.
 *	Do just enough to allow callouts to be safely registered.
 */
void
callout_startup(void)
{
	struct callout_cpu *cc;
	int b;

	KASSERT(curcpu()->ci_data.cpu_callout == NULL);

	cc = &callout_cpu0;
	cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	CIRCQ_INIT(&cc->cc_todo);
	for (b = 0; b < BUCKETS; b++)
		CIRCQ_INIT(&cc->cc_wheel[b]);
	curcpu()->ci_data.cpu_callout = cc;
}

/*
 * callout_init_cpu:
 *
 *	Per-CPU initialization.
 */
CTASSERT(sizeof(callout_impl_t) <= sizeof(callout_t));

void
callout_init_cpu(struct cpu_info *ci)
{
	struct callout_cpu *cc;
	int b;

	if ((cc = ci->ci_data.cpu_callout) == NULL) {
		cc = kmem_zalloc(sizeof(*cc), KM_SLEEP);
		cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		CIRCQ_INIT(&cc->cc_todo);
		for (b = 0; b < BUCKETS; b++)
			CIRCQ_INIT(&cc->cc_wheel[b]);
	} else {
		/* Boot CPU, one time only. */
		callout_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
		    callout_softclock, NULL);
		if (callout_sih == NULL)
			panic("callout_init_cpu (2)");
	}

	sleepq_init(&cc->cc_sleepq);

	snprintf(cc->cc_name1, sizeof(cc->cc_name1), "late/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_late, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name1);

	snprintf(cc->cc_name2, sizeof(cc->cc_name2), "wait/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_block, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name2);

	cc->cc_cpu = ci;
	ci->ci_data.cpu_callout = cc;
}

/*
 * callout_init:
 *
 *	Initialize a callout structure.  This must be quick, so we fill
 *	only the minimum number of fields.
 */
void
callout_init(callout_t *cs, u_int flags)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;

	KASSERT((flags & ~CALLOUT_FLAGMASK) == 0);

	SDT_PROBE2(sdt, kernel, callout, init, cs, flags);

	cc = curcpu()->ci_data.cpu_callout;
	c->c_func = NULL;
	c->c_magic = CALLOUT_MAGIC;
	if (__predict_true((flags & CALLOUT_MPSAFE) != 0 && cc != NULL)) {
		c->c_flags = flags;
		c->c_cpu = cc;
		return;
	}
	c->c_flags = flags | CALLOUT_BOUND;
	c->c_cpu = &callout_cpu0;
}
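
/*
 * Typical life cycle as seen from a caller (illustrative sketch, not
 * part of the original file; "sc" and "mydrv_tick" name a hypothetical
 * driver softc that embeds a callout_t and its handler):
 *
 *	callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
 *	callout_setfunc(&sc->sc_callout, mydrv_tick, sc);
 *	callout_schedule(&sc->sc_callout, hz);	... re-arm as needed
 *	...
 *	callout_halt(&sc->sc_callout, NULL);	... wait for completion
 *	callout_destroy(&sc->sc_callout);
 *
 * callout_destroy() asserts that the callout is neither pending nor
 * running somewhere else, which is why the halt must come first.
 */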

/*
 * callout_destroy:
 *
 *	Destroy a callout structure.  The callout must be stopped.
 */
void
callout_destroy(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;

	SDT_PROBE1(sdt, kernel, callout, destroy, cs);

	KASSERTMSG(c->c_magic == CALLOUT_MAGIC,
	    "callout %p: c_magic (%#x) != CALLOUT_MAGIC (%#x)",
	    c, c->c_magic, CALLOUT_MAGIC);
	/*
	 * It's not necessary to lock in order to see the correct value
	 * of c->c_flags.  If the callout could potentially have been
	 * running, the current thread should have stopped it.
	 */
	KASSERTMSG((c->c_flags & CALLOUT_PENDING) == 0,
	    "pending callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	KASSERTMSG(!callout_running_somewhere_else(c, c->c_cpu),
	    "running callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	c->c_magic = 0;
}

/*
 * callout_schedule_locked:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.  Must be called with
 *	callout_lock.
 */
static void
callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks)
{
	struct callout_cpu *cc, *occ;
	int old_time;

	SDT_PROBE5(sdt, kernel, callout, schedule,
	    c, c->c_func, c->c_arg, c->c_flags, to_ticks);

	KASSERT(to_ticks >= 0);
	KASSERT(c->c_func != NULL);

	/* Initialize the time here, it won't change. */
	occ = c->c_cpu;
	c->c_flags &= ~(CALLOUT_FIRED | CALLOUT_INVOKING);

	/*
	 * If this timeout is already scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if ((c->c_flags & CALLOUT_PENDING) != 0) {
		/* Leave on existing CPU. */
		old_time = c->c_time;
		c->c_time = to_ticks + occ->cc_ticks;
		if (c->c_time - old_time < 0) {
			CIRCQ_REMOVE(&c->c_list);
			CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
		}
		mutex_spin_exit(lock);
		return;
	}

	cc = curcpu()->ci_data.cpu_callout;
	if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ ||
	    !mutex_tryenter(cc->cc_lock)) {
		/* Leave on existing CPU. */
		c->c_time = to_ticks + occ->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
	} else {
		/* Move to this CPU. */
		c->c_cpu = cc;
		c->c_time = to_ticks + cc->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
		mutex_spin_exit(cc->cc_lock);
		SDT_PROBE6(sdt, kernel, callout, migrate,
		    c, c->c_func, c->c_arg, c->c_flags,
		    occ->cc_cpu, cc->cc_cpu);
	}
	mutex_spin_exit(lock);
}

/*
 * callout_reset:
 *
 *	Reset a callout structure with a new function and argument, and
 *	schedule it to run.
 */
void
callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	callout_schedule_locked(c, lock, to_ticks);
}
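
/*
 * Added note (illustrative): to_ticks is expressed in hardclock ticks,
 * i.e. units of 1/hz seconds, and must be non-negative (enforced by
 * the KASSERT in callout_schedule_locked()).  Callers typically write
 * something like
 *
 *	callout_reset(&sc->sc_callout, mstohz(500), mydrv_tick, sc);
 *
 * to fire roughly half a second from now ("sc" and "mydrv_tick" being
 * the same hypothetical driver names used in the sketch above).
 */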

/*
 * callout_schedule:
 *
 *	Schedule a callout to run.  The function and argument must
 *	already be set in the callout structure.
 */
void
callout_schedule(callout_t *cs, int to_ticks)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_stop:
 *
 *	Try to cancel a pending callout.  It may be too late: the callout
 *	could be running on another CPU.  If called from interrupt context,
 *	the callout could already be in progress at a lower priority.
 */
bool
callout_stop(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool expired;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);

	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);

	SDT_PROBE5(sdt, kernel, callout, stop,
	    c, c->c_func, c->c_arg, c->c_flags, expired);

	mutex_spin_exit(lock);

	return expired;
}

/*
 * callout_halt:
 *
 *	Cancel a pending callout.  If in-flight, block until it completes.
 *	May not be called from a hard interrupt handler.  If the callout
 *	can take locks, the caller of callout_halt() must not hold any of
 *	those locks, otherwise the two could deadlock.  If 'interlock' is
 *	non-NULL and we must wait for the callout to complete, it will be
 *	released and re-acquired before returning.
 */
bool
callout_halt(callout_t *cs, void *interlock)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(!cpu_intr_p());
	KASSERT(interlock == NULL || mutex_owned(interlock));

	/* Fast path. */
	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, halt,
	    c, c->c_func, c->c_arg, c->c_flags);
	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	if (__predict_false(callout_running_somewhere_else(c, c->c_cpu))) {
		callout_wait(c, interlock, lock);
		return true;
	}
	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/false);
	mutex_spin_exit(lock);
	return false;
}
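
/*
 * Illustrative sketch of the interlock argument (added commentary):
 * if the callout handler takes sc->sc_lock, a caller that also holds
 * sc->sc_lock cannot simply block waiting for the handler, so it
 * passes the lock as the interlock and callout_halt() drops and
 * re-takes it around the wait:
 *
 *	mutex_enter(&sc->sc_lock);
 *	...
 *	callout_halt(&sc->sc_callout, &sc->sc_lock);
 *	... sc_lock is held again here, callout no longer running ...
 *	mutex_exit(&sc->sc_lock);
 *
 * ("sc" again names a hypothetical softc.)
 */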

/*
 * callout_wait:
 *
 *	Slow path for callout_halt().  Deliberately marked __noinline to
 *	prevent unneeded overhead in the caller.
 */
static void __noinline
callout_wait(callout_impl_t *c, void *interlock, kmutex_t *lock)
{
	struct callout_cpu *cc;
	struct lwp *l;
	kmutex_t *relock;
	int nlocks;

	l = curlwp;
	relock = NULL;
	for (;;) {
		/*
		 * At this point we know the callout is not pending, but it
		 * could be running on a CPU somewhere.  That can be curcpu
		 * in a few cases:
		 *
		 * - curlwp is a higher priority soft interrupt
		 * - the callout blocked on a lock and is currently asleep
		 * - the callout itself has called callout_halt() (nice!)
		 */
		cc = c->c_cpu;
		if (__predict_true(!callout_running_somewhere_else(c, cc)))
			break;

		/* It's running - need to wait for it to complete. */
		if (interlock != NULL) {
			/*
			 * Avoid potential scheduler lock order problems by
			 * dropping the interlock without the callout lock
			 * held; then retry.
			 */
			mutex_spin_exit(lock);
			mutex_exit(interlock);
			relock = interlock;
			interlock = NULL;
		} else {
			/* XXX Better to do priority inheritance. */
			KASSERT(l->l_wchan == NULL);
			cc->cc_nwait++;
			cc->cc_ev_block.ev_count++;
			nlocks = sleepq_enter(&cc->cc_sleepq, l, cc->cc_lock);
			sleepq_enqueue(&cc->cc_sleepq, cc, "callout",
			    &callout_syncobj, false);
			sleepq_block(0, false, &callout_syncobj, nlocks);
		}

		/*
		 * Re-lock the callout and check the state of play again.
		 * It's a common design pattern for callouts to re-schedule
		 * themselves so put a stop to it again if needed.
		 */
		lock = callout_lock(c);
		if ((c->c_flags & CALLOUT_PENDING) != 0)
			CIRCQ_REMOVE(&c->c_list);
		c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	}

	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/true);

	mutex_spin_exit(lock);
	if (__predict_false(relock != NULL))
		mutex_enter(relock);
}

#ifdef notyet
/*
 * callout_bind:
 *
 *	Bind a callout so that it will only execute on one CPU.
 *	The callout must be stopped, and must be MPSAFE.
 *
 *	XXX Disabled for now until it is decided how to handle
 *	offlined CPUs.  We may want weak+strong binding.
 */
void
callout_bind(callout_t *cs, struct cpu_info *ci)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	kmutex_t *lock;

	KASSERT((c->c_flags & CALLOUT_PENDING) == 0);
	KASSERT(c->c_cpu->cc_active != c);
	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT((c->c_flags & CALLOUT_MPSAFE) != 0);

	lock = callout_lock(c);
	cc = ci->ci_data.cpu_callout;
	c->c_flags |= CALLOUT_BOUND;
	if (c->c_cpu != cc) {
		/*
		 * Assigning c_cpu effectively unlocks the callout
		 * structure, as we don't hold the new CPU's lock.
		 * Issue memory barrier to prevent accesses being
		 * reordered.
		 */
		membar_exit();
		c->c_cpu = cc;
	}
	mutex_spin_exit(lock);
}
#endif

void
callout_setfunc(callout_t *cs, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	mutex_spin_exit(lock);
}

bool
callout_expired(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_FIRED) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_active(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & (CALLOUT_PENDING|CALLOUT_FIRED)) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_pending(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_PENDING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_invoking(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_INVOKING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

void
callout_ack(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	c->c_flags &= ~CALLOUT_INVOKING;
	mutex_spin_exit(lock);
}
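
/*
 * Added commentary (illustrative): CALLOUT_INVOKING is set by
 * callout_softclock() just before the handler is called;
 * callout_invoking() lets other code observe that state and
 * callout_ack() is how the handler clears it.  A self-rescheduling
 * handler commonly acks and re-arms under its own lock, e.g. for the
 * hypothetical driver used in the earlier sketches:
 *
 *	static void
 *	mydrv_tick(void *arg)
 *	{
 *		struct mydrv_softc *sc = arg;
 *
 *		mutex_enter(&sc->sc_lock);
 *		callout_ack(&sc->sc_callout);
 *		... periodic work ...
 *		callout_schedule(&sc->sc_callout, hz);
 *		mutex_exit(&sc->sc_lock);
 *	}
 *
 * See callout(9) for the full description of this protocol.
 */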

/*
 * callout_hardclock:
 *
 *	Called from hardclock() once every tick.  We schedule a soft
 *	interrupt if there is work to be done.
 */
void
callout_hardclock(void)
{
	struct callout_cpu *cc;
	int needsoftclock, ticks;

	cc = curcpu()->ci_data.cpu_callout;
	mutex_spin_enter(cc->cc_lock);

	ticks = ++cc->cc_ticks;

	MOVEBUCKET(cc, 0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(cc, 1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(cc, 2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(cc, 3, ticks);
		}
	}

	needsoftclock = !CIRCQ_EMPTY(&cc->cc_todo);
	mutex_spin_exit(cc->cc_lock);

	if (needsoftclock)
		softint_schedule(callout_sih);
}

/*
 * callout_softclock:
 *
 *	Soft interrupt handler, scheduled above if there is work to
 *	be done.  Callouts are made in soft interrupt context.
 */
static void
callout_softclock(void *v)
{
	callout_impl_t *c;
	struct callout_cpu *cc;
	void (*func)(void *);
	void *arg;
	int mpsafe, count, ticks, delta;
	u_int flags __unused;
	lwp_t *l;

	l = curlwp;
	KASSERT(l->l_cpu == curcpu());
	cc = l->l_cpu->ci_data.cpu_callout;

	mutex_spin_enter(cc->cc_lock);
	cc->cc_lwp = l;
	while (!CIRCQ_EMPTY(&cc->cc_todo)) {
		c = CIRCQ_FIRST(&cc->cc_todo);
		KASSERT(c->c_magic == CALLOUT_MAGIC);
		KASSERT(c->c_func != NULL);
		KASSERT(c->c_cpu == cc);
		KASSERT((c->c_flags & CALLOUT_PENDING) != 0);
		KASSERT((c->c_flags & CALLOUT_FIRED) == 0);
		CIRCQ_REMOVE(&c->c_list);

		/* If due, run it; otherwise insert it into the right bucket. */
		ticks = cc->cc_ticks;
		delta = (int)((unsigned)c->c_time - (unsigned)ticks);
		if (delta > 0) {
			CIRCQ_INSERT(&c->c_list, BUCKET(cc, delta, c->c_time));
			continue;
		}
		if (delta < 0)
			cc->cc_ev_late.ev_count++;

		c->c_flags = (c->c_flags & ~CALLOUT_PENDING) |
		    (CALLOUT_FIRED | CALLOUT_INVOKING);
		mpsafe = (c->c_flags & CALLOUT_MPSAFE);
		func = c->c_func;
		arg = c->c_arg;
		cc->cc_active = c;
		flags = c->c_flags;

		mutex_spin_exit(cc->cc_lock);
		KASSERT(func != NULL);
		SDT_PROBE4(sdt, kernel, callout, entry, c, func, arg, flags);
		if (__predict_false(!mpsafe)) {
			KERNEL_LOCK(1, NULL);
			(*func)(arg);
			KERNEL_UNLOCK_ONE(NULL);
		} else
			(*func)(arg);
		SDT_PROBE4(sdt, kernel, callout, return, c, func, arg, flags);
		KASSERTMSG(l->l_blcnt == 0,
		    "callout %p func %p leaked %d biglocks",
		    c, func, l->l_blcnt);
		mutex_spin_enter(cc->cc_lock);

		/*
		 * We can't touch 'c' here because it might be
		 * freed already.  If LWPs are waiting for the
		 * callout to complete, awaken them.
		 */
		cc->cc_active = NULL;
		if ((count = cc->cc_nwait) != 0) {
			cc->cc_nwait = 0;
			/* sleepq_wake() drops the lock. */
			sleepq_wake(&cc->cc_sleepq, cc, count, cc->cc_lock);
			mutex_spin_enter(cc->cc_lock);
		}
	}
	cc->cc_lwp = NULL;
	mutex_spin_exit(cc->cc_lock);
}
#endif	/* !CRASH */

#ifdef DDB
static void
db_show_callout_bucket(struct callout_cpu *cc, struct callout_circq *kbucket,
    struct callout_circq *bucket)
{
	callout_impl_t *c, ci;
	db_expr_t offset;
	const char *name;
	static char question[] = "?";
	int b;

	if (CIRCQ_LAST(bucket, kbucket))
		return;

	for (c = CIRCQ_FIRST(bucket); /*nothing*/; c = CIRCQ_NEXT(&c->c_list)) {
		db_read_bytes((db_addr_t)c, sizeof(ci), (char *)&ci);
		c = &ci;
		db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name,
		    &offset);
		name = name ? name : question;
		b = (bucket - cc->cc_wheel);
		if (b < 0)
			b = -WHEELSIZE;
		db_printf("%9d %2d/%-4d %16lx  %s\n",
		    c->c_time - cc->cc_ticks, b / WHEELSIZE, b,
		    (u_long)c->c_arg, name);
		if (CIRCQ_LAST(&c->c_list, kbucket))
			break;
	}
}

void
db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
{
	struct callout_cpu *cc;
	struct cpu_info *ci;
	int b;

#ifndef CRASH
	db_printf("hardclock_ticks now: %d\n", getticks());
#endif
	db_printf("    ticks  wheel               arg  func\n");

	/*
	 * Don't lock the callwheel; all the other CPUs are paused
	 * anyhow, and we might be called in a circumstance where
	 * some other CPU was paused while holding the lock.
	 */
	for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
		db_read_bytes((db_addr_t)ci +
		    offsetof(struct cpu_info, ci_data.cpu_callout),
		    sizeof(cc), (char *)&cc);
		db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
		db_show_callout_bucket(&ccb, &cc->cc_todo, &ccb.cc_todo);
	}
	for (b = 0; b < BUCKETS; b++) {
		for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
			db_read_bytes((db_addr_t)ci +
			    offsetof(struct cpu_info, ci_data.cpu_callout),
			    sizeof(cc), (char *)&cc);
			db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
			db_show_callout_bucket(&ccb, &cc->cc_wheel[b],
			    &ccb.cc_wheel[b]);
		}
	}
}
#endif /* DDB */