/*	$NetBSD: kern_timeout.c,v 1.76 2023/06/27 01:19:44 pho Exp $	*/

/*-
 * Copyright (c) 2003, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_timeout.c,v 1.76 2023/06/27 01:19:44 pho Exp $");

/*
 * Timeouts are kept in a hierarchical timing wheel. The c_time is the
 * value of c_cpu->cc_ticks when the timeout should be called. There are
 * four levels with 256 buckets each.
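 *
 * An illustrative note, derived from the BUCKET() and MOVEBUCKET()
 * macros below and not part of the original comment: a timeout due
 * within 2^8 ticks is filed on wheel level 0, within 2^16 ticks on
 * level 1, within 2^24 ticks on level 2, and anything later on level 3.
 * Entries on the higher levels are cascaded onto the per-CPU work list
 * by callout_hardclock() each time the wheels below them roll over, and
 * callout_softclock() then re-files the ones that are not yet due onto
 * a lower level.
 *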
 * See 'Scheme 7' in "Hashed and Hierarchical Timing Wheels: Efficient
 * Data Structures for Implementing a Timer Facility" by George Varghese
 * and Tony Lauck.
 *
 * Some of the "math" in here is a bit tricky. We have to beware of
 * wrapping ints.
 *
 * We use the fact that any element added to the queue must be added with
 * a positive time. That means that any element `to' on the queue cannot
 * be scheduled to timeout further in time than INT_MAX, but c->c_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the c->c_time value in any predictable way is
 * when we calculate how far in the future `to' will timeout - "c->c_time
 * - c->c_cpu->cc_ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */

#define _CALLOUT_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/lwp.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sleepq.h>
#include <sys/syncobj.h>
#include <sys/evcnt.h>
#include <sys/intr.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/sdt.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_access.h>
#include <ddb/db_cpu.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#define BUCKETS		1024
#define WHEELSIZE	256
#define WHEELMASK	255
#define WHEELBITS	8

#define MASKWHEEL(wheel, time)	(((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(cc, rel, abs)						\
    (((rel) <= (1 << (2*WHEELBITS)))					\
	? ((rel) <= (1 << WHEELBITS))					\
	    ? &(cc)->cc_wheel[MASKWHEEL(0, (abs))]			\
	    : &(cc)->cc_wheel[MASKWHEEL(1, (abs)) + WHEELSIZE]		\
	: ((rel) <= (1 << (3*WHEELBITS)))				\
	    ? &(cc)->cc_wheel[MASKWHEEL(2, (abs)) + 2*WHEELSIZE]	\
	    : &(cc)->cc_wheel[MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(cc, wheel, time)					\
    CIRCQ_APPEND(&(cc)->cc_todo,					\
	&(cc)->cc_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * Circular queue definitions.
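 *
 * These macros implement a doubly-linked circular list. The list head
 * is a sentinel node of the same type as the elements, so an empty list
 * simply points at itself (see CIRCQ_EMPTY()). As an illustrative
 * sketch only, this mirrors how the per-CPU work list is used further
 * down in this file:
 *
 *	CIRCQ_INIT(&cc->cc_todo);
 *	CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
 *	...
 *	while (!CIRCQ_EMPTY(&cc->cc_todo)) {
 *		c = CIRCQ_FIRST(&cc->cc_todo);
 *		CIRCQ_REMOVE(&c->c_list);
 *		...
 *	}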
 */

#define CIRCQ_INIT(list)						\
do {									\
	(list)->cq_next_l = (list);					\
	(list)->cq_prev_l = (list);					\
} while (/*CONSTCOND*/0)

#define CIRCQ_INSERT(elem, list)					\
do {									\
	(elem)->cq_prev_e = (list)->cq_prev_e;				\
	(elem)->cq_next_l = (list);					\
	(list)->cq_prev_l->cq_next_l = (elem);				\
	(list)->cq_prev_l = (elem);					\
} while (/*CONSTCOND*/0)

#define CIRCQ_APPEND(fst, snd)						\
do {									\
	if (!CIRCQ_EMPTY(snd)) {					\
		(fst)->cq_prev_l->cq_next_l = (snd)->cq_next_l;		\
		(snd)->cq_next_l->cq_prev_l = (fst)->cq_prev_l;		\
		(snd)->cq_prev_l->cq_next_l = (fst);			\
		(fst)->cq_prev_l = (snd)->cq_prev_l;			\
		CIRCQ_INIT(snd);					\
	}								\
} while (/*CONSTCOND*/0)

#define CIRCQ_REMOVE(elem)						\
do {									\
	(elem)->cq_next_l->cq_prev_e = (elem)->cq_prev_e;		\
	(elem)->cq_prev_l->cq_next_e = (elem)->cq_next_e;		\
} while (/*CONSTCOND*/0)

#define CIRCQ_FIRST(list)	((list)->cq_next_e)
#define CIRCQ_NEXT(elem)	((elem)->cq_next_e)
#define CIRCQ_LAST(elem, list)	((elem)->cq_next_l == (list))
#define CIRCQ_EMPTY(list)	((list)->cq_next_l == (list))

struct callout_cpu {
	kmutex_t	*cc_lock;
	sleepq_t	cc_sleepq;
	u_int		cc_nwait;
	u_int		cc_ticks;
	lwp_t		*cc_lwp;
	callout_impl_t	*cc_active;
	struct evcnt	cc_ev_late;
	struct evcnt	cc_ev_block;
	struct callout_circq cc_todo;		/* Worklist */
	struct callout_circq cc_wheel[BUCKETS];	/* Queues of timeouts */
	char		cc_name1[12];
	char		cc_name2[12];
	struct cpu_info	*cc_cpu;
};

#ifdef DDB
static struct callout_cpu ccb;
#endif

#ifndef CRASH	/* _KERNEL */
static void	callout_softclock(void *);
static void	callout_wait(callout_impl_t *, void *, kmutex_t *);

static struct callout_cpu callout_cpu0 __cacheline_aligned;
static void *callout_sih __read_mostly;

SDT_PROBE_DEFINE2(sdt, kernel, callout, init,
    "struct callout *"/*ch*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE1(sdt, kernel, callout, destroy,
    "struct callout *"/*ch*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, setfunc,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, schedule,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "int"/*ticks*/);
SDT_PROBE_DEFINE6(sdt, kernel, callout, migrate,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "struct cpu_info *"/*ocpu*/,
    "struct cpu_info *"/*ncpu*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, entry,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, return,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, stop,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);
SDT_PROBE_DEFINE4(sdt, kernel, callout, halt,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/);
SDT_PROBE_DEFINE5(sdt, kernel, callout, halt__done,
    "struct callout *"/*ch*/,
    "void (*)(void *)"/*func*/,
    "void *"/*arg*/,
    "unsigned"/*flags*/,
    "bool"/*expired*/);

static inline kmutex_t *
callout_lock(callout_impl_t *c)
{
	struct callout_cpu *cc;
	kmutex_t *lock;

	for (;;) {
		cc = c->c_cpu;
		lock = cc->cc_lock;
		mutex_spin_enter(lock);
		if (__predict_true(cc == c->c_cpu))
			return lock;
		mutex_spin_exit(lock);
	}
}

/*
 * Check if the callout is currently running on an LWP that isn't curlwp.
 */
static inline bool
callout_running_somewhere_else(callout_impl_t *c, struct callout_cpu *cc)
{
	KASSERT(c->c_cpu == cc);

	return cc->cc_active == c && cc->cc_lwp != curlwp;
}

/*
 * callout_startup:
 *
 *	Initialize the callout facility, called at system startup time.
 *	Do just enough to allow callouts to be safely registered.
 */
void
callout_startup(void)
{
	struct callout_cpu *cc;
	int b;

	KASSERT(curcpu()->ci_data.cpu_callout == NULL);

	cc = &callout_cpu0;
	cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
	CIRCQ_INIT(&cc->cc_todo);
	for (b = 0; b < BUCKETS; b++)
		CIRCQ_INIT(&cc->cc_wheel[b]);
	curcpu()->ci_data.cpu_callout = cc;
}

/*
 * callout_init_cpu:
 *
 *	Per-CPU initialization.
 */
CTASSERT(sizeof(callout_impl_t) <= sizeof(callout_t));

void
callout_init_cpu(struct cpu_info *ci)
{
	struct callout_cpu *cc;
	int b;

	if ((cc = ci->ci_data.cpu_callout) == NULL) {
		cc = kmem_zalloc(sizeof(*cc), KM_SLEEP);
		cc->cc_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SCHED);
		CIRCQ_INIT(&cc->cc_todo);
		for (b = 0; b < BUCKETS; b++)
			CIRCQ_INIT(&cc->cc_wheel[b]);
	} else {
		/* Boot CPU, one time only. */
		callout_sih = softint_establish(SOFTINT_CLOCK | SOFTINT_MPSAFE,
		    callout_softclock, NULL);
		if (callout_sih == NULL)
			panic("callout_init_cpu (2)");
	}

	sleepq_init(&cc->cc_sleepq);

	snprintf(cc->cc_name1, sizeof(cc->cc_name1), "late/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_late, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name1);

	snprintf(cc->cc_name2, sizeof(cc->cc_name2), "wait/%u",
	    cpu_index(ci));
	evcnt_attach_dynamic(&cc->cc_ev_block, EVCNT_TYPE_MISC,
	    NULL, "callout", cc->cc_name2);

	cc->cc_cpu = ci;
	ci->ci_data.cpu_callout = cc;
}

/*
 * callout_init:
 *
 *	Initialize a callout structure. This must be quick, so we fill
 *	only the minimum number of fields.
 */
void
callout_init(callout_t *cs, u_int flags)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;

	KASSERT((flags & ~CALLOUT_FLAGMASK) == 0);

	SDT_PROBE2(sdt, kernel, callout, init, cs, flags);

	cc = curcpu()->ci_data.cpu_callout;
	c->c_func = NULL;
	c->c_magic = CALLOUT_MAGIC;
	if (__predict_true((flags & CALLOUT_MPSAFE) != 0 && cc != NULL)) {
		c->c_flags = flags;
		c->c_cpu = cc;
		return;
	}
	c->c_flags = flags | CALLOUT_BOUND;
	c->c_cpu = &callout_cpu0;
}

/*
 * callout_destroy:
 *
 *	Destroy a callout structure. The callout must be stopped.
 */
void
callout_destroy(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;

	SDT_PROBE1(sdt, kernel, callout, destroy, cs);

	KASSERTMSG(c->c_magic == CALLOUT_MAGIC,
	    "callout %p: c_magic (%#x) != CALLOUT_MAGIC (%#x)",
	    c, c->c_magic, CALLOUT_MAGIC);
	/*
	 * It's not necessary to lock in order to see the correct value
	 * of c->c_flags.
	 * If the callout could potentially have been running, the
	 * current thread should have stopped it.
	 */
	KASSERTMSG((c->c_flags & CALLOUT_PENDING) == 0,
	    "pending callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	KASSERTMSG(!callout_running_somewhere_else(c, c->c_cpu),
	    "running callout %p: c_func (%p) c_flags (%#x) destroyed from %p",
	    c, c->c_func, c->c_flags, __builtin_return_address(0));
	c->c_magic = 0;
}

/*
 * callout_schedule_locked:
 *
 *	Schedule a callout to run. The function and argument must
 *	already be set in the callout structure. Must be called with
 *	the callout lock held; the lock is released before returning.
 */
static void
callout_schedule_locked(callout_impl_t *c, kmutex_t *lock, int to_ticks)
{
	struct callout_cpu *cc, *occ;
	int old_time;

	SDT_PROBE5(sdt, kernel, callout, schedule,
	    c, c->c_func, c->c_arg, c->c_flags, to_ticks);

	KASSERT(to_ticks >= 0);
	KASSERT(c->c_func != NULL);

	/* Initialize the time here, it won't change. */
	occ = c->c_cpu;
	c->c_flags &= ~(CALLOUT_FIRED | CALLOUT_INVOKING);

	/*
	 * If this timeout is already scheduled and is now being moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if ((c->c_flags & CALLOUT_PENDING) != 0) {
		/* Leave on existing CPU. */
		old_time = c->c_time;
		c->c_time = to_ticks + occ->cc_ticks;
		if (c->c_time - old_time < 0) {
			CIRCQ_REMOVE(&c->c_list);
			CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
		}
		mutex_spin_exit(lock);
		return;
	}

	cc = curcpu()->ci_data.cpu_callout;
	if ((c->c_flags & CALLOUT_BOUND) != 0 || cc == occ ||
	    !mutex_tryenter(cc->cc_lock)) {
		/* Leave on existing CPU. */
		c->c_time = to_ticks + occ->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &occ->cc_todo);
	} else {
		/* Move to this CPU. */
		c->c_cpu = cc;
		c->c_time = to_ticks + cc->cc_ticks;
		c->c_flags |= CALLOUT_PENDING;
		CIRCQ_INSERT(&c->c_list, &cc->cc_todo);
		mutex_spin_exit(cc->cc_lock);
		SDT_PROBE6(sdt, kernel, callout, migrate,
		    c, c->c_func, c->c_arg, c->c_flags,
		    occ->cc_cpu, cc->cc_cpu);
	}
	mutex_spin_exit(lock);
}

/*
 * callout_reset:
 *
 *	Reset a callout structure with a new function and argument, and
 *	schedule it to run.
 */
void
callout_reset(callout_t *cs, int to_ticks, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_schedule:
 *
 *	Schedule a callout to run. The function and argument must
 *	already be set in the callout structure.
 */
void
callout_schedule(callout_t *cs, int to_ticks)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	callout_schedule_locked(c, lock, to_ticks);
}

/*
 * callout_stop:
 *
 *	Try to cancel a pending callout. It may be too late: the callout
 *	could be running on another CPU.
 *	If called from interrupt context, the callout could already be
 *	in progress at a lower priority.
 */
bool
callout_stop(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool expired;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);

	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	expired = ((c->c_flags & CALLOUT_FIRED) != 0);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);

	SDT_PROBE5(sdt, kernel, callout, stop,
	    c, c->c_func, c->c_arg, c->c_flags, expired);

	mutex_spin_exit(lock);

	return expired;
}

/*
 * callout_halt:
 *
 *	Cancel a pending callout. If in-flight, block until it completes.
 *	May not be called from a hard interrupt handler. If the callout
 *	can take locks, the caller of callout_halt() must not hold any of
 *	those locks, otherwise the two could deadlock. If 'interlock' is
 *	non-NULL and we must wait for the callout to complete, it will be
 *	released and re-acquired before returning.
 */
bool
callout_halt(callout_t *cs, void *interlock)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(!cpu_intr_p());
	KASSERT(interlock == NULL || mutex_owned(interlock));

	/* Fast path. */
	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, halt,
	    c, c->c_func, c->c_arg, c->c_flags);
	if ((c->c_flags & CALLOUT_PENDING) != 0)
		CIRCQ_REMOVE(&c->c_list);
	c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	if (__predict_false(callout_running_somewhere_else(c, c->c_cpu))) {
		callout_wait(c, interlock, lock);
		return true;
	}
	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/false);
	mutex_spin_exit(lock);
	return false;
}

/*
 * callout_wait:
 *
 *	Slow path for callout_halt(). Deliberately marked __noinline to
 *	prevent unneeded overhead in the caller.
 */
static void __noinline
callout_wait(callout_impl_t *c, void *interlock, kmutex_t *lock)
{
	struct callout_cpu *cc;
	struct lwp *l;
	kmutex_t *relock;

	l = curlwp;
	relock = NULL;
	for (;;) {
		/*
		 * At this point we know the callout is not pending, but it
		 * could be running on a CPU somewhere. That can be curcpu
		 * in a few cases:
		 *
		 * - curlwp is a higher priority soft interrupt
		 * - the callout blocked on a lock and is currently asleep
		 * - the callout itself has called callout_halt() (nice!)
		 */
		cc = c->c_cpu;
		if (__predict_true(!callout_running_somewhere_else(c, cc)))
			break;

		/* It's running - need to wait for it to complete. */
		if (interlock != NULL) {
			/*
			 * Avoid potential scheduler lock order problems by
			 * dropping the interlock without the callout lock
			 * held; then retry.
			 */
			mutex_spin_exit(lock);
			mutex_exit(interlock);
			relock = interlock;
			interlock = NULL;
		} else {
			/* XXX Better to do priority inheritance. */
			KASSERT(l->l_wchan == NULL);
			cc->cc_nwait++;
			cc->cc_ev_block.ev_count++;
			l->l_kpriority = true;
			sleepq_enter(&cc->cc_sleepq, l, cc->cc_lock);
			sleepq_enqueue(&cc->cc_sleepq, cc, "callout",
			    &sleep_syncobj, false);
			sleepq_block(0, false, &sleep_syncobj);
		}

		/*
		 * Re-lock the callout and check the state of play again.
		 * It's a common design pattern for callouts to re-schedule
		 * themselves, so put a stop to it again if needed.
		 */
		lock = callout_lock(c);
		if ((c->c_flags & CALLOUT_PENDING) != 0)
			CIRCQ_REMOVE(&c->c_list);
		c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_FIRED);
	}

	SDT_PROBE5(sdt, kernel, callout, halt__done,
	    c, c->c_func, c->c_arg, c->c_flags, /*expired*/true);

	mutex_spin_exit(lock);
	if (__predict_false(relock != NULL))
		mutex_enter(relock);
}

#ifdef notyet
/*
 * callout_bind:
 *
 *	Bind a callout so that it will only execute on one CPU.
 *	The callout must be stopped, and must be MPSAFE.
 *
 *	XXX Disabled for now until it is decided how to handle
 *	offlined CPUs. We may want weak+strong binding.
 */
void
callout_bind(callout_t *cs, struct cpu_info *ci)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	struct callout_cpu *cc;
	kmutex_t *lock;

	KASSERT((c->c_flags & CALLOUT_PENDING) == 0);
	KASSERT(c->c_cpu->cc_active != c);
	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT((c->c_flags & CALLOUT_MPSAFE) != 0);

	lock = callout_lock(c);
	cc = ci->ci_data.cpu_callout;
	c->c_flags |= CALLOUT_BOUND;
	if (c->c_cpu != cc) {
		/*
		 * Assigning c_cpu effectively unlocks the callout
		 * structure, as we don't hold the new CPU's lock.
		 * Issue memory barrier to prevent accesses being
		 * reordered.
		 */
		membar_exit();
		c->c_cpu = cc;
	}
	mutex_spin_exit(lock);
}
#endif

void
callout_setfunc(callout_t *cs, void (*func)(void *), void *arg)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);
	KASSERT(func != NULL);

	lock = callout_lock(c);
	SDT_PROBE4(sdt, kernel, callout, setfunc, cs, func, arg, c->c_flags);
	c->c_func = func;
	c->c_arg = arg;
	mutex_spin_exit(lock);
}

bool
callout_expired(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_FIRED) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_active(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & (CALLOUT_PENDING|CALLOUT_FIRED)) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_pending(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_PENDING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

bool
callout_invoking(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;
	bool rv;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	rv = ((c->c_flags & CALLOUT_INVOKING) != 0);
	mutex_spin_exit(lock);

	return rv;
}

void
callout_ack(callout_t *cs)
{
	callout_impl_t *c = (callout_impl_t *)cs;
	kmutex_t *lock;

	KASSERT(c->c_magic == CALLOUT_MAGIC);

	lock = callout_lock(c);
	c->c_flags &= ~CALLOUT_INVOKING;
	mutex_spin_exit(lock);
}

/*
 * callout_hardclock:
 *
 *	Called from hardclock() once every tick. We schedule a soft
 *	interrupt if there is work to be done.
 */
void
callout_hardclock(void)
{
	struct callout_cpu *cc;
	int needsoftclock, ticks;

	cc = curcpu()->ci_data.cpu_callout;
	mutex_spin_enter(cc->cc_lock);

	ticks = ++cc->cc_ticks;

	MOVEBUCKET(cc, 0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(cc, 1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(cc, 2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(cc, 3, ticks);
		}
	}

	needsoftclock = !CIRCQ_EMPTY(&cc->cc_todo);
	mutex_spin_exit(cc->cc_lock);

	if (needsoftclock)
		softint_schedule(callout_sih);
}

/*
 * callout_softclock:
 *
 *	Soft interrupt handler, scheduled above if there is work to
 *	be done. Callouts are made in soft interrupt context.
 */
static void
callout_softclock(void *v)
{
	callout_impl_t *c;
	struct callout_cpu *cc;
	void (*func)(void *);
	void *arg;
	int mpsafe, count, ticks, delta;
	u_int flags __unused;
	lwp_t *l;

	l = curlwp;
	KASSERT(l->l_cpu == curcpu());
	cc = l->l_cpu->ci_data.cpu_callout;

	mutex_spin_enter(cc->cc_lock);
	cc->cc_lwp = l;
	while (!CIRCQ_EMPTY(&cc->cc_todo)) {
		c = CIRCQ_FIRST(&cc->cc_todo);
		KASSERT(c->c_magic == CALLOUT_MAGIC);
		KASSERT(c->c_func != NULL);
		KASSERT(c->c_cpu == cc);
		KASSERT((c->c_flags & CALLOUT_PENDING) != 0);
		KASSERT((c->c_flags & CALLOUT_FIRED) == 0);
		CIRCQ_REMOVE(&c->c_list);

		/* If due, run it; otherwise insert it into the right bucket. */
		ticks = cc->cc_ticks;
		delta = (int)((unsigned)c->c_time - (unsigned)ticks);
		if (delta > 0) {
			CIRCQ_INSERT(&c->c_list, BUCKET(cc, delta, c->c_time));
			continue;
		}
		if (delta < 0)
			cc->cc_ev_late.ev_count++;

		c->c_flags = (c->c_flags & ~CALLOUT_PENDING) |
		    (CALLOUT_FIRED | CALLOUT_INVOKING);
		mpsafe = (c->c_flags & CALLOUT_MPSAFE);
		func = c->c_func;
		arg = c->c_arg;
		cc->cc_active = c;
		flags = c->c_flags;

		mutex_spin_exit(cc->cc_lock);
		KASSERT(func != NULL);
		SDT_PROBE4(sdt, kernel, callout, entry, c, func, arg, flags);
		if (__predict_false(!mpsafe)) {
			KERNEL_LOCK(1, NULL);
			(*func)(arg);
			KERNEL_UNLOCK_ONE(NULL);
		} else
			(*func)(arg);
		SDT_PROBE4(sdt, kernel, callout, return, c, func, arg, flags);
		KASSERTMSG(l->l_blcnt == 0,
		    "callout %p func %p leaked %d biglocks",
		    c, func, l->l_blcnt);
		mutex_spin_enter(cc->cc_lock);

		/*
		 * We can't touch 'c' here because it might be
		 * freed already. If LWPs are waiting for the
		 * callout to complete, awaken them.
		 */
		cc->cc_active = NULL;
		if ((count = cc->cc_nwait) != 0) {
			cc->cc_nwait = 0;
			/* sleepq_wake() drops the lock. */
			sleepq_wake(&cc->cc_sleepq, cc, count, cc->cc_lock);
			mutex_spin_enter(cc->cc_lock);
		}
	}
	cc->cc_lwp = NULL;
	mutex_spin_exit(cc->cc_lock);
}
#endif /* !CRASH */

#ifdef DDB
static void
db_show_callout_bucket(struct callout_cpu *cc, struct callout_circq *kbucket,
    struct callout_circq *bucket)
{
	callout_impl_t *c, ci;
	db_expr_t offset;
	const char *name;
	static char question[] = "?";
	int b;

	if (CIRCQ_LAST(bucket, kbucket))
		return;

	for (c = CIRCQ_FIRST(bucket); /*nothing*/; c = CIRCQ_NEXT(&c->c_list)) {
		db_read_bytes((db_addr_t)c, sizeof(ci), (char *)&ci);
		c = &ci;
		db_find_sym_and_offset((db_addr_t)(intptr_t)c->c_func, &name,
		    &offset);
		name = name ? name : question;
		b = (bucket - cc->cc_wheel);
		if (b < 0)
			b = -WHEELSIZE;
		db_printf("%9d %2d/%-4d %16lx %s\n",
		    c->c_time - cc->cc_ticks, b / WHEELSIZE, b,
		    (u_long)c->c_arg, name);
		if (CIRCQ_LAST(&c->c_list, kbucket))
			break;
	}
}

void
db_show_callout(db_expr_t addr, bool haddr, db_expr_t count, const char *modif)
{
	struct callout_cpu *cc;
	struct cpu_info *ci;
	int b;

#ifndef CRASH
	db_printf("hardclock_ticks now: %d\n", getticks());
#endif
	db_printf("    ticks  wheel              arg  func\n");

	/*
	 * Don't lock the callwheel; all the other CPUs are paused
	 * anyhow, and we might be called in a circumstance where
	 * some other CPU was paused while holding the lock.
	 */
	for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
		db_read_bytes((db_addr_t)ci +
		    offsetof(struct cpu_info, ci_data.cpu_callout),
		    sizeof(cc), (char *)&cc);
		db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
		db_show_callout_bucket(&ccb, &cc->cc_todo, &ccb.cc_todo);
	}
	for (b = 0; b < BUCKETS; b++) {
		for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
			db_read_bytes((db_addr_t)ci +
			    offsetof(struct cpu_info, ci_data.cpu_callout),
			    sizeof(cc), (char *)&cc);
			db_read_bytes((db_addr_t)cc, sizeof(ccb), (char *)&ccb);
			db_show_callout_bucket(&ccb, &cc->cc_wheel[b],
			    &ccb.cc_wheel[b]);
		}
	}
}
#endif /* DDB */
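
/*
 * Illustrative sketch only, not part of the original file: the typical
 * life cycle of a callout as seen by an MP-safe consumer of this API.
 * The names sc, sc_tick_ch, sc_lock and foo_tick below are hypothetical.
 *
 *	callout_init(&sc->sc_tick_ch, CALLOUT_MPSAFE);
 *	callout_setfunc(&sc->sc_tick_ch, foo_tick, sc);
 *	callout_schedule(&sc->sc_tick_ch, hz);
 *
 * foo_tick() would normally call callout_schedule() again to re-arm
 * itself. To tear down, stop the callout for good, waiting for any
 * running instance to finish, and then destroy it; callout_halt() may
 * drop and re-acquire the interlock while it waits:
 *
 *	mutex_enter(&sc->sc_lock);
 *	callout_halt(&sc->sc_tick_ch, &sc->sc_lock);
 *	mutex_exit(&sc->sc_lock);
 *	callout_destroy(&sc->sc_tick_ch);
 */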