/*	$OpenBSD: kern_timeout.c,v 1.97 2024/02/23 16:51:39 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel.  The to_time is the value
 * of the global variable "ticks" when the timeout should be called.  There are
 * four levels with 256 buckets each.
 */
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)
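/*
 * For example, with hz=100 a timeout_add(to, 50) call made when ticks is
 * 1000 stores to_time = 1050, i.e. the timeout is due about half a second
 * later.  How far in the future a timeout is due also selects its wheel
 * level: up to 2^8 ticks ahead lands on level 0, up to 2^16 on level 1,
 * up to 2^24 on level 2, and anything further out on level 3 (see the
 * BUCKET() macro below).
 */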
struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */
#ifdef MULTIPROCESSOR
struct circq timeout_proc_mp;		/* [T] Process ctx + no kernel lock */
#endif

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))
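/*
 * These queues are intrusive, sentinel-headed circular lists: a queue
 * head is a bare struct circq that points at itself when empty, and a
 * pending timeout is linked in through the struct circq embedded at the
 * start of struct timeout.  CIRCQ_CONCAT() splices one whole queue onto
 * another in constant time, which is how MOVEBUCKET() and
 * timeout_hardclock_update() cascade entire buckets at once.  The insert
 * and remove macros also keep the tostat.tos_pending count current.
 */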
#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
#ifdef MULTIPROCESSOR
void softclock_thread_mp(void *);
#endif
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time.  That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks".  The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
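/*
 * For example, if ticks is 0x7ffffff0 and a timeout is added with
 * to_ticks = 0x20, then to_time becomes 0x80000010, which is negative as
 * a signed int.  Comparing to_time against ticks directly would claim the
 * timeout is long overdue, but the difference to_time - ticks wraps back
 * around to 0x20, the correct number of ticks remaining.
 */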
void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
#ifdef MULTIPROCESSOR
	CIRCQ_INIT(&timeout_proc_mp);
#endif
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE)));
	KASSERT(kclock >= KCLOCK_NONE && kclock < KCLOCK_MAX);

	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;

	/* For now, only process context timeouts may be marked MP-safe. */
	if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
		KASSERT(ISSET(to->to_flags, TIMEOUT_PROC));
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	if (!kcov_cold)
		new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}
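/*
 * Typical use of the tick-based interface, here with a hypothetical
 * driver softc "sc" and handler mydrv_tick():
 *
 *	timeout_set(&sc->sc_tmo, mydrv_tick, sc);	(once, at attach)
 *	timeout_add(&sc->sc_tmo, hz);			(due in ~1 second)
 *	timeout_del(&sc->sc_tmo);			(cancel, at detach)
 *
 * Re-adding a timeout that is already queued only moves it if the new
 * expiry is earlier; either way timeout_add() then returns 0 instead of 1,
 * as in the function above.
 */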
int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to->to_kclock == KCLOCK_UPTIME);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	if (!kcov_cold)
		to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}
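/*
 * timeout_del_barrier() relies on timeout_barrier() below to make sure
 * the handler is no longer running anywhere once it returns.  The
 * barrier works by queueing a dummy timeout on the same execution
 * context as "to" (the softclock interrupt, the softclock thread, or
 * the MP-safe softclock thread), kicking that context, and sleeping on
 * a cond until the dummy runs; since each context runs its timeouts in
 * order, the dummy only runs after any pending invocation of "to".
 */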
void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int flags;

	flags = to->to_flags & (TIMEOUT_PROC | TIMEOUT_MPSAFE);
	timeout_sync_order(ISSET(flags, TIMEOUT_PROC));

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE,
	    flags);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (ISSET(flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &barrier.to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	} else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (ISSET(flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(flags, TIMEOUT_MPSAFE))
			wakeup_one(&timeout_proc_mp);
		else
#endif
			wakeup_one(&timeout_proc);
	} else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct timespec diff, shifted_abstime;
	struct kclock *kc;
	uint32_t level;

	KASSERT(to->to_kclock == KCLOCK_UPTIME);
	kc = &timeout_kclock[to->to_kclock];

	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}
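/*
 * In other words the hash counts time in 1/128-second units:
 * 1000000000 / 2^7 = 7812500, so tv_nsec / 7812500 is a value in the
 * range [0, 127], and (tv_sec << 7) | that value is the absolute time
 * expressed in 128ths of a second.  Each level then isolates eight bits
 * of that count, so a full lap of level 0 spans 2 seconds, level 1
 * spans 512 seconds, level 2 spans 131072 seconds and level 3 spans
 * 33554432 seconds, matching timeout_level_width[] as initialized in
 * timeout_startup().
 */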
/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	int b, done, first, i, last, level, need_softclock = 1, off;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
	nanouptime(&now);
	timespecsub(&now, lastscan, &elapsed);
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
#ifdef MULTIPROCESSOR
		if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
			CIRCQ_INSERT_TAIL(&timeout_proc_mp, &to->to_list);
		else
#endif
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}
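/*
 * The "new" argument to the two functions above distinguishes timeouts
 * that were only just added (everything from the first entry of
 * timeout_new onward, see softclock() below) from timeouts cascading
 * down from a higher wheel level.  Only the latter are counted as
 * rescheduled in the statistics, and only they can be flagged as late.
 */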
/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;
#ifdef MULTIPROCESSOR
	int need_proc_mp;
#endif

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (to->to_kclock == KCLOCK_NONE)
			softclock_process_tick_timeout(to, new);
		else if (to->to_kclock == KCLOCK_UPTIME)
			softclock_process_kclock_timeout(to, new);
		else {
			panic("%s: invalid to_clock: %d",
			    __func__, to->to_kclock);
		}
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
#ifdef MULTIPROCESSOR
	need_proc_mp = !CIRCQ_EMPTY(&timeout_proc_mp);
#endif
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
#ifdef MULTIPROCESSOR
	if (need_proc_mp)
		wakeup(&timeout_proc_mp);
#endif
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
#ifdef MULTIPROCESSOR
	if (kthread_create(softclock_thread_mp, NULL, NULL, "softclockmp"))
		panic("kthread_create softclock_thread_mp");
#endif
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	mtx_enter(&timeout_mutex);
	for (;;) {
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		msleep_nsec(&timeout_proc, &timeout_mutex, PSWP, "tmoslp",
		    INFSLP);
	}
	splx(s);
}

#ifdef MULTIPROCESSOR
void
softclock_thread_mp(void *arg)
{
	struct timeout *to;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	mtx_enter(&timeout_mutex);
	for (;;) {
		while (!CIRCQ_EMPTY(&timeout_proc_mp)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc_mp));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		msleep_nsec(&timeout_proc_mp, &timeout_mutex, PSWP, "tmoslp",
		    INFSLP);
	}
}
#endif /* MULTIPROCESSOR */
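/*
 * Both threads above just drain their queue and sleep until softclock()
 * (or timeout_barrier()) wakes them again.  softclock_thread() runs the
 * plain TIMEOUT_PROC timeouts with the kernel lock held and is pegged to
 * the primary CPU; softclock_thread_mp() drops the kernel lock and runs
 * the timeouts that were also marked TIMEOUT_MPSAFE.
 */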
#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);
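/*
 * These routines back the ddb(4) "show callout" command:
 * db_show_callout() prints the last scan time for each clock and then
 * walks every queue and wheel bucket, printing one line per pending
 * timeout via db_show_timeout().  db_timespec() formats into a static
 * buffer, which is acceptable here because ddb runs with the rest of
 * the system stopped.
 */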
const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
#ifdef MULTIPROCESSOR
	else if (bucket == &timeout_proc_mp)
		where = "thread-mp";
#endif
	else {
		if (to->to_kclock == KCLOCK_UPTIME)
			wheel = timeout_wheel_kc;
		else if (to->to_kclock == KCLOCK_NONE)
			wheel = timeout_wheel;
		else
			goto invalid;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (to->to_kclock == KCLOCK_UPTIME) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s %8s %9s 0x%0*lx %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else if (to->to_kclock == KCLOCK_NONE) {
		db_printf("%20d %8s %9s 0x%0*lx %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	} else
		goto invalid;
	return;

invalid:
	db_printf("%s: timeout 0x%p: invalid to_kclock: %d",
	    __func__, to, to->to_kclock);
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s %8s\n", "lastscan", "clock");
	db_printf("%20d %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s %8s %9s %*s %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
#ifdef MULTIPROCESSOR
	db_show_callout_bucket(&timeout_proc_mp);
#endif
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif