/*	$OpenBSD: kern_timeout.c,v 1.90 2022/12/31 16:06:24 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel.  The to_time is the value
 * of the global variable "ticks" when the timeout should be called.  There are
 * four levels with 256 buckets each.
 */
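/*
 * As a rough illustration (assuming the common HZ of 100): a timeout due
 * within 2^8 ticks (~2.6 seconds) of now sits on the lowest level, one due
 * within 2^16 ticks (~11 minutes) on the second, within 2^24 ticks
 * (~46 hours) on the third, and anything further out on the fourth.
 * Each tick, timeout_hardclock_update() below cascades the current bucket
 * (and, when a level rolls over, buckets from the higher levels) onto
 * timeout_todo for the softclock to process.
 */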
#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
	&timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
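/*
 * For example, a tick timeout due 100 ticks from now (rel <= 2^8) hashes
 * onto the lowest level: BUCKET(100, abs) selects timeout_wheel[abs & 255].
 * One due 1000 ticks from now (2^8 < rel <= 2^16) lands on the second
 * level: timeout_wheel[((abs >> 8) & 255) + 256].
 */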
/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time.  That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks".  The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
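/*
 * Concretely: if ticks were INT_MAX - 5 and a timeout were added with
 * to_ticks = 100, to_time would wrap around to INT_MIN + 94.  A direct
 * comparison such as "to_time < ticks" would then claim the timeout is in
 * the past, but the subtraction "to_time - ticks" wraps back to 100, the
 * true number of ticks remaining (relying, as the code below does, on
 * two's complement wraparound of the tick counter).
 */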
void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}
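/*
 * An illustrative use of the interface set up above, as a driver might
 * write it (the names "sc" and "mydrv_tick" are made up):
 *
 *	timeout_set(&sc->sc_tmo, mydrv_tick, sc);
 *	timeout_add_sec(&sc->sc_tmo, 1);
 *
 * The handler then runs once, roughly one second later, from the softclock
 * soft interrupt; a handler registered with timeout_set_proc() runs from
 * the softclock thread (process context) instead.
 */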
int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_abs_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}
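/*
 * A sketch of when the barrier variants above matter (hypothetical names):
 * a detach routine that is about to free the state its handler uses must
 * wait for any running handler to finish, not merely cancel the timeout.
 *
 *	timeout_del_barrier(&sc->sc_tmo);
 *	free(sc, M_DEVBUF, sizeof(*sc));
 *
 * Plain timeout_del() only guarantees the timeout will not fire later;
 * a handler that has already been dequeued may still be executing.
 */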
void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int procflag;

	procflag = (to->to_flags & TIMEOUT_PROC);
	timeout_sync_order(procflag);

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, KCLOCK_NONE,
	    procflag);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (procflag)
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (procflag)
		wakeup_one(&timeout_proc);
	else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct timespec diff, shifted_abstime;
	struct kclock *kc;
	uint32_t level;

	KASSERT(to->to_kclock == KCLOCK_UPTIME);
	kc = &timeout_kclock[to->to_kclock];

	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));
	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits.  The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds.  tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits.  We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash.  The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}
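/*
 * A worked example of the hash above for an absolute time of 123.5 seconds
 * (tv_sec = 123, tv_nsec = 500000000):
 *
 *	hi   = 123 << 7             = 15744
 *	lo   = 500000000 / 7812500  = 64	(7812500 = 10^9 / 2^7)
 *	hash = hi | lo              = 15808 = 0x3dc0
 *
 * Level 0 then uses bits 0-7 of the hash (0xc0 = bucket 192), level 1 uses
 * bits 8-15 (0x3d = bucket 61), and so on.
 */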
/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan;
	int b, done, first, i, last, level, need_softclock, off;

	nanouptime(&now);
	lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	timespecsub(&now, lastscan, &elapsed);
	need_softclock = 1;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level.  We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (to->to_kclock != KCLOCK_NONE)
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0);
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}
	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
	else {
		if (to->to_kclock != KCLOCK_NONE)
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (to->to_kclock != KCLOCK_NONE) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s %8s %7s 0x%0*lx %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d %8s %7s 0x%0*lx %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s %8s\n", "lastscan", "clock");
	db_printf("%20d %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s %8s %7s %*s %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif