/*	$OpenBSD: kern_timeout.c,v 1.85 2021/06/19 02:05:33 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
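
/*
 * Illustrative example: with WHEELBITS = 8, a timeout due within 256 ticks
 * is filed on level 0, one due within 65536 ticks on level 1, and so on;
 * each level spans 256 times the range of the level below it (see the
 * BUCKET() macro below).
 */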

#define WHEELCOUNT 4
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8
#define BUCKETS (WHEELCOUNT * WHEELSIZE)

struct circq timeout_wheel[BUCKETS];	/* [T] Tick-based timeouts */
struct circq timeout_wheel_kc[BUCKETS];	/* [T] Clock-based timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

time_t timeout_level_width[WHEELCOUNT];	/* [I] Wheel level width (seconds) */
struct timespec tick_ts;		/* [I] Length of a tick (1/hz secs) */

struct kclock {
	struct timespec kc_lastscan;	/* [T] Clock time at last wheel scan */
	struct timespec kc_late;	/* [T] Late if due prior */
	struct timespec kc_offset;	/* [T] Offset from primary kclock */
} timeout_kclock[KCLOCK_MAX];

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
	&timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
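
/*
 * Illustrative example of the BUCKET() hash: a timeout due in rel = 300
 * ticks exceeds 1 << WHEELBITS (256) but not 1 << (2*WHEELBITS), so it is
 * filed on level 1 in bucket MASKWHEEL(1, abs) + WHEELSIZE, i.e. bits 8-15
 * of its absolute expiry tick select the slot within that level.
 */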

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void kclock_nanotime(int, struct timespec *);
void softclock(void *);
void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
void timeout_run(struct timeout *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
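
/*
 * Worked example (illustrative): if ticks is 0x7ffffff0 and a timeout is
 * added with to_ticks = 0x20, to_time wraps to 0x80000010, which is
 * negative as a signed int. Comparing it against ticks directly would be
 * wrong, but to_time - ticks is still 0x20 thanks to two's-complement
 * wraparound, so the "how far in the future" calculation works.
 */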

void
timeout_startup(void)
{
	int b, level;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		CIRCQ_INIT(&timeout_wheel_kc[b]);

	for (level = 0; level < nitems(timeout_level_width); level++)
		timeout_level_width[level] = 2 << (level * WHEELBITS);
	NSEC_TO_TIMESPEC(tick_nsec, &tick_ts);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

static inline void
_timeout_set(struct timeout *to, void (*fn)(void *), void *arg, int kclock,
    int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_kclock = kclock;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	_timeout_set(new, fn, arg, KCLOCK_NONE, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
{
	_timeout_set(to, fn, arg, KCLOCK_NONE, flags);
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	_timeout_set(new, fn, arg, KCLOCK_NONE, TIMEOUT_PROC);
}

void
timeout_set_kclock(struct timeout *to, void (*fn)(void *), void *arg,
    int kclock, int flags)
{
	_timeout_set(to, fn, arg, kclock, flags | TIMEOUT_KCLOCK);
}
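
/*
 * Illustrative usage sketch; "sc" and "foo_tick" are hypothetical names,
 * not part of this file. A typical caller initializes a timeout once at
 * attach, arms it, and on detach cancels it and waits for any running
 * handler to finish:
 *
 *	timeout_set(&sc->sc_tmo, foo_tick, sc);
 *	timeout_add_sec(&sc->sc_tmo, 1);
 *	...
 *	timeout_del_barrier(&sc->sc_tmo);
 */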

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(!ISSET(new->to_flags, TIMEOUT_KCLOCK));
	KASSERT(new->to_kclock == KCLOCK_NONE);
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}
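
/*
 * Conversion example (illustrative): with hz = 100 a tick is 10000
 * microseconds, so timeout_add_msec(to, 25) schedules for 2 ticks, and
 * timeout_add_usec(to, 100) rounds 100 / 10000 up from 0 to 1 tick so
 * that a positive request is never silently dropped.
 */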

int
timeout_at_ts(struct timeout *to, const struct timespec *abstime)
{
	struct timespec old_abstime;
	int ret = 1;

	mtx_enter(&timeout_mutex);

	KASSERT(ISSET(to->to_flags, TIMEOUT_INITIALIZED | TIMEOUT_KCLOCK));
	KASSERT(to->to_kclock != KCLOCK_NONE);

	old_abstime = to->to_abstime;
	to->to_abstime = *abstime;
	CLR(to->to_flags, TIMEOUT_TRIGGERED);

	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		if (timespeccmp(abstime, &old_abstime, <)) {
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(to->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &to->to_list);
	}
#if NKCOV > 0
	to->to_process = curproc->p_p;
#endif
	tostat.tos_added++;

	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_in_nsec(struct timeout *to, uint64_t nsecs)
{
	struct timespec abstime, interval, now;

	kclock_nanotime(to->to_kclock, &now);
	NSEC_TO_TIMESPEC(nsecs, &interval);
	timespecadd(&now, &interval, &abstime);

	return timeout_at_ts(to, &abstime);
}

void
kclock_nanotime(int kclock, struct timespec *now)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		nanouptime(now);
		break;
	default:
		panic("invalid kclock: 0x%x", kclock);
	}
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	struct timeout barrier;
	struct cond c;
	int procflag;

	procflag = (to->to_flags & TIMEOUT_PROC);
	timeout_sync_order(procflag);

	timeout_set_flags(&barrier, timeout_barrier_timeout, &c, procflag);
	barrier.to_process = curproc->p_p;
	cond_init(&c);

	mtx_enter(&timeout_mutex);

	barrier.to_time = ticks;
	SET(barrier.to_flags, TIMEOUT_ONQUEUE);
	if (procflag)
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
	else
		CIRCQ_INSERT_TAIL(&timeout_todo, &barrier.to_list);

	mtx_leave(&timeout_mutex);

	if (procflag)
		wakeup_one(&timeout_proc);
	else
		softintr_schedule(softclock_si);

	cond_wait(&c, "tmobar");
}

void
timeout_barrier_timeout(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

uint32_t
timeout_bucket(const struct timeout *to)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];
	struct timespec diff, shifted_abstime;
	uint32_t level;

	KASSERT(ISSET(to->to_flags, TIMEOUT_KCLOCK));
	KASSERT(timespeccmp(&kc->kc_lastscan, &to->to_abstime, <));

	timespecsub(&to->to_abstime, &kc->kc_lastscan, &diff);
	for (level = 0; level < nitems(timeout_level_width) - 1; level++) {
		if (diff.tv_sec < timeout_level_width[level])
			break;
	}
	timespecadd(&to->to_abstime, &kc->kc_offset, &shifted_abstime);
	return level * WHEELSIZE + timeout_maskwheel(level, &shifted_abstime);
}

/*
 * Hash the absolute time into a bucket on a given level of the wheel.
 *
 * The complete hash is 32 bits. The upper 25 bits are seconds, the
 * lower 7 bits are nanoseconds. tv_nsec is a positive value less
 * than one billion so we need to divide it to isolate the desired
 * bits. We can't just shift it.
 *
 * The level is used to isolate an 8-bit portion of the hash. The
 * resulting number indicates which bucket the absolute time belongs
 * in on the given level of the wheel.
 */
uint32_t
timeout_maskwheel(uint32_t level, const struct timespec *abstime)
{
	uint32_t hi, lo;

	hi = abstime->tv_sec << 7;
	lo = abstime->tv_nsec / 7812500;

	return ((hi | lo) >> (level * WHEELBITS)) & WHEELMASK;
}
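
/*
 * Worked example (illustrative): for an absolute time of 123.456789012,
 * hi = 123 << 7 = 15744 and lo = 456789012 / 7812500 = 58, so the hash
 * is 15744 | 58 = 0x3dba. On level 0 the bucket index is 0x3dba & 255
 * = 186; on level 1 it is (0x3dba >> 8) & 255 = 61.
 */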

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	struct timespec elapsed, now;
	struct kclock *kc;
	struct timespec *lastscan;
	int b, done, first, i, last, level, need_softclock, off;

	nanouptime(&now);
	lastscan = &timeout_kclock[KCLOCK_UPTIME].kc_lastscan;
	timespecsub(&now, lastscan, &elapsed);
	need_softclock = 1;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	/*
	 * Dump the buckets that expired while we were away.
	 *
	 * If the elapsed time has exceeded a level's limit then we need
	 * to dump every bucket in the level. We have necessarily completed
	 * a lap of that level, too, so we need to process buckets in the
	 * next level.
	 *
	 * Otherwise we need to compare indices: if the index of the first
	 * expired bucket is greater than that of the last then we have
	 * completed a lap of the level and need to process buckets in the
	 * next level.
	 */
	for (level = 0; level < nitems(timeout_level_width); level++) {
		first = timeout_maskwheel(level, lastscan);
		if (elapsed.tv_sec >= timeout_level_width[level]) {
			last = (first == 0) ? WHEELSIZE - 1 : first - 1;
			done = 0;
		} else {
			last = timeout_maskwheel(level, &now);
			done = first <= last;
		}
		off = level * WHEELSIZE;
		for (b = first;; b = (b + 1) % WHEELSIZE) {
			CIRCQ_CONCAT(&timeout_todo, &timeout_wheel_kc[off + b]);
			if (b == last)
				break;
		}
		if (done)
			break;
	}

	/*
	 * Update the cached state for each kclock.
	 */
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		timespecadd(&now, &kc->kc_offset, &kc->kc_lastscan);
		timespecsub(&kc->kc_lastscan, &tick_ts, &kc->kc_late);
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}
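
/*
 * Illustrative example of the bucket-dumping loop above: level 0 is
 * timeout_level_width[0] = 2 seconds wide, so if roughly 3 seconds have
 * elapsed since the last scan, every level-0 bucket is dumped and the
 * scan continues on level 1, where the first and last indices decide
 * whether that level completed a lap as well.
 */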

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock_process_kclock_timeout(struct timeout *to, int new)
{
	struct kclock *kc = &timeout_kclock[to->to_kclock];

	if (timespeccmp(&to->to_abstime, &kc->kc_lastscan, >)) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&timeout_wheel_kc[timeout_bucket(to)],
		    &to->to_list);
		return;
	}
	if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

void
softclock_process_tick_timeout(struct timeout *to, int new)
{
	int delta = to->to_time - ticks;

	if (delta > 0) {
		tostat.tos_scheduled++;
		if (!new)
			tostat.tos_rescheduled++;
		CIRCQ_INSERT_TAIL(&BUCKET(delta, to->to_time), &to->to_list);
		return;
	}
	if (!new && delta < 0)
		tostat.tos_late++;
	if (ISSET(to->to_flags, TIMEOUT_PROC)) {
		CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
		return;
	}
	timeout_run(to);
	tostat.tos_run_softclock++;
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct timeout *first_new, *to;
	int needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;
		if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
			softclock_process_kclock_timeout(to, new);
		else
			softclock_process_tick_timeout(to, new);
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored", 0);
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}
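
/*
 * The routines below back ddb(4)'s "show callout" command: they print the
 * per-kclock last-scan times followed by one line per pending timeout
 * (time remaining, clock, wheel position or queue, argument, function).
 */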

#ifdef DDB
const char *db_kclock(int);
void db_show_callout_bucket(struct circq *);
void db_show_timeout(struct timeout *, struct circq *);
const char *db_timespec(const struct timespec *);

const char *
db_kclock(int kclock)
{
	switch (kclock) {
	case KCLOCK_UPTIME:
		return "uptime";
	default:
		return "invalid";
	}
}

const char *
db_timespec(const struct timespec *ts)
{
	static char buf[32];
	struct timespec tmp, zero;

	if (ts->tv_sec >= 0) {
		snprintf(buf, sizeof(buf), "%lld.%09ld",
		    ts->tv_sec, ts->tv_nsec);
		return buf;
	}

	timespecclear(&zero);
	timespecsub(&zero, ts, &tmp);
	snprintf(buf, sizeof(buf), "-%lld.%09ld", tmp.tv_sec, tmp.tv_nsec);
	return buf;
}

void
db_show_callout_bucket(struct circq *bucket)
{
	struct circq *p;

	CIRCQ_FOREACH(p, bucket)
		db_show_timeout(timeout_from_circq(p), bucket);
}

void
db_show_timeout(struct timeout *to, struct circq *bucket)
{
	struct timespec remaining;
	struct kclock *kc;
	char buf[8];
	db_expr_t offset;
	struct circq *wheel;
	char *name, *where;
	int width = sizeof(long) * 2;

	db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
	name = name ? name : "?";
	if (bucket == &timeout_new)
		where = "new";
	else if (bucket == &timeout_todo)
		where = "softint";
	else if (bucket == &timeout_proc)
		where = "thread";
	else {
		if (ISSET(to->to_flags, TIMEOUT_KCLOCK))
			wheel = timeout_wheel_kc;
		else
			wheel = timeout_wheel;
		snprintf(buf, sizeof(buf), "%3ld/%1ld",
		    (bucket - wheel) % WHEELSIZE,
		    (bucket - wheel) / WHEELSIZE);
		where = buf;
	}
	if (ISSET(to->to_flags, TIMEOUT_KCLOCK)) {
		kc = &timeout_kclock[to->to_kclock];
		timespecsub(&to->to_abstime, &kc->kc_lastscan, &remaining);
		db_printf("%20s %8s %7s 0x%0*lx %s\n",
		    db_timespec(&remaining), db_kclock(to->to_kclock), where,
		    width, (ulong)to->to_arg, name);
	} else {
		db_printf("%20d %8s %7s 0x%0*lx %s\n",
		    to->to_time - ticks, "ticks", where,
		    width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	struct kclock *kc;
	int width = sizeof(long) * 2 + 2;
	int b, i;

	db_printf("%20s %8s\n", "lastscan", "clock");
	db_printf("%20d %8s\n", ticks, "ticks");
	for (i = 0; i < nitems(timeout_kclock); i++) {
		kc = &timeout_kclock[i];
		db_printf("%20s %8s\n",
		    db_timespec(&kc->kc_lastscan), db_kclock(i));
	}
	db_printf("\n");
	db_printf("%20s %8s %7s %*s %s\n",
	    "remaining", "clock", "wheel", width, "arg", "func");
	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
	for (b = 0; b < nitems(timeout_wheel_kc); b++)
		db_show_callout_bucket(&timeout_wheel_kc[b]);
}
#endif