/*	$OpenBSD: kern_timeout.c,v 1.71 2020/01/13 09:51:52 mpi Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	t	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [t] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define BUCKETS 1024
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8

struct circq timeout_wheel[BUCKETS];	/* [t] Queues of timeouts */
struct circq timeout_todo;		/* [t] Due or needs scheduling */
struct circq timeout_proc;		/* [t] Due + needs process context */

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

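/*
 * A worked example of the wheel layout: BUCKET() maps a timeout that is
 * due in at most 1 << WHEELBITS (256) ticks to level 0, indexed by the
 * low byte of its absolute expiry time; one due in at most 65536 ticks
 * to level 1, indexed by the second byte; and so on up to level 3.
 * MOVEBUCKET() cascades in the other direction: every tick the level 0
 * bucket selected by the low byte of "ticks" is dumped onto
 * timeout_todo, and whenever that byte rolls over to zero the matching
 * level 1 bucket is dumped as well (see timeout_hardclock_update()),
 * after which softclock() re-buckets anything that is not yet due into
 * a lower level.
 */
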
/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_thread(void *);
void timeout_proc_barrier(void *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */

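/*
 * A concrete example of the wrap-safe arithmetic: suppose ticks is
 * INT_MAX - 5 when timeout_add(to, 20) runs. to_time wraps around to
 * INT_MIN + 14, so a naive "to_time <= ticks" test would claim the
 * timeout is already due, but the subtraction to_time - ticks wraps
 * back to 20 and correctly places it 20 ticks in the future.
 */
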
void
timeout_startup(void)
{
	int b;

	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED | TIMEOUT_SCHEDULED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_todo, &new->to_list);
	}
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_ts(struct timeout *to, const struct timespec *ts)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * ts->tv_sec + ts->tv_nsec / (tick * 1000);
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && ts->tv_nsec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_bt(struct timeout *to, const struct bintime *bt)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * bt->sec + (long)(((uint64_t)1000000 *
	    (uint32_t)(bt->frac >> 32)) >> 32) / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && bt->frac > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;

	return timeout_add(to, (int)to_ticks);
}

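/*
 * The sub-second helpers below (and the timeval/timespec/bintime
 * variants above) bump a non-zero request that converts to 0 ticks up
 * to 1 tick so it is not lost. For example, assuming hz is 100 (so
 * tick is 10000 microseconds), timeout_add_msec(to, 5) computes
 * 5 * 1000 / 10000 == 0 and schedules 1 tick instead. Conversions that
 * would exceed INT_MAX ticks are clamped to INT_MAX.
 */
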
int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED | TIMEOUT_SCHEDULED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);

		mtx_enter(&timeout_mutex);
		SET(barrier.to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	int need_softclock;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}
	need_softclock = !CIRCQ_EMPTY(&timeout_todo);

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE | TIMEOUT_SCHEDULED);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
	fn(arg);
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

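/*
 * Note that timeout_run() above is entered with timeout_mutex held: it
 * latches to_func and to_arg, releases the mutex around the callback so
 * the handler runs without timeout_mutex held (and may, for instance,
 * re-arm itself with timeout_add()), and reacquires the mutex before
 * returning to its caller's queue walk.
 */
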
/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct circq *bucket;
	struct timeout *to;
	int delta, needsproc;

	mtx_enter(&timeout_mutex);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);

		/*
		 * If due run it or defer execution to the thread,
		 * otherwise insert it into the right bucket.
		 */
		delta = to->to_time - ticks;
		if (delta > 0) {
			bucket = &BUCKET(delta, to->to_time);
			CIRCQ_INSERT_TAIL(bucket, &to->to_list);
			if (ISSET(to->to_flags, TIMEOUT_SCHEDULED))
				tostat.tos_rescheduled++;
			else
				SET(to->to_flags, TIMEOUT_SCHEDULED);
			tostat.tos_scheduled++;
			continue;
		}
		if (ISSET(to->to_flags, TIMEOUT_SCHEDULED) && delta < 0)
			tostat.tos_late++;
		if (ISSET(to->to_flags, TIMEOUT_PROC)) {
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
			continue;
		}
		timeout_run(to);
		tostat.tos_run_softclock++;
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

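/*
 * timeout_adjust_ticks() below handles a forward jump of the tick
 * counter (adj > 0). Every pending timeout is dumped onto timeout_todo
 * for softclock() to re-bucket; a timeout that would have expired
 * during the skipped interval (to_time - ticks < adj) has its expiry
 * clamped to the new tick count so it becomes due immediately, while
 * later timeouts keep their absolute expiry and simply run that much
 * sooner relative to the adjusted clock.
 */
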
#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
void db_show_callout_bucket(struct circq *);

void
db_show_callout_bucket(struct circq *bucket)
{
	char buf[8];
	struct timeout *to;
	struct circq *p;
	db_expr_t offset;
	char *name, *where;
	int width = sizeof(long) * 2;

	CIRCQ_FOREACH(p, bucket) {
		to = timeout_from_circq(p);
		db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
		name = name ? name : "?";
		if (bucket == &timeout_todo)
			where = "softint";
		else if (bucket == &timeout_proc)
			where = "thread";
		else {
			snprintf(buf, sizeof(buf), "%3ld/%1ld",
			    (bucket - timeout_wheel) % WHEELSIZE,
			    (bucket - timeout_wheel) / WHEELSIZE);
			where = buf;
		}
		db_printf("%9d %7s 0x%0*lx %s\n",
		    to->to_time - ticks, where, width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	int width = sizeof(long) * 2 + 2;
	int b;

	db_printf("ticks now: %d\n", ticks);
	db_printf("%9s %7s %*s func\n", "ticks", "wheel", width, "arg");

	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
}
#endif
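
/*
 * A minimal usage sketch of the API defined in this file, with a
 * hypothetical consumer (the names example_softc and example_tick are
 * illustrative only). Setup pairs timeout_set() with timeout_add_sec(),
 * and teardown uses timeout_del_barrier() so a handler that is already
 * running has finished before the structure goes away:
 *
 *	struct example_softc {
 *		struct timeout	sc_tmo;
 *	};
 *
 *	void
 *	example_tick(void *arg)
 *	{
 *		struct example_softc *sc = arg;
 *
 *		(do the periodic work, then re-arm for one second out)
 *		timeout_add_sec(&sc->sc_tmo, 1);
 *	}
 *
 *	timeout_set(&sc->sc_tmo, example_tick, sc);
 *	timeout_add_sec(&sc->sc_tmo, 1);
 *	...
 *	timeout_del_barrier(&sc->sc_tmo);
 */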