/*	$OpenBSD: kern_timeout.c,v 1.72 2020/02/18 12:13:40 mpi Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	t	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [t] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define BUCKETS 1024
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8

struct circq timeout_wheel[BUCKETS];	/* [t] Queues of timeouts */
struct circq timeout_todo;		/* [t] Due or needs scheduling */
struct circq timeout_proc;		/* [t] Due + needs process context */

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

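/*
 * Worked example of the level/bucket selection above: with
 * WHEELBITS == 8, a timeout whose relative expiry "rel" is 300 ticks
 * satisfies 256 < 300 <= 65536, so BUCKET() files it on level 1, in
 * slot ((abs >> 8) & WHEELMASK) + WHEELSIZE of timeout_wheel[].
 * Whenever a lower level wraps around, MOVEBUCKET() cascades the
 * matching higher-level bucket onto timeout_todo, where softclock()
 * either runs each timeout or re-buckets it on a finer level.
 */
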
/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_thread(void *);
void timeout_proc_barrier(void *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */

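/*
 * For example, if ticks is close to INT_MAX and a timeout is added
 * with to_ticks == 10, to->to_time wraps around to a large negative
 * value, yet "to->to_time - ticks" still evaluates to 10.  The
 * comparisons in timeout_add(), softclock() and timeout_adjust_ticks()
 * are all written in terms of such differences, so they keep working
 * across the wrap.
 */
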
void
timeout_startup(void)
{
	int b;

	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED | TIMEOUT_SCHEDULED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_todo, &new->to_list);
	}
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_ts(struct timeout *to, const struct timespec *ts)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * ts->tv_sec + ts->tv_nsec / (tick * 1000);
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && ts->tv_nsec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_bt(struct timeout *to, const struct bintime *bt)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * bt->sec + (long)(((uint64_t)1000000 *
	    (uint32_t)(bt->frac >> 32)) >> 32) / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && bt->frac > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;

	return timeout_add(to, (int)to_ticks);
}

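/*
 * The sub-second helpers below convert their argument to hardclock
 * ticks using the global "tick" (the length of one tick in
 * microseconds).  For example, with the common hz=100 (tick == 10000),
 * timeout_add_msec(to, 25) computes 25 * 1000 / 10000 == 2 ticks; a
 * non-zero request that would truncate to zero ticks is rounded up to
 * one tick so it is not lost.
 */
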
int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED | TIMEOUT_SCHEDULED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);

		mtx_enter(&timeout_mutex);
		SET(barrier.to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

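/*
 * Typical consumer pattern (illustrative sketch; "sc" and mydrv_tick()
 * are hypothetical stand-ins for a driver's softc and handler, not
 * defined in this file):
 *
 *	timeout_set(&sc->sc_tmo, mydrv_tick, sc);	bind handler and argument
 *	timeout_add_sec(&sc->sc_tmo, 1);		due in roughly one second
 *	...
 *	timeout_del_barrier(&sc->sc_tmo);		on detach: cancel, and wait
 *							for any running handler
 */
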
/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	int need_softclock;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}
	need_softclock = !CIRCQ_EMPTY(&timeout_todo);

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE | TIMEOUT_SCHEDULED);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
	fn(arg);
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct circq *bucket;
	struct timeout *to;
	int delta, needsproc;

	mtx_enter(&timeout_mutex);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);

		/*
		 * If due run it or defer execution to the thread,
		 * otherwise insert it into the right bucket.
		 */
		delta = to->to_time - ticks;
		if (delta > 0) {
			bucket = &BUCKET(delta, to->to_time);
			CIRCQ_INSERT_TAIL(bucket, &to->to_list);
			if (ISSET(to->to_flags, TIMEOUT_SCHEDULED))
				tostat.tos_rescheduled++;
			else
				SET(to->to_flags, TIMEOUT_SCHEDULED);
			tostat.tos_scheduled++;
			continue;
		}
		if (ISSET(to->to_flags, TIMEOUT_SCHEDULED) && delta < 0)
			tostat.tos_late++;
		if (ISSET(to->to_flags, TIMEOUT_PROC)) {
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
			continue;
		}
		timeout_run(to);
		tostat.tos_run_softclock++;
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

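/*
 * timeout_adjust_ticks() below handles a forward step of the tick
 * counter by "adj" ticks at once: every pending timeout that would
 * have fired inside the skipped window has its to_time clamped to the
 * new tick value, and all wheel buckets are dumped onto timeout_todo
 * so the next softclock() run re-buckets or executes them.
 */
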
#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
void db_show_callout_bucket(struct circq *);

void
db_show_callout_bucket(struct circq *bucket)
{
	char buf[8];
	struct timeout *to;
	struct circq *p;
	db_expr_t offset;
	char *name, *where;
	int width = sizeof(long) * 2;

	CIRCQ_FOREACH(p, bucket) {
		to = timeout_from_circq(p);
		db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
		name = name ? name : "?";
		if (bucket == &timeout_todo)
			where = "softint";
		else if (bucket == &timeout_proc)
			where = "thread";
		else {
			snprintf(buf, sizeof(buf), "%3ld/%1ld",
			    (bucket - timeout_wheel) % WHEELSIZE,
			    (bucket - timeout_wheel) / WHEELSIZE);
			where = buf;
		}
		db_printf("%9d %7s 0x%0*lx %s\n",
		    to->to_time - ticks, where, width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	int width = sizeof(long) * 2 + 2;
	int b;

	db_printf("ticks now: %d\n", ticks);
	db_printf("%9s %7s %*s func\n", "ticks", "wheel", width, "arg");

	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
}
#endif