/*	$OpenBSD: kern_timeout.c,v 1.67 2019/12/25 00:15:36 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	t	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [t] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define BUCKETS 1024
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8

struct circq timeout_wheel[BUCKETS];	/* [t] Queues of timeouts */
struct circq timeout_todo;		/* [t] Due or needs scheduling */
struct circq timeout_proc;		/* [t] Due + needs process context */

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])

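/*
 * Illustrative note on the macros above: with WHEELBITS = 8, BUCKET()
 * picks the wheel level from the relative expiry time and the slot
 * within that level from the absolute expiry time.  For example, a
 * timeout due 1000 ticks from now exceeds 1 << WHEELBITS but not
 * 1 << (2*WHEELBITS), so it lands in level 1, indexed by bits 8-15 of
 * its absolute to_time.  MOVEBUCKET() later cascades that bucket onto
 * timeout_todo so softclock() can re-sort it into a finer level.
 */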

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_thread(void *);
void timeout_proc_barrier(void *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */

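/*
 * A worked example of the wrap-safe arithmetic described above, assuming
 * the usual two's-complement wraparound for the kernel's int arithmetic:
 * if ticks == INT_MAX and a timeout is added with to_ticks == 10, then
 * to->to_time wraps to INT_MIN + 9.  Comparing to_time against ticks
 * directly would be meaningless, but "to->to_time - ticks" wraps back
 * to 10, so the timeout is correctly seen as 10 ticks in the future.
 */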

void
timeout_startup(void)
{
	int b;

	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	new->to_func = fn;
	new->to_arg = arg;
	new->to_flags = TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set(new, fn, arg);
	SET(new->to_flags, TIMEOUT_NEEDPROCCTX);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED | TIMEOUT_SCHEDULED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_todo, &new->to_list);
	}
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_ts(struct timeout *to, const struct timespec *ts)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * ts->tv_sec + ts->tv_nsec / (tick * 1000);
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && ts->tv_nsec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_bt(struct timeout *to, const struct bintime *bt)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * bt->sec + (long)(((uint64_t)1000000 *
	    (uint32_t)(bt->frac >> 32)) >> 32) / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && bt->frac > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;

	return timeout_add(to, (int)to_ticks);
}

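/*
 * Like the tv/ts/bt variants above, the msec/usec/nsec wrappers below
 * round any nonzero interval shorter than one tick up to a single tick
 * so the timeout is not dropped outright.  For example, with hz == 100
 * (tick == 10000 microseconds), timeout_add_msec(to, 5) computes
 * 5 * 1000 / 10000 == 0 ticks and is bumped to 1, i.e. the timeout
 * fires on the next tick at the earliest.
 */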
int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED | TIMEOUT_SCHEDULED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);

		mtx_enter(&timeout_mutex);
		SET(barrier.to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	int need_softclock;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}
	need_softclock = !CIRCQ_EMPTY(&timeout_todo);

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

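/*
 * Run one expired timeout.  Called with timeout_mutex held; the mutex is
 * dropped around the callback so the handler may reschedule itself or
 * manipulate other timeouts, then reacquired before returning.
 */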
void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE | TIMEOUT_SCHEDULED);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX);

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
	fn(arg);
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	int delta;
	struct circq *bucket;
	struct timeout *to;
	int needsproc = 0;

	mtx_enter(&timeout_mutex);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);

		/*
		 * If due run it or defer execution to the thread,
		 * otherwise insert it into the right bucket.
		 */
		delta = to->to_time - ticks;
		if (delta > 0) {
			bucket = &BUCKET(delta, to->to_time);
			CIRCQ_INSERT_TAIL(bucket, &to->to_list);
			if (ISSET(to->to_flags, TIMEOUT_SCHEDULED))
				tostat.tos_rescheduled++;
			else
				SET(to->to_flags, TIMEOUT_SCHEDULED);
			tostat.tos_scheduled++;
			continue;
		}
		if (ISSET(to->to_flags, TIMEOUT_SCHEDULED) && delta < 0)
			tostat.tos_late++;
		if (ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX)) {
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
			needsproc = 1;
			continue;
		}
		timeout_run(to);
		tostat.tos_run_softclock++;
	}
	tostat.tos_softclocks++;
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
void db_show_callout_bucket(struct circq *);

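/*
 * Print one line per timeout in the given bucket: ticks until it is due
 * (negative if overdue), where it will run ("softint", "thread", or its
 * slot/level in the wheel), its argument, and its callback function.
 */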
void
db_show_callout_bucket(struct circq *bucket)
{
	char buf[8];
	struct timeout *to;
	struct circq *p;
	db_expr_t offset;
	char *name, *where;
	int width = sizeof(long) * 2;

	CIRCQ_FOREACH(p, bucket) {
		to = timeout_from_circq(p);
		db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
		name = name ? name : "?";
		if (bucket == &timeout_todo)
			where = "softint";
		else if (bucket == &timeout_proc)
			where = "thread";
		else {
			snprintf(buf, sizeof(buf), "%3ld/%1ld",
			    (bucket - timeout_wheel) % WHEELSIZE,
			    (bucket - timeout_wheel) / WHEELSIZE);
			where = buf;
		}
		db_printf("%9d %7s 0x%0*lx %s\n",
		    to->to_time - ticks, where, width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	int width = sizeof(long) * 2 + 2;
	int b;

	db_printf("ticks now: %d\n", ticks);
	db_printf("%9s %7s %*s func\n", "ticks", "wheel", width, "arg");

	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
}
#endif