/*	$OpenBSD: kern_timeout.c,v 1.80 2020/09/22 13:43:28 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel.  The to_time is the value
 * of the global variable "ticks" when the timeout should be called.  There are
 * four levels with 256 buckets each.
 */
#define BUCKETS 1024
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8

struct circq timeout_wheel[BUCKETS];	/* [T] Queues of timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
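/*
 * A worked example of BUCKET() above: with WHEELBITS == 8, a timeout due
 * "rel" ticks from now is filed on level 0 if rel <= 2^8, level 1 if
 * rel <= 2^16, level 2 if rel <= 2^24, and level 3 otherwise.  The slot
 * within that level is the corresponding byte of the absolute expiry
 * time "abs".  So with abs == 0x0001234f and rel == 10 the timeout lands
 * in timeout_wheel[0x4f]; with abs == 0x00012345 and rel == 1000 it
 * lands in timeout_wheel[WHEELSIZE + 0x23].
 */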
/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_thread(void *);
void timeout_proc_barrier(void *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time.  That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks".  The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
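/*
 * For example, if ticks == INT_MAX - 15 and a timeout is added 32 ticks
 * out, to_time wraps to a negative value (INT_MIN + 16 on a two's
 * complement machine).  Comparing to_time against ticks directly would
 * get the ordering wrong, but the difference "to_time - ticks" still
 * comes out to 32, so the relative comparisons below keep working across
 * the wrap.  (This relies on the two's complement wraparound behavior
 * the kernel assumes throughout.)
 */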
void
timeout_startup(void)
{
	int b;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_process = NULL;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here; it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now.  Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}
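/*
 * A minimal usage sketch of the interface above (the driver names below
 * are hypothetical, not part of this file): a driver embeds a struct
 * timeout in its softc, initializes it once with timeout_set(), and then
 * (re)arms it with one of the timeout_add_*() wrappers.
 *
 *	struct mydev_softc {
 *		struct timeout sc_tmo;
 *	};
 *
 *	void
 *	mydev_tick(void *arg)
 *	{
 *		struct mydev_softc *sc = arg;
 *
 *		... do the periodic work ...
 *		timeout_add_msec(&sc->sc_tmo, 1000);	rearm in one second
 *	}
 *
 *	At attach time:
 *		timeout_set(&sc->sc_tmo, mydev_tick, sc);
 *		timeout_add_msec(&sc->sc_tmo, 1000);
 */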
int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);
		barrier.to_process = curproc->p_p;

		mtx_enter(&timeout_mutex);
		SET(barrier.to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	int need_softclock = 1;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
#endif

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
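/*
 * On each run softclock() first moves everything from timeout_new onto
 * timeout_todo, then drains timeout_todo: entries that are not yet due
 * are cascaded into the wheel bucket matching their remaining time,
 * entries marked TIMEOUT_PROC are queued on timeout_proc for the
 * softclock thread, and everything else is run directly via timeout_run().
 */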
void
softclock(void *arg)
{
	struct circq *bucket;
	struct timeout *first_new, *to;
	int delta, needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;

		/*
		 * If due, run it or defer execution to the thread;
		 * otherwise insert it into the right bucket.
		 */
		delta = to->to_time - ticks;
		if (delta > 0) {
			bucket = &BUCKET(delta, to->to_time);
			CIRCQ_INSERT_TAIL(bucket, &to->to_list);
			tostat.tos_scheduled++;
			if (!new)
				tostat.tos_rescheduled++;
			continue;
		}
		if (!new && delta < 0)
			tostat.tos_late++;
		if (ISSET(to->to_flags, TIMEOUT_PROC)) {
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
			continue;
		}
		timeout_run(to);
		tostat.tos_run_softclock++;
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward we need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
void db_show_callout_bucket(struct circq *);

void
db_show_callout_bucket(struct circq *bucket)
{
	char buf[8];
	struct timeout *to;
	struct circq *p;
	db_expr_t offset;
	char *name, *where;
	int width = sizeof(long) * 2;

	CIRCQ_FOREACH(p, bucket) {
		to = timeout_from_circq(p);
		db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
		name = name ? name : "?";
		if (bucket == &timeout_todo)
			where = "softint";
		else if (bucket == &timeout_proc)
			where = "thread";
		else if (bucket == &timeout_new)
			where = "new";
		else {
			snprintf(buf, sizeof(buf), "%3ld/%1ld",
			    (bucket - timeout_wheel) % WHEELSIZE,
			    (bucket - timeout_wheel) / WHEELSIZE);
			where = buf;
		}
		db_printf("%9d %7s 0x%0*lx %s\n",
		    to->to_time - ticks, where, width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	int width = sizeof(long) * 2 + 2;
	int b;

	db_printf("ticks now: %d\n", ticks);
	db_printf("%9s %7s %*s func\n", "ticks", "wheel", width, "arg");

	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
}
#endif