/*	$OpenBSD: kern_timeout.c,v 1.79 2020/08/07 00:45:25 cheloha Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/sysctl.h>
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

#include "kcov.h"
#if NKCOV > 0
#include <sys/kcov.h>
#endif

/*
 * Locks used to protect global variables in this file:
 *
 *	I	immutable after initialization
 *	T	timeout_mutex
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

void *softclock_si;			/* [I] softclock() interrupt handle */
struct timeoutstat tostat;		/* [T] statistics and totals */

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the value
 * of the global variable "ticks" when the timeout should be called. There are
 * four levels with 256 buckets each.
 */
#define BUCKETS 1024
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8

struct circq timeout_wheel[BUCKETS];	/* [T] Queues of timeouts */
struct circq timeout_new;		/* [T] New, unscheduled timeouts */
struct circq timeout_todo;		/* [T] Due or needs rescheduling */
struct circq timeout_proc;		/* [T] Due + needs process context */

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_CONCAT(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
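
/*
 * For example, a timeout due at to_time == 1300 that is 300 ticks away
 * is beyond level 0 (300 > 1 << WHEELBITS) but within level 1
 * (300 <= 1 << (2*WHEELBITS)), so BUCKET(300, 1300) selects
 * timeout_wheel[MASKWHEEL(1, 1300) + WHEELSIZE], i.e. slot
 * ((1300 >> 8) & 255) + 256 == 261.  Whenever a lower wheel wraps
 * around to zero, timeout_hardclock_update() uses MOVEBUCKET() to dump
 * the next level's current bucket onto timeout_todo so that softclock()
 * can re-bucket or run its members.  (The numbers above are purely
 * illustrative.)
 */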

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT_TAIL(list, elem) do {	\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
	tostat.tos_pending++;			\
} while (0)

#define CIRCQ_CONCAT(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
	tostat.tos_pending--;			\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

#define CIRCQ_FOREACH(elem, list)		\
	for ((elem) = CIRCQ_FIRST(list);	\
	    (elem) != (list);			\
	    (elem) = CIRCQ_FIRST(elem))

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

void softclock(void *);
void softclock_create_thread(void *);
void softclock_thread(void *);
void timeout_proc_barrier(void *);

/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

static inline void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static inline void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static inline void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
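
/*
 * For example, if ticks == INT_MAX - 10 and a timeout is added 20 ticks
 * out, to_time wraps around to INT_MIN + 9.  Any direct comparison of
 * to_time against ticks would call that timeout long overdue, yet the
 * difference to_time - ticks is still 20 (relying on two's complement
 * wraparound, as the code below does), so the timeout is correctly
 * treated as 20 ticks in the future.
 */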

void
timeout_startup(void)
{
	int b;

	CIRCQ_INIT(&timeout_new);
	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
}

void
timeout_proc_init(void)
{
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("%s: unable to register softclock interrupt", __func__);

	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, 0);
}

void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int flags)
{
	to->to_func = fn;
	to->to_arg = arg;
	to->to_process = NULL;
	to->to_flags = flags | TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set_flags(new, fn, arg, TIMEOUT_PROC);
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

	KASSERT(ISSET(new->to_flags, TIMEOUT_INITIALIZED));
	KASSERT(to_ticks >= 0);

	mtx_enter(&timeout_mutex);

	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	CLR(new->to_flags, TIMEOUT_TRIGGERED);

	/*
	 * If this timeout already is scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (ISSET(new->to_flags, TIMEOUT_ONQUEUE)) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
		}
		tostat.tos_readded++;
		ret = 0;
	} else {
		SET(new->to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_new, &new->to_list);
	}
#if NKCOV > 0
	new->to_process = curproc->p_p;
#endif
	tostat.tos_added++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return timeout_add(to, (int)to_ticks);
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return timeout_add(to, to_ticks);
}
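
/*
 * Illustrative use of the interface above (the "foo" driver, its softc
 * "sc" and foo_tick() are hypothetical): a consumer pairs timeout_set()
 * with one of the timeout_add_*() wrappers,
 *
 *	timeout_set(&sc->sc_tick_tmo, foo_tick, sc);
 *	timeout_add_msec(&sc->sc_tick_tmo, 100);
 *
 * after which foo_tick(sc) runs from softclock roughly 100ms later and
 * may re-arm itself with another timeout_add_msec() call if it wants to
 * run periodically.
 */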

int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (ISSET(to->to_flags, TIMEOUT_ONQUEUE)) {
		CIRCQ_REMOVE(&to->to_list);
		CLR(to->to_flags, TIMEOUT_ONQUEUE);
		tostat.tos_cancelled++;
		ret = 1;
	}
	CLR(to->to_flags, TIMEOUT_TRIGGERED);
	tostat.tos_deleted++;
	mtx_leave(&timeout_mutex);

	return ret;
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_PROC));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return removed;
}

void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);
		barrier.to_process = curproc->p_p;

		mtx_enter(&timeout_mutex);
		SET(barrier.to_flags, TIMEOUT_ONQUEUE);
		CIRCQ_INSERT_TAIL(&timeout_proc, &barrier.to_list);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}
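
/*
 * Illustrative teardown (hypothetical softc "sc"): detach paths use
 * timeout_del_barrier() rather than timeout_del() when the handler's
 * argument is about to be freed, since a handler already dispatched by
 * softclock() or the softclock thread may still be running:
 *
 *	timeout_del_barrier(&sc->sc_tick_tmo);
 *	free(sc, M_DEVBUF, sizeof(*sc));
 */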

/*
 * This is called from hardclock() on the primary CPU at the start of
 * every tick.
 */
void
timeout_hardclock_update(void)
{
	int need_softclock = 1;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}

	if (CIRCQ_EMPTY(&timeout_new) && CIRCQ_EMPTY(&timeout_todo))
		need_softclock = 0;

	mtx_leave(&timeout_mutex);

	if (need_softclock)
		softintr_schedule(softclock_si);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	CLR(to->to_flags, TIMEOUT_ONQUEUE);
	SET(to->to_flags, TIMEOUT_TRIGGERED);

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_PROC);

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
#if NKCOV > 0
	struct process *kcov_process = to->to_process;
	kcov_remote_enter(KCOV_REMOTE_COMMON, kcov_process);
#endif
	fn(arg);
#if NKCOV > 0
	kcov_remote_leave(KCOV_REMOTE_COMMON, kcov_process);
#endif
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

/*
 * Timeouts are processed here instead of timeout_hardclock_update()
 * to avoid doing any more work at IPL_CLOCK than absolutely necessary.
 * Down here at IPL_SOFTCLOCK other interrupts can be serviced promptly
 * so the system remains responsive even if there is a surge of timeouts.
 */
void
softclock(void *arg)
{
	struct circq *bucket;
	struct timeout *first_new, *to;
	int delta, needsproc, new;

	first_new = NULL;
	new = 0;

	mtx_enter(&timeout_mutex);
	if (!CIRCQ_EMPTY(&timeout_new))
		first_new = timeout_from_circq(CIRCQ_FIRST(&timeout_new));
	CIRCQ_CONCAT(&timeout_todo, &timeout_new);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);
		if (to == first_new)
			new = 1;

		/*
		 * If due run it or defer execution to the thread,
		 * otherwise insert it into the right bucket.
		 */
		delta = to->to_time - ticks;
		if (delta > 0) {
			bucket = &BUCKET(delta, to->to_time);
			CIRCQ_INSERT_TAIL(bucket, &to->to_list);
			tostat.tos_scheduled++;
			if (!new)
				tostat.tos_rescheduled++;
			continue;
		}
		if (!new && delta < 0)
			tostat.tos_late++;
		if (ISSET(to->to_flags, TIMEOUT_PROC)) {
			CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
			continue;
		}
		timeout_run(to);
		tostat.tos_run_softclock++;
	}
	tostat.tos_softclocks++;
	needsproc = !CIRCQ_EMPTY(&timeout_proc);
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;
	int s;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	s = splsoftclock();
	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
			tostat.tos_run_thread++;
		}
		tostat.tos_thread_wakeups++;
		mtx_leave(&timeout_mutex);
	}
	splx(s);
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT_TAIL(&timeout_todo, &to->to_list);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

int
timeout_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	struct timeoutstat status;

	mtx_enter(&timeout_mutex);
	memcpy(&status, &tostat, sizeof(status));
	mtx_leave(&timeout_mutex);

	return sysctl_rdstruct(oldp, oldlenp, newp, &status, sizeof(status));
}

#ifdef DDB
void db_show_callout_bucket(struct circq *);

void
db_show_callout_bucket(struct circq *bucket)
{
	char buf[8];
	struct timeout *to;
	struct circq *p;
	db_expr_t offset;
	char *name, *where;
	int width = sizeof(long) * 2;

	CIRCQ_FOREACH(p, bucket) {
		to = timeout_from_circq(p);
		db_find_sym_and_offset((vaddr_t)to->to_func, &name, &offset);
		name = name ? name : "?";
		if (bucket == &timeout_todo)
			where = "softint";
		else if (bucket == &timeout_proc)
			where = "thread";
		else if (bucket == &timeout_new)
			where = "new";
		else {
			snprintf(buf, sizeof(buf), "%3ld/%1ld",
			    (bucket - timeout_wheel) % WHEELSIZE,
			    (bucket - timeout_wheel) / WHEELSIZE);
			where = buf;
		}
		db_printf("%9d %7s 0x%0*lx %s\n",
		    to->to_time - ticks, where, width, (ulong)to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	int width = sizeof(long) * 2 + 2;
	int b;

	db_printf("ticks now: %d\n", ticks);
	db_printf("%9s %7s %*s func\n", "ticks", "wheel", width, "arg");

	db_show_callout_bucket(&timeout_new);
	db_show_callout_bucket(&timeout_todo);
	db_show_callout_bucket(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
}
#endif