/*	$OpenBSD: kern_timeout.c,v 1.55 2019/04/23 13:35:12 visa Exp $	*/
/*
 * Copyright (c) 2001 Thomas Nordin <nordin@openbsd.org>
 * Copyright (c) 2000-2001 Artur Grabowski <art@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/timeout.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/queue.h>			/* _Q_INVALIDATE */
#include <sys/witness.h>

#ifdef DDB
#include <machine/db_machdep.h>
#include <ddb/db_interface.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>
#endif

/*
 * Timeouts are kept in a hierarchical timing wheel. The to_time is the
 * value of the global variable "ticks" when the timeout should be called.
 * There are four levels with 256 buckets each. See 'Scheme 7' in "Hashed
 * and Hierarchical Timing Wheels: Efficient Data Structures for
 * Implementing a Timer Facility" by George Varghese and Tony Lauck.
 */
#define BUCKETS 1024
#define WHEELSIZE 256
#define WHEELMASK 255
#define WHEELBITS 8

struct circq timeout_wheel[BUCKETS];	/* Queues of timeouts */
struct circq timeout_todo;		/* Worklist */
struct circq timeout_proc;		/* Due timeouts needing proc. context */

#define MASKWHEEL(wheel, time) (((time) >> ((wheel)*WHEELBITS)) & WHEELMASK)

#define BUCKET(rel, abs)						\
    (timeout_wheel[							\
	((rel) <= (1 << (2*WHEELBITS)))					\
	    ? ((rel) <= (1 << WHEELBITS))				\
		? MASKWHEEL(0, (abs))					\
		: MASKWHEEL(1, (abs)) + WHEELSIZE			\
	    : ((rel) <= (1 << (3*WHEELBITS)))				\
		? MASKWHEEL(2, (abs)) + 2*WHEELSIZE			\
		: MASKWHEEL(3, (abs)) + 3*WHEELSIZE])

#define MOVEBUCKET(wheel, time)						\
    CIRCQ_APPEND(&timeout_todo,						\
        &timeout_wheel[MASKWHEEL((wheel), (time)) + (wheel)*WHEELSIZE])
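/*
 * A worked example of BUCKET(), with illustrative values (not taken
 * from the code above): suppose abs = to->to_time = 0x12345 and the
 * timeout is rel = 768 ticks in the future.  768 is greater than
 * (1 << WHEELBITS) but no greater than (1 << 2*WHEELBITS), so level 1
 * is selected:
 *
 *	MASKWHEEL(1, 0x12345) = (0x12345 >> 8) & 0xff = 0x23 = 35
 *
 * and the timeout hangs off timeout_wheel[35 + WHEELSIZE], bucket 291.
 */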
/*
 * The first thing in a struct timeout is its struct circq, so we
 * can get back from a pointer to the latter to a pointer to the
 * whole timeout with just a cast.
 */
static __inline struct timeout *
timeout_from_circq(struct circq *p)
{
	return ((struct timeout *)(p));
}

/*
 * All wheels are locked with the same mutex.
 *
 * We need locking since the timeouts are manipulated from hardclock that's
 * not behind the big lock.
 */
struct mutex timeout_mutex = MUTEX_INITIALIZER(IPL_HIGH);

/*
 * Circular queue definitions.
 */

#define CIRCQ_INIT(elem) do {			\
	(elem)->next = (elem);			\
	(elem)->prev = (elem);			\
} while (0)

#define CIRCQ_INSERT(elem, list) do {		\
	(elem)->prev = (list)->prev;		\
	(elem)->next = (list);			\
	(list)->prev->next = (elem);		\
	(list)->prev = (elem);			\
} while (0)

#define CIRCQ_APPEND(fst, snd) do {		\
	if (!CIRCQ_EMPTY(snd)) {		\
		(fst)->prev->next = (snd)->next;\
		(snd)->next->prev = (fst)->prev;\
		(snd)->prev->next = (fst);	\
		(fst)->prev = (snd)->prev;	\
		CIRCQ_INIT(snd);		\
	}					\
} while (0)

#define CIRCQ_REMOVE(elem) do {			\
	(elem)->next->prev = (elem)->prev;	\
	(elem)->prev->next = (elem)->next;	\
	_Q_INVALIDATE((elem)->prev);		\
	_Q_INVALIDATE((elem)->next);		\
} while (0)

#define CIRCQ_FIRST(elem) ((elem)->next)

#define CIRCQ_EMPTY(elem) (CIRCQ_FIRST(elem) == (elem))

void softclock_thread(void *);
void softclock_create_thread(void *);

#ifdef WITNESS
struct lock_object timeout_sleeplock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_object timeout_spinlock_obj = {
	.lo_name = "timeout",
	.lo_flags = LO_WITNESS | LO_INITIALIZED |
	    (LO_CLASS_MUTEX << LO_CLASSSHIFT)
};
struct lock_type timeout_sleeplock_type = {
	.lt_name = "timeout"
};
struct lock_type timeout_spinlock_type = {
	.lt_name = "timeout"
};
#define TIMEOUT_LOCK_OBJ(needsproc) \
	((needsproc) ? &timeout_sleeplock_obj : &timeout_spinlock_obj)
#endif

static void
timeout_sync_order(int needsproc)
{
	WITNESS_CHECKORDER(TIMEOUT_LOCK_OBJ(needsproc), LOP_NEWORDER, NULL);
}

static void
timeout_sync_enter(int needsproc)
{
	timeout_sync_order(needsproc);
	WITNESS_LOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

static void
timeout_sync_leave(int needsproc)
{
	WITNESS_UNLOCK(TIMEOUT_LOCK_OBJ(needsproc), 0);
}

/*
 * Some of the "math" in here is a bit tricky.
 *
 * We have to beware of wrapping ints.
 * We use the fact that any element added to the queue must be added with a
 * positive time. That means that any element `to' on the queue cannot be
 * scheduled to timeout further in time than INT_MAX, but to->to_time can
 * be positive or negative so comparing it with anything is dangerous.
 * The only way we can use the to->to_time value in any predictable way
 * is when we calculate how far in the future `to' will timeout -
 * "to->to_time - ticks". The result will always be positive for future
 * timeouts and 0 or negative for due timeouts.
 */
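/*
 * For example (illustrative numbers, and assuming the usual two's
 * complement wraparound on subtraction): if ticks == INT_MAX and a
 * timeout is added with to_ticks == 10, then to->to_time wraps around
 * to INT_MIN + 9.  Comparing to->to_time against ticks directly would
 * call the timeout long overdue, but the difference
 *
 *	to->to_time - ticks == (INT_MIN + 9) - INT_MAX == 10
 *
 * correctly says it is 10 ticks in the future.
 */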
void
timeout_startup(void)
{
	int b;

	CIRCQ_INIT(&timeout_todo);
	CIRCQ_INIT(&timeout_proc);
	for (b = 0; b < nitems(timeout_wheel); b++)
		CIRCQ_INIT(&timeout_wheel[b]);
}

void
timeout_proc_init(void)
{
	WITNESS_INIT(&timeout_sleeplock_obj, &timeout_sleeplock_type);
	WITNESS_INIT(&timeout_spinlock_obj, &timeout_spinlock_type);

	kthread_create_deferred(softclock_create_thread, NULL);
}

void
timeout_set(struct timeout *new, void (*fn)(void *), void *arg)
{
	new->to_func = fn;
	new->to_arg = arg;
	new->to_flags = TIMEOUT_INITIALIZED;
}

void
timeout_set_proc(struct timeout *new, void (*fn)(void *), void *arg)
{
	timeout_set(new, fn, arg);
	new->to_flags |= TIMEOUT_NEEDPROCCTX;
}

int
timeout_add(struct timeout *new, int to_ticks)
{
	int old_time;
	int ret = 1;

#ifdef DIAGNOSTIC
	if (!(new->to_flags & TIMEOUT_INITIALIZED))
		panic("timeout_add: not initialized");
	if (to_ticks < 0)
		panic("timeout_add: to_ticks (%d) < 0", to_ticks);
#endif

	mtx_enter(&timeout_mutex);
	/* Initialize the time here, it won't change. */
	old_time = new->to_time;
	new->to_time = to_ticks + ticks;
	new->to_flags &= ~TIMEOUT_TRIGGERED;

	/*
	 * If this timeout is already scheduled and now is moved
	 * earlier, reschedule it now. Otherwise leave it in place
	 * and let it be rescheduled later.
	 */
	if (new->to_flags & TIMEOUT_ONQUEUE) {
		if (new->to_time - ticks < old_time - ticks) {
			CIRCQ_REMOVE(&new->to_list);
			CIRCQ_INSERT(&new->to_list, &timeout_todo);
		}
		ret = 0;
	} else {
		new->to_flags |= TIMEOUT_ONQUEUE;
		CIRCQ_INSERT(&new->to_list, &timeout_todo);
	}
	mtx_leave(&timeout_mutex);

	return (ret);
}

int
timeout_add_tv(struct timeout *to, const struct timeval *tv)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * tv->tv_sec + tv->tv_usec / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && tv->tv_usec > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_ts(struct timeout *to, const struct timespec *ts)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * ts->tv_sec + ts->tv_nsec / (tick * 1000);
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && ts->tv_nsec > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_bt(struct timeout *to, const struct bintime *bt)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * bt->sec + (long)(((uint64_t)1000000 *
	    (uint32_t)(bt->frac >> 32)) >> 32) / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && bt->frac > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_sec(struct timeout *to, int secs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)hz * secs;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_msec(struct timeout *to, int msecs)
{
	uint64_t to_ticks;

	to_ticks = (uint64_t)msecs * 1000 / tick;
	if (to_ticks > INT_MAX)
		to_ticks = INT_MAX;
	if (to_ticks == 0 && msecs > 0)
		to_ticks = 1;

	return (timeout_add(to, (int)to_ticks));
}

int
timeout_add_usec(struct timeout *to, int usecs)
{
	int to_ticks = usecs / tick;

	if (to_ticks == 0 && usecs > 0)
		to_ticks = 1;

	return (timeout_add(to, to_ticks));
}

int
timeout_add_nsec(struct timeout *to, int nsecs)
{
	int to_ticks = nsecs / (tick * 1000);

	if (to_ticks == 0 && nsecs > 0)
		to_ticks = 1;

	return (timeout_add(to, to_ticks));
}
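/*
 * Taken together, a typical consumer of the timeout_add_* family looks
 * something like the sketch below (hypothetical names; "sc" would be
 * some driver softc and mydrv_tick() its callback):
 *
 *	timeout_set(&sc->sc_tmo, mydrv_tick, sc);
 *	timeout_add_msec(&sc->sc_tmo, 500);
 *
 * and mydrv_tick() can re-arm itself with another timeout_add_msec()
 * call if it wants to run periodically.
 */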
int
timeout_del(struct timeout *to)
{
	int ret = 0;

	mtx_enter(&timeout_mutex);
	if (to->to_flags & TIMEOUT_ONQUEUE) {
		CIRCQ_REMOVE(&to->to_list);
		to->to_flags &= ~TIMEOUT_ONQUEUE;
		ret = 1;
	}
	to->to_flags &= ~TIMEOUT_TRIGGERED;
	mtx_leave(&timeout_mutex);

	return (ret);
}

int
timeout_del_barrier(struct timeout *to)
{
	int removed;

	timeout_sync_order(ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX));

	removed = timeout_del(to);
	if (!removed)
		timeout_barrier(to);

	return (removed);
}

void timeout_proc_barrier(void *);

void
timeout_barrier(struct timeout *to)
{
	int needsproc = ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX);

	timeout_sync_order(needsproc);

	if (!needsproc) {
		KERNEL_LOCK();
		splx(splsoftclock());
		KERNEL_UNLOCK();
	} else {
		struct cond c = COND_INITIALIZER();
		struct timeout barrier;

		timeout_set_proc(&barrier, timeout_proc_barrier, &c);

		mtx_enter(&timeout_mutex);
		barrier.to_flags |= TIMEOUT_ONQUEUE;
		CIRCQ_INSERT(&barrier.to_list, &timeout_proc);
		mtx_leave(&timeout_mutex);

		wakeup_one(&timeout_proc);

		cond_wait(&c, "tmobar");
	}
}

void
timeout_proc_barrier(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}
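/*
 * A sketch of the intended use, again with hypothetical names: a
 * driver tearing down its softc can do
 *
 *	timeout_del_barrier(&sc->sc_tmo);
 *	free(sc, M_DEVBUF, sizeof(*sc));
 *
 * and be sure the handler is neither pending nor still running when
 * the memory is released.
 */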
/*
 * This is called from hardclock() once every tick.
 * We return !0 if we need to schedule a softclock.
 *
 * The level-0 bucket for the current tick is always moved onto the
 * worklist. Each time a wheel's index wraps back to zero, the matching
 * bucket at the next level is cascaded onto the worklist as well:
 * level 1 every 2^8 ticks, level 2 every 2^16, level 3 every 2^24.
 */
int
timeout_hardclock_update(void)
{
	int ret;

	mtx_enter(&timeout_mutex);

	MOVEBUCKET(0, ticks);
	if (MASKWHEEL(0, ticks) == 0) {
		MOVEBUCKET(1, ticks);
		if (MASKWHEEL(1, ticks) == 0) {
			MOVEBUCKET(2, ticks);
			if (MASKWHEEL(2, ticks) == 0)
				MOVEBUCKET(3, ticks);
		}
	}
	ret = !CIRCQ_EMPTY(&timeout_todo);
	mtx_leave(&timeout_mutex);

	return (ret);
}

void
timeout_run(struct timeout *to)
{
	void (*fn)(void *);
	void *arg;
	int needsproc;

	MUTEX_ASSERT_LOCKED(&timeout_mutex);

	to->to_flags &= ~TIMEOUT_ONQUEUE;
	to->to_flags |= TIMEOUT_TRIGGERED;

	fn = to->to_func;
	arg = to->to_arg;
	needsproc = ISSET(to->to_flags, TIMEOUT_NEEDPROCCTX);

	mtx_leave(&timeout_mutex);
	timeout_sync_enter(needsproc);
	fn(arg);
	timeout_sync_leave(needsproc);
	mtx_enter(&timeout_mutex);
}

void
softclock(void *arg)
{
	int delta;
	struct circq *bucket;
	struct timeout *to;
	int needsproc = 0;

	mtx_enter(&timeout_mutex);
	while (!CIRCQ_EMPTY(&timeout_todo)) {
		to = timeout_from_circq(CIRCQ_FIRST(&timeout_todo));
		CIRCQ_REMOVE(&to->to_list);

		/*
		 * If due, run it or defer execution to the thread;
		 * otherwise insert it into the right bucket.
		 */
		delta = to->to_time - ticks;
		if (delta > 0) {
			bucket = &BUCKET(delta, to->to_time);
			CIRCQ_INSERT(&to->to_list, bucket);
		} else if (to->to_flags & TIMEOUT_NEEDPROCCTX) {
			CIRCQ_INSERT(&to->to_list, &timeout_proc);
			needsproc = 1;
		} else {
#ifdef DEBUG
			if (delta < 0)
				printf("timeout delayed %d\n", delta);
#endif
			timeout_run(to);
		}
	}
	mtx_leave(&timeout_mutex);

	if (needsproc)
		wakeup(&timeout_proc);
}

void
softclock_create_thread(void *arg)
{
	if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
		panic("fork softclock");
}

void
softclock_thread(void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct sleep_state sls;
	struct timeout *to;

	KERNEL_ASSERT_LOCKED();

	/* Be conservative for the moment */
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci))
			break;
	}
	KASSERT(ci != NULL);
	sched_peg_curproc(ci);

	for (;;) {
		sleep_setup(&sls, &timeout_proc, PSWP, "bored");
		sleep_finish(&sls, CIRCQ_EMPTY(&timeout_proc));

		mtx_enter(&timeout_mutex);
		while (!CIRCQ_EMPTY(&timeout_proc)) {
			to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
			CIRCQ_REMOVE(&to->to_list);
			timeout_run(to);
		}
		mtx_leave(&timeout_mutex);
	}
}

#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
{
	struct timeout *to;
	struct circq *p;
	int new_ticks, b;

	/* adjusting the monotonic clock backwards would be a Bad Thing */
	if (adj <= 0)
		return;

	mtx_enter(&timeout_mutex);
	new_ticks = ticks + adj;
	for (b = 0; b < nitems(timeout_wheel); b++) {
		p = CIRCQ_FIRST(&timeout_wheel[b]);
		while (p != &timeout_wheel[b]) {
			to = timeout_from_circq(p);
			p = CIRCQ_FIRST(p);

			/* when moving a timeout forward we need to reinsert it */
			if (to->to_time - ticks < adj)
				to->to_time = new_ticks;
			CIRCQ_REMOVE(&to->to_list);
			CIRCQ_INSERT(&to->to_list, &timeout_todo);
		}
	}
	ticks = new_ticks;
	mtx_leave(&timeout_mutex);
}
#endif

#ifdef DDB
void db_show_callout_bucket(struct circq *);

void
db_show_callout_bucket(struct circq *bucket)
{
	struct timeout *to;
	struct circq *p;
	db_expr_t offset;
	char *name;

	for (p = CIRCQ_FIRST(bucket); p != bucket; p = CIRCQ_FIRST(p)) {
		to = timeout_from_circq(p);
		db_find_sym_and_offset((db_addr_t)to->to_func, &name, &offset);
		name = name ? name : "?";
		db_printf("%9d %2td/%-4td %p %s\n", to->to_time - ticks,
		    (bucket - timeout_wheel) / WHEELSIZE,
		    bucket - timeout_wheel, to->to_arg, name);
	}
}

void
db_show_callout(db_expr_t addr, int haddr, db_expr_t count, char *modif)
{
	int b;

	db_printf("ticks now: %d\n", ticks);
	db_printf("    ticks  wheel       arg  func\n");

	db_show_callout_bucket(&timeout_todo);
	for (b = 0; b < nitems(timeout_wheel); b++)
		db_show_callout_bucket(&timeout_wheel[b]);
}
#endif
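/*
 * db_show_callout() above backs the ddb(4) "show callout" command:
 * from the in-kernel debugger prompt, "show callout" walks the
 * worklist and every wheel bucket and prints one line per pending
 * timeout, showing how many ticks remain, which wheel and bucket it
 * sits in, its argument, and the symbol name of its callback.
 */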