/*
 * Copyright (c) 2004,2014,2019 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * The original callout mechanism was based on the work of Adam M. Costello
 * and George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/spinlock.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/interrupt.h>
#include <sys/thread.h>
#include <sys/sysctl.h>
#ifdef CALLOUT_TYPESTABLE
#include <sys/typestable.h>
#endif
#include <vm/vm_extern.h>
#include <machine/atomic.h>

#include <sys/spinlock2.h>
#include <sys/thread2.h>
#include <sys/mplock2.h>

TAILQ_HEAD(colist, _callout);
struct softclock_pcpu;
struct _callout_mag;

/*
 * DID_INIT	- Sanity check
 * SYNC		- Synchronous waiter, request SYNCDONE and wakeup()
 * SYNCDONE	- Synchronous waiter acknowledgement
 * CANCEL_RES	- Flags that a cancel/stop prevented a callback
 * STOP_RES
 * RESET	- callout_reset() request queued
 * STOP		- callout_stop() request queued
 * INPROG	- softclock_handler thread processing in-progress on callout
 * SET		- Callout is linked to a queue (if INPROG not set)
 * AUTOLOCK	- Lockmgr cancelable interlock
 * MPSAFE	- Callout is MPSAFE
 * CANCEL	- callout_cancel() request queued
 * ACTIVE	- active/inactive tracking (see documentation).  This is
 *		  *NOT* the same as whether a callout is queued or not.
 */
#define CALLOUT_DID_INIT	0x00000001	/* frontend */
#define CALLOUT_SYNC		0x00000002	/* backend */
#define CALLOUT_SYNCDONE	0x00000004	/* frontend */
#define CALLOUT_CANCEL_RES	0x00000008	/* frontend */
#define CALLOUT_STOP_RES	0x00000010	/* frontend */
#define CALLOUT_RESET		0x00000020	/* backend */
#define CALLOUT_STOP		0x00000040	/* backend */
#define CALLOUT_INPROG		0x00000080	/* backend */
#define CALLOUT_SET		0x00000100	/* backend */
#define CALLOUT_AUTOLOCK	0x00000200	/* both */
#define CALLOUT_MPSAFE		0x00000400	/* both */
#define CALLOUT_CANCEL		0x00000800	/* backend */
#define CALLOUT_ACTIVE		0x00001000	/* frontend */

struct wheel {
	struct spinlock spin;
	struct colist	list;
};

struct softclock_pcpu {
	struct wheel	*callwheel;
	struct _callout *running;
	struct _callout * volatile next;
#ifdef CALLOUT_TYPESTABLE
	struct _callout *quick_obj;
#endif
	int		softticks;	/* softticks index */
	int		curticks;	/* per-cpu ticks counter */
	int		isrunning;
	struct thread	thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;

TAILQ_HEAD(maglist, _callout_mag);

#if 0
static int callout_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
	   &callout_debug, 0, "");
#endif

#ifdef CALLOUT_TYPESTABLE
static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");
#endif

static int cwheelsize;
static int cwheelmask;
static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];
#ifdef CALLOUT_TYPESTABLE
static struct typestable_glob callout_tsg;
#endif

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);

#ifdef CALLOUT_TYPESTABLE
/*
 * typestable callback functions.  The init function pre-initializes
 * the structure in order to allow for reuse without complete
 * reinitialization (i.e. our spinlock).
 *
 * The test function allows us to reject an allocation attempt due
 * to the object being reassociated out-of-band.
 */
static
void
_callout_typestable_init(void *obj)
{
	struct _callout *c = obj;

	spin_init(&c->spin, "_callout");
}

/*
 * Object may have been reassociated out-of-band.
 *
 * Return 1 on success with the spin-lock held, allowing reallocation.
 * Return 0 on failure with no side effects, rejecting reallocation.
 */
static
int
_callout_typestable_test(void *obj)
{
	struct _callout *c = obj;

	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG))
		return 0;
	spin_lock(&c->spin);
	if (c->flags & (CALLOUT_SET | CALLOUT_INPROG)) {
		spin_unlock(&c->spin);
		return 0;
	} else {
		return 1;
	}
}

/*
 * NOTE: sc might refer to a different cpu.
 */
static __inline
void
_callout_typestable_free(softclock_pcpu_t sc, void *obj, int tentative)
{
	if (tentative == 0) {
		obj = atomic_swap_ptr((void *)&sc->quick_obj, obj);
		if (obj == NULL)
			return;
	}
	typestable_free(&callout_tsg, obj, tentative);
}
#endif

/*
 * Post-processing helper for a callout, executes any pending request.
 * This routine handles post-processing from the softclock thread and
 * also handles request processing from the API.
 *
 * This routine does not block in any way.
 * Caller must hold c->spin.
 *
 * INPROG  - Callback is in-processing / in-progress.
 *
 * SET     - Assigned to queue or is in-processing.  If INPROG is set,
 *	     however, the _callout is no longer in the queue.
 *
 * RESET   - New timeout was installed.
 *
 * STOP    - Stop requested.
 *
 * ACTIVE  - Set on callout_reset(), cleared by callout_stop()
 *	     or callout_cancel().  Starts out cleared.
 *
 * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
 *	 must be used at all times.
 *
 * NOTE: The passed-in (sc) might refer to another cpu.
 */
static __inline
int
_callout_process_spinlocked(struct _callout *c, int fromsoftclock)
{
	struct wheel *wheel;
	int res = -1;

	/*
	 * If a callback manipulates the callout-in-progress we do
	 * a partial 'completion' of the operation so the operation
	 * can be processed synchronously and tell the softclock_handler
	 * to stop messing with it.
	 */
	if (fromsoftclock == 0 && curthread == &c->qsc->thread &&
	    c->qsc->running == c) {
		c->qsc->running = NULL;
		atomic_clear_int(&c->flags, CALLOUT_SET |
					    CALLOUT_INPROG);
	}

	/*
	 * Based on source and state
	 */
	if (fromsoftclock) {
		/*
		 * From control thread, INPROG is set, handle pending
		 * request and normal termination.
		 */
#ifdef CALLOUT_TYPESTABLE
		KASSERT(c->verifier->toc == c,
			("callout corrupt: c=%p %s/%d\n",
			 c, c->ident, c->lineno));
#else
		KASSERT(&c->verifier->toc == c,
			("callout corrupt: c=%p %s/%d\n",
			 c, c->ident, c->lineno));
#endif
		if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL overrides everything.
			 *
			 * If a RESET is pending it counts as canceling a
			 * running timer.
			 */
			if (c->flags & CALLOUT_RESET)
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_CANCEL_RES |
					       CALLOUT_STOP_RES);
			if (c->flags & CALLOUT_SYNC) {
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_SYNCDONE);
				wakeup(c->verifier);
			}
			atomic_clear_int(&c->flags, CALLOUT_SET |
						    CALLOUT_INPROG |
						    CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET |
						    CALLOUT_SYNC);
			res = 0;
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET request pending, requeue appropriately.
			 */
			atomic_clear_int(&c->flags, CALLOUT_RESET |
						    CALLOUT_INPROG);
			atomic_set_int(&c->flags, CALLOUT_SET);
			c->qsc = c->rsc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &c->qsc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - c->qsc->softticks <= 0) {
				c->qtick = c->qsc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &c->qsc->callwheel[c->qtick &
							   cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);
		} else {
			/*
			 * STOP request pending or normal termination.  Since
			 * this is from our control thread the callout has
			 * already been removed from the queue.
			 */
			if (c->flags & CALLOUT_SYNC) {
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_SYNCDONE);
				wakeup(c->verifier);
			}
			atomic_clear_int(&c->flags, CALLOUT_SET |
						    CALLOUT_INPROG |
						    CALLOUT_STOP |
						    CALLOUT_SYNC);
			res = 1;
		}
	} else if (c->flags & CALLOUT_SET) {
		/*
		 * Process request from an API function.  qtick and ACTIVE
		 * are stable while we hold c->spin.  Checking INPROG requires
		 * holding wheel->spin.
		 *
		 * If INPROG is set the control thread must handle the request
		 * for us.
		 */
		softclock_pcpu_t sc;

		sc = c->qsc;

		wheel = &sc->callwheel[c->qtick & cwheelmask];
		spin_lock(&wheel->spin);
		if (c->flags & CALLOUT_INPROG) {
			/*
			 * API requests are deferred if a callback is in
			 * progress and will be handled after the callback
			 * returns.
			 */
		} else if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL request overrides everything except INPROG
			 * (for INPROG the CANCEL is handled upon completion).
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->verifier->flags, CALLOUT_CANCEL_RES |
							    CALLOUT_STOP_RES);
			if (c->flags & CALLOUT_SYNC) {
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_SYNCDONE);
				/* direct from API no need to wakeup() */
				/* wakeup(c->verifier); */
			}
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SYNC |
						    CALLOUT_SET);
			res = 0;
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET request pending, requeue appropriately.
			 *
			 * (ACTIVE is governed by c->spin so we do not have
			 * to clear it prior to releasing wheel->spin).
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);

			atomic_clear_int(&c->flags, CALLOUT_RESET);
			/* remain ACTIVE */
			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP request
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->verifier->flags, CALLOUT_STOP_RES);
			if (c->flags & CALLOUT_SYNC) {
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_SYNCDONE);
				/* direct from API no need to wakeup() */
				/* wakeup(c->verifier); */
			}
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SYNC |
						    CALLOUT_SET);
			res = 1;
		} else {
			/*
			 * No request pending (someone else processed the
			 * request before we could)
			 */
			/* nop */
		}
		spin_unlock(&wheel->spin);
	} else {
		/*
		 * Process request from API function.  callout is not
		 * active so there's nothing for us to remove.
		 */
		KKASSERT((c->flags & CALLOUT_INPROG) == 0);
		if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL request (nothing to cancel)
			 */
			if (c->flags & CALLOUT_SYNC) {
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_SYNCDONE);
				/* direct from API no need to wakeup() */
				/* wakeup(c->verifier); */
			}
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_SYNC);
			res = 0;
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET request pending, queue appropriately.
			 * Do not queue to currently-processing tick.
			 */
			softclock_pcpu_t sc;

			sc = c->rsc;
			atomic_clear_int(&c->flags, CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_SET);
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP request (nothing to stop)
			 */
			if (c->flags & CALLOUT_SYNC) {
				atomic_set_int(&c->verifier->flags,
					       CALLOUT_SYNCDONE);
				/* direct from API no need to wakeup() */
				/* wakeup(c->verifier); */
			}
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SYNC);
			res = 1;
		} else {
			/*
			 * No request pending (someone else processed the
			 * request before we could)
			 */
			/* nop */
		}
	}
	return res;
}

/*
 * System init
 */
static void
swi_softclock_setup(void *arg)
{
	int cpu;
	int i;
	int target;

	/*
	 * Figure out how large a callwheel we need.  It must be a power of 2.
	 *
	 * ncallout is primarily based on available memory, don't explode
	 * the allocations if the system has a lot of cpus.
	 */
	target = ncallout / ncpus + 16;

	cwheelsize = 1;
	while (cwheelsize < target)
		cwheelsize <<= 1;
	cwheelmask = cwheelsize - 1;

#ifdef CALLOUT_TYPESTABLE
	typestable_init_glob(&callout_tsg, M_CALLOUT,
			     sizeof(struct _callout),
			     _callout_typestable_test,
			     _callout_typestable_init);
#endif

	/*
	 * Initialize per-cpu data structures.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu) {
		softclock_pcpu_t sc;
		int wheel_sz;

		sc = (void *)kmem_alloc3(&kernel_map, sizeof(*sc),
					 VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc, 0, sizeof(*sc));
		softclock_pcpu_ary[cpu] = sc;

		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
		sc->callwheel = (void *)kmem_alloc3(&kernel_map, wheel_sz,
						    VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc->callwheel, 0, wheel_sz);
		for (i = 0; i < cwheelsize; ++i) {
			spin_init(&sc->callwheel[i].spin, "wheel");
			TAILQ_INIT(&sc->callwheel[i].list);
		}

		/*
		 * Mark the softclock handler as being an interrupt thread
		 * even though it really isn't, but do not allow it to
		 * preempt other threads (do not assign td_preemptable).
		 *
		 * Kernel code now assumes that callouts do not preempt
		 * the cpu they were scheduled on.
		 */
		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
			    TDF_NOSTART | TDF_INTTHREAD,
			    cpu, "softclock %d", cpu);
	}
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);

/*
 * This routine is called from the hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 *
 * sc->curticks and sc->softticks are adjusted using atomic ops in order
 * to ensure that remote cpu callout installation does not race the thread.
 */
void
hardclock_softtick(globaldata_t gd)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	sc = softclock_pcpu_ary[gd->gd_cpuid];
	atomic_add_int(&sc->curticks, 1);
	if (sc->isrunning)
		return;
	if (sc->softticks == sc->curticks) {
		/*
		 * In sync, only wakeup the thread if there is something to
		 * do.
		 */
		wheel = &sc->callwheel[sc->softticks & cwheelmask];
		spin_lock(&wheel->spin);
		if (TAILQ_FIRST(&wheel->list)) {
			sc->isrunning = 1;
			spin_unlock(&wheel->spin);
			lwkt_schedule(&sc->thread);
		} else {
			atomic_add_int(&sc->softticks, 1);
			spin_unlock(&wheel->spin);
		}
	} else {
		/*
		 * Out of sync, wakeup the thread unconditionally so it can
		 * catch up.
		 */
		sc->isrunning = 1;
		lwkt_schedule(&sc->thread);
	}
}

/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick().
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	struct wheel *wheel;
	struct callout slotimer;
	int mpsafe = 1;

	/*
	 * Setup pcpu slow clocks which we want to run from the callout
	 * thread.
	 */
	callout_init_mp(&slotimer);
	callout_reset(&slotimer, hz * 10, slotimer_callback, &slotimer);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	sc = arg;
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		wheel = &sc->callwheel[sc->softticks & cwheelmask];

		spin_lock(&wheel->spin);
		sc->next = TAILQ_FIRST(&wheel->list);
		while ((c = sc->next) != NULL) {
			int error;
			int res;

			/*
			 * Match callouts for this tick.  The wheel spinlock
			 * is sufficient to set INPROG.  Once set, other
			 * threads can make only limited changes to (c).
			 */
			sc->next = TAILQ_NEXT(c, entry);
			if (c->qtick != sc->softticks)
				continue;
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->flags, CALLOUT_INPROG);
			sc->running = c;
			spin_unlock(&wheel->spin);

			/*
			 * legacy mplock support
			 */
			if (c->flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				if (mpsafe) {
					mpsafe = 0;
					get_mplock();
				}
			}

			/*
			 * Execute function (protected by INPROG)
			 */
			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
				/*
				 * Raced a stop or cancel request, do
				 * not execute.  The processing code
				 * thinks it's a normal completion so
				 * flag the fact that a cancel/stop actually
				 * prevented a callout here.
				 */
				if (c->flags & CALLOUT_CANCEL) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_CANCEL_RES |
						       CALLOUT_STOP_RES);
				} else if (c->flags & CALLOUT_STOP) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_STOP_RES);
				}
			} else if (c->flags & CALLOUT_AUTOLOCK) {
				/*
				 * Interlocked cancelable call.
				 * If the lock gets canceled we have to flag
				 * the fact that the cancel/stop actually
				 * prevented the callout here.
				 */
				error = lockmgr(c->lk, LK_EXCLUSIVE |
						       LK_CANCELABLE);
				if (error == 0) {
					c->qfunc(c->qarg);
					lockmgr(c->lk, LK_RELEASE);
				} else if (c->flags & CALLOUT_CANCEL) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_CANCEL_RES |
						       CALLOUT_STOP_RES);
				} else if (c->flags & CALLOUT_STOP) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_STOP_RES);
				}
			} else {
				/*
				 * Normal call
				 */
				c->qfunc(c->qarg);
			}

			if (sc->running == c) {
				/*
				 * We are still INPROG so (c) remains valid,
				 * but the callout is now governed by its
				 * internal spin-lock.
				 */
				spin_lock(&c->spin);
				res = _callout_process_spinlocked(c, 1);
				spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
				if (res >= 0)
					_callout_typestable_free(sc, c, res);
#endif
			}
			spin_lock(&wheel->spin);
		}
		sc->running = NULL;
		spin_unlock(&wheel->spin);
		atomic_add_int(&sc->softticks, 1);
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}

	/*
	 * Recheck in critical section to interlock against hardclock
	 */
	crit_enter();
	if (sc->softticks == (int)(sc->curticks + 1)) {
		sc->isrunning = 0;
		lwkt_deschedule_self(&sc->thread);	/* == curthread */
		lwkt_switch();
	}
	crit_exit();
	goto loop;
	/* NOT REACHED */
}

/*
 * A very slow system cleanup timer (10 second interval),
 * per-cpu.
 */
void
slotimer_callback(void *arg)
{
	struct callout *c = arg;

	slab_cleanup();
	callout_reset(c, hz * 10, slotimer_callback, c);
}

/*
 * API FUNCTIONS
 */

/*
 * Prepare a callout structure for use by callout_reset() and/or
 * callout_stop().
 *
 * The MP version of this routine requires that the callback
 * function installed by callout_reset() be MP safe.
 *
 * The LK version of this routine is also MP safe and will automatically
 * acquire the specified lock for the duration of the function call,
 * and release it after the function returns.  In addition, when autolocking
 * is used, callout_stop() becomes synchronous if the caller owns the lock.
 * callout_reset(), callout_stop(), and callout_cancel() will block
 * normally instead of spinning when a cpu race occurs.  Lock cancelation
 * is used to avoid deadlocks against the callout ring dispatch.
 *
 * The init functions can be called from any cpu and do not have to be
 * called from the cpu that the timer will eventually run on.
 */
static __inline void
_callout_setup(struct callout *cc, int flags CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = flags;		/* frontend flags */
#ifdef CALLOUT_DEBUG
#ifdef CALLOUT_TYPESTABLE
	cc->ident = ident;
	cc->lineno = lineno;
#else
	cc->toc.verifier = cc;		/* corruption detector */
	cc->toc.ident = ident;
	cc->toc.lineno = lineno;
	cc->toc.flags = flags;		/* backend flags */
#endif
#endif
}

/*
 * Associate an internal _callout with the external callout and
 * verify that the type-stable structure is still applicable (inactive
 * type-stable _callouts might have been reused for a different callout).
 * If not, a new internal structure will be allocated.
 *
 * Returns the _callout already spin-locked.
 */
static __inline
struct _callout *
_callout_gettoc(struct callout *cc)
{
	struct _callout *c;
#ifdef CALLOUT_TYPESTABLE
	softclock_pcpu_t sc;

	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	for (;;) {
		c = cc->toc;
		cpu_ccfence();
		if (c == NULL) {
			sc = softclock_pcpu_ary[mycpu->gd_cpuid];
			c = atomic_swap_ptr((void *)&sc->quick_obj, NULL);
			if (c == NULL || _callout_typestable_test(c) == 0)
				c = typestable_alloc(&callout_tsg);
			/* returns spin-locked */
			c->verifier = cc;
			c->flags = cc->flags;
			c->lk = cc->lk;
			c->ident = cc->ident;
			c->lineno = cc->lineno;
			if (atomic_cmpset_ptr(&cc->toc, NULL, c)) {
				break;
			}
			c->verifier = NULL;
			spin_unlock(&c->spin);
			_callout_typestable_free(sc, c, 0);
		} else {
			spin_lock(&c->spin);
			if (c->verifier == cc)
				break;
			spin_unlock(&c->spin);
			/* ok if atomic op fails */
			(void)atomic_cmpset_ptr(&cc->toc, c, NULL);
		}
	}
#else
	c = &cc->toc;
	spin_lock(&c->spin);
#endif
	/* returns with spin-lock held */
	return c;
}

/*
 * Macro'd in sys/callout.h for debugging.
 *
 * WARNING! tsleep() assumes this will not block
 */
void
_callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	_callout_setup(cc, CALLOUT_DID_INIT
			   CALLOUT_DEBUG_PASSTHRU);
}

void
_callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE
			   CALLOUT_DEBUG_PASSTHRU);
}

void
_callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
{
	_callout_setup(cc, CALLOUT_DID_INIT | CALLOUT_MPSAFE |
			   CALLOUT_AUTOLOCK
			   CALLOUT_DEBUG_PASSTHRU);
#ifdef CALLOUT_TYPESTABLE
	cc->lk = lk;
#else
	cc->toc.lk = lk;
#endif
}

/*
 * Start or restart a timeout.  New timeouts can be installed while the
 * current one is running.  The callout structure is installed on the
 * callwheel of the current cpu.  Callers may legally pass any value, even
 * if 0 or negative, but since the sc->curticks index may have already
 * been processed a minimum timeout of 1 tick will be enforced.
 *
 * This function will not deadlock against a running call.
 *
 * WARNING! tsleep() assumes this will not block
 */
void
callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	int res;

	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);
	atomic_set_int(&c->flags, CALLOUT_RESET);

	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
#ifdef CALLOUT_TYPESTABLE
	cc->arg = arg;	/* only used by callout_arg() */
#endif
	res = _callout_process_spinlocked(c, 0);
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif
}
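
/*
 * Illustrative sketch (not part of the implementation): a typical periodic
 * rearm pattern using callout_init_mp()/callout_reset(), mirroring the
 * slotimer usage in softclock_handler() above.  The example_softc,
 * example_timer_callback(), example_start() and example_stop() names are
 * hypothetical and exist only for this sketch.
 */
#if 0
struct example_softc {
	struct callout	example_timer;
	int		example_timer_intvl;	/* interval in ticks */
};

static void
example_timer_callback(void *arg)
{
	struct example_softc *sc = arg;

	/* ... periodic work; must be MP safe when callout_init_mp() is used ... */

	/* rearm; a new timeout may legally be installed from the callback */
	callout_reset(&sc->example_timer, sc->example_timer_intvl,
		      example_timer_callback, sc);
}

static void
example_start(struct example_softc *sc)
{
	callout_init_mp(&sc->example_timer);
	sc->example_timer_intvl = hz;		/* once a second */
	callout_reset(&sc->example_timer, sc->example_timer_intvl,
		      example_timer_callback, sc);
}

static void
example_stop(struct example_softc *sc)
{
	/* asynchronous; does not interlock against an in-progress callback */
	callout_stop(&sc->example_timer);
}
#endif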

/*
 * Same as callout_reset() but the timeout will run on a particular cpu.
 */
void
callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	globaldata_t gd;
	int res;

	gd = globaldata_find(cpuid);
	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);
	atomic_set_int(&c->flags, CALLOUT_RESET);
	atomic_clear_int(&c->flags, CALLOUT_STOP);

	sc = softclock_pcpu_ary[gd->gd_cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
#ifdef CALLOUT_TYPESTABLE
	cc->arg = arg;	/* only used by callout_arg() */
#endif
	res = _callout_process_spinlocked(c, 0);
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif
}

static __inline
void
_callout_cancel_or_stop(struct callout *cc, uint32_t flags)
{
	struct _callout *c;
	softclock_pcpu_t sc;
	uint32_t oflags;
	int res;

#ifdef CALLOUT_TYPESTABLE
	if (cc->toc == NULL || cc->toc->verifier != cc)
		return;
#else
	KKASSERT(cc->toc.verifier == cc);
#endif
	/*
	 * Setup for synchronous wait.
	 */
	atomic_clear_int(&cc->flags, CALLOUT_SYNCDONE | CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);
	oflags = c->flags;
	atomic_set_int(&c->flags, flags | CALLOUT_SYNC);
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	res = _callout_process_spinlocked(c, 0);
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif

	/*
	 * Wait for stop or completion.  NOTE: The backend only
	 * runs atomic ops on the frontend cc->flags for the sync
	 * operation.
	 *
	 * WARNING! (c) can go stale now, so do not use (c) after this
	 *	    point.
	 */
	flags = cc->flags;
	if ((flags & CALLOUT_SYNCDONE) == 0) {
#ifdef CALLOUT_TYPESTABLE
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(cc->lk, LK_CANCEL_BEG);
#else
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(c->lk, LK_CANCEL_BEG);
#endif
		while ((flags & CALLOUT_SYNCDONE) == 0) {
			tsleep_interlock(cc, 0);
			if (atomic_cmpset_int(&cc->flags,
					      flags | CALLOUT_SYNCDONE,
					      flags | CALLOUT_SYNCDONE)) {
				break;
			}
			tsleep(cc, PINTERLOCKED, "costp", 0);
			flags = cc->flags;	/* recheck after sleep */
			cpu_ccfence();
		}
#ifdef CALLOUT_TYPESTABLE
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(cc->lk, LK_CANCEL_END);
#else
		if (cc->flags & CALLOUT_AUTOLOCK)
			lockmgr(c->lk, LK_CANCEL_END);
#endif
	}

	/*
	 * If CALLOUT_SYNC was already set before we began, multiple
	 * threads may have been doing a synchronous wait.  This can
	 * cause the processing code to optimize-out the wakeup().
	 * Make sure the wakeup() is issued.
	 */
	if (oflags & CALLOUT_SYNC)
		wakeup(c->verifier);
}
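
/*
 * Illustrative sketch (not part of the implementation): the interlocked
 * sleep pattern used by _callout_cancel_or_stop() above, reduced to a
 * generic "wait for another thread to set a flag bit" helper.  The
 * tsleep_interlock()/PINTERLOCKED pairing closes the race between testing
 * the flag and going to sleep.  example_wait_for_flag() is a hypothetical
 * helper; the waking side would do atomic_set_int(flagsp, bit) followed by
 * wakeup(flagsp).
 */
#if 0
static void
example_wait_for_flag(u_int *flagsp, u_int bit)
{
	while ((*flagsp & bit) == 0) {
		tsleep_interlock(flagsp, 0);
		cpu_ccfence();
		if (*flagsp & bit)	/* recheck after interlocking */
			break;
		tsleep(flagsp, PINTERLOCKED, "exflg", 0);
	}
	cpu_ccfence();
}
#endif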

/*
 * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
 * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
 * cancel function prevents any new callout_reset()s from being issued
 * in addition to canceling the lock.  The lock will also be deactivated.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the cancel is responsible for stopping the callout.
 */
int
callout_cancel(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);

	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
}

/*
 * Currently the same as callout_cancel.  Ultimately we may wish the
 * drain function to allow a pending callout to proceed, but for now
 * we will attempt to cancel it.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the drain is responsible for stopping the callout.
 */
int
callout_drain(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_CANCEL_RES);
	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);

	return ((cc->flags & CALLOUT_CANCEL_RES) ? 1 : 0);
}

/*
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 *
 * Returns whether the STOP operation was responsible for removing a
 * queued or pending callout.
 */
int
callout_stop_async(struct callout *cc)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	uint32_t flags;
	int res;

	atomic_clear_int(&cc->flags, CALLOUT_STOP_RES | CALLOUT_ACTIVE);
#ifdef CALLOUT_TYPESTABLE
	if (cc->toc == NULL || cc->toc->verifier != cc)
		return 0;
#else
	KKASSERT(cc->toc.verifier == cc);
#endif
	c = _callout_gettoc(cc);
	atomic_set_int(&c->flags, CALLOUT_STOP);
	atomic_clear_int(&c->flags, CALLOUT_RESET);
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	res = _callout_process_spinlocked(c, 0);
	flags = cc->flags;
	spin_unlock(&c->spin);
#ifdef CALLOUT_TYPESTABLE
	if (res >= 0)
		_callout_typestable_free(sc, c, res);
#endif

	return ((flags & CALLOUT_STOP_RES) ? 1 : 0);
}

/*
 * callout_deactivate() merely clears the CALLOUT_ACTIVE bit.  It does not
 * stop or dequeue the callout and does not interlock against a callout
 * that is in-progress.
 */
void
callout_deactivate(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
}

/*
 * Lock-aided callouts are STOPped synchronously using STOP semantics
 * (meaning that another thread can start the callout again before we
 * return).
 *
 * Non-lock-aided callouts are stopped asynchronously: the callout is
 * removed if it is pending or queued, but this does not block and does
 * not interlock against a callout that is in-progress.
 */
int
callout_stop(struct callout *cc)
{
	if (cc->flags & CALLOUT_AUTOLOCK) {
		atomic_clear_int(&cc->flags, CALLOUT_STOP_RES);
		_callout_cancel_or_stop(cc, CALLOUT_STOP);
		return ((cc->flags & CALLOUT_STOP_RES) ? 1 : 0);
	} else {
		return callout_stop_async(cc);
	}
}
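
/*
 * Illustrative sketch (not part of the implementation): the autolock
 * (callout_init_lk) pattern described in the API comments above.  The
 * callback runs with example_lk held exclusively, and callout_stop()
 * becomes synchronous when the caller owns the lock; lock cancelation
 * prevents a deadlock against the callout ring dispatch.  All example_*
 * names are hypothetical.
 */
#if 0
static struct lock example_lk;
static struct callout example_lk_timer;

static void
example_lk_callback(void *arg)
{
	/* dispatched with example_lk held exclusively by the callout code */
	/* ... */
	callout_reset(&example_lk_timer, hz, example_lk_callback, arg);
}

static void
example_lk_start(void)
{
	lockinit(&example_lk, "exlk", 0, 0);
	callout_init_lk(&example_lk_timer, &example_lk);
	callout_reset(&example_lk_timer, hz, example_lk_callback, NULL);
}

static void
example_lk_stop(void)
{
	/*
	 * Holding the lock makes the stop synchronous with respect to the
	 * callback; a dispatch blocked on LK_EXCLUSIVE | LK_CANCELABLE is
	 * canceled rather than deadlocking against us.
	 */
	lockmgr(&example_lk, LK_EXCLUSIVE);
	callout_stop(&example_lk_timer);
	lockmgr(&example_lk, LK_RELEASE);
}
#endif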

/*
 * Terminates a callout by canceling operations and then clearing the
 * INIT bit.  Upon return, the callout structure must not be used.
 */
void
callout_terminate(struct callout *cc)
{
	_callout_cancel_or_stop(cc, CALLOUT_CANCEL);
	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
#ifdef CALLOUT_TYPESTABLE
	atomic_swap_ptr((void *)&cc->toc, NULL);
#else
	cc->toc.verifier = NULL;
#endif
}

/*
 * Returns whether a callout is queued and the time has not yet
 * arrived (the callout is not yet in-progress).
 */
int
callout_pending(struct callout *cc)
{
	struct _callout *c;
	int res = 0;

	/*
	 * Don't instantiate toc to test pending
	 */
#ifdef CALLOUT_TYPESTABLE
	if ((c = cc->toc) != NULL) {
#else
	c = &cc->toc;
	KKASSERT(c->verifier == cc);
	{
#endif
		spin_lock(&c->spin);
		if (c->verifier == cc) {
			res = ((c->flags & (CALLOUT_SET | CALLOUT_INPROG)) ==
			       CALLOUT_SET);
		}
		spin_unlock(&c->spin);
	}
	return res;
}

/*
 * Returns whether a callout is active or not.  A callout is active when
 * a timeout is set and remains active upon normal termination, even if
 * it does not issue a new timeout.  A callout is inactive if a timeout has
 * never been set or if the callout has been stopped or canceled.  The next
 * timeout that is set will re-set the active state.
 */
int
callout_active(struct callout *cc)
{
	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
}
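
/*
 * Illustrative sketch (not part of the implementation): typical teardown
 * ordering using the functions above, reusing the hypothetical
 * example_softc from the earlier sketch.  callout_cancel() synchronously
 * prevents any further callback from running, and callout_terminate()
 * must be the final operation before the structure is freed or reused.
 */
#if 0
static void
example_teardown(struct example_softc *sc)
{
	/*
	 * Synchronous cancel: no callback can be running or can start
	 * after this returns.  Returns 1 if the cancel (rather than a
	 * normal completion) was responsible for stopping the callout.
	 */
	if (callout_cancel(&sc->example_timer))
		kprintf("example: canceled a pending timeout\n");

	/*
	 * Final teardown; the callout structure must not be used again
	 * until it is re-initialized.
	 */
	callout_terminate(&sc->example_timer);
}
#endif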