/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
		TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
		TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a debug output function controlled by the dsched.debug
 * SYSCTL/TUNABLE; messages at or below the configured level are printed
 * via kvprintf().
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}
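
/*
 * Example usage (illustrative): with "sysctl dsched.debug=4" set, a call
 * such as
 *
 *	dsched_debug(LOG_ERR, "policy %s not found\n", name);
 *
 * prints (LOG_ERR == 3 <= 4), whereas a dsched_debug(LOG_INFO, ...) call
 * (LOG_INFO == 6) stays silent.
 */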

/*
 * Called on disk_create(). Tries to read which policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
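/*
 * Example loader.conf entries (disk and policy names illustrative):
 *
 *	dsched.policy.da0="fq"		# policy for one specific unit
 *	dsched.policy.da="fq"		# policy for a whole driver head
 *	dsched.policy.default="fq"	# fallback for all disks
 *
 * A policy may also be bound to a device serial number via
 * dsched.policy.<serno> (see dsched_disk_update_callback() below).
 */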
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	char *ptr;
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	/* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
	    head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    head_name);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set &&
	    (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set) {
			dsched_debug(0, "No policy for %s%d specified, "
			    "or policy not found\n", head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
		ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
	else
		ksnprintf(tunable_key, sizeof(tunable_key), "%s%d",
		    head_name, unit);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo). Checks if there
 * is any policy associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy(). Shuts down the scheduler core and cancels
 * all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}


void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx	*tdctx;
	struct dsched_thread_io		*tdio;
	struct dsched_disk_ctx		*diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* No thread context; bypass the policy and dispatch directly */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);
	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}


/*
 * Called from each policy's module_init or module_attach. Registers the
 * policy in the local policy list.
 */
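/*
 * Example (names hypothetical): a policy module typically declares its
 * callbacks and a struct dsched_policy, then registers it on attach:
 *
 *	static struct dsched_policy my_policy = {
 *		.name = "mypolicy",
 *		.prepare = my_prepare,
 *		.teardown = my_teardown,
 *		.cancel_all = my_cancel_all,
 *		.bio_queue = my_bio_queue,
 *	};
 *
 *	error = dsched_register(&my_policy);
 *
 * and calls dsched_unregister(&my_policy) on detach.
 */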
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from each policy's module_detach. Unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}
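
/*
 * At runtime, dsched_switch() is normally driven through the per-disk
 * sysctl node created in dsched_sysctl_add_disk(), e.g. (disk name
 * hypothetical):
 *
 *	sysctl dsched.policy.ad0="noop"
 */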

/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if the dsched lock is held already; if not, acquire it */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n",
	    dp->d_cdev->si_name, new_policy->name);

	/* If we acquired the lock, release it again */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}

struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if the dsched lock is held already; if not, acquire it */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, release it again */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

struct disk *
dsched_find_disk(char *search)
{
	struct disk *dp_found = NULL;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(dp))) {
		if (!strcmp(dp->d_cdev->si_name, search)) {
			dp_found = dp;
			break;
		}
	}

	return dp_found;
}

struct disk *
dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(dp))) {
		if (dp->d_sched_policy == policy)
			return dp;
	}

	return NULL;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case,
	 * we leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
#if 0
	/*
	 * Buffers undergoing device I/O do not need a kvabase/size.
	 */
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
#endif
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
#if 0
	nbp->b_kvabase = NULL;
	nbp->b_kvasize = 0;
#endif
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}
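
/*
 * Sketch of how a policy might use the async path (callback name
 * hypothetical): the completion routine gets the pushed bio back and can
 * recover the disk/private pointers stored above before completing the
 * original bio:
 *
 *	static void
 *	my_completed(struct bio *bio)
 *	{
 *		struct bio *obio = pop_bio(bio);
 *
 *		... per-policy accounting using the stored dp/priv ...
 *		biodone(obio);
 *	}
 *
 *	dsched_strategy_async(dp, bio, my_completed, priv);
 */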

void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);

	KKASSERT(refcount >= 0);
}
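
/*
 * The unref functions below share one scheme: atomic_fetchadd_int()
 * returns the refcount *before* the decrement, so a return value of 1
 * means this caller dropped the last reference and must destroy the
 * object. Before the teardown, 0x400 is subtracted from the refcount to
 * push it well below zero, marking the object as "in destruction"; the
 * KKASSERTs (refcount >= 0 || refcount <= -0x400) then catch stray
 * ref/unref calls racing with the teardown.
 */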
void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io	*tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
		print_backtrace(4);
#endif
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
			dsched_thread_io_unref(tdio);
		}
		lockmgr(&diskctx->lock, LK_RELEASE);
		if (diskctx->dp->d_sched_policy->destroy_diskctx)
			diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
		objcache_put(dsched_diskctx_cache, diskctx);
		atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
	}
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx	*tdctx;
	struct dsched_disk_ctx	*diskctx;
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdio (%p) destruction started, trace:\n", tdio);
		print_backtrace(8);
#endif
		diskctx = tdio->diskctx;
		KKASSERT(diskctx != NULL);
		KKASSERT(tdio->qlength == 0);

		if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
			lockmgr(&diskctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;

			lockmgr(&diskctx->lock, LK_RELEASE);
		}

		if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
			tdctx = tdio->tdctx;
			KKASSERT(tdctx != NULL);

			lockmgr(&tdctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;

			lockmgr(&tdctx->lock, LK_RELEASE);
		}
		if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
			tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		objcache_put(dsched_tdio_cache, tdio);
		atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
#if 0
		dsched_disk_ctx_unref(diskctx);
#endif
	}
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io	*tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
		print_backtrace(8);
#endif
		DSCHED_GLOBAL_THREAD_CTX_LOCK();

		TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
			dsched_thread_io_unref(tdio);
		}
		TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

		DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

		objcache_put(dsched_tdctx_cache, tdctx);
		atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
	}
}


struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io	*tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	/* XXX: maybe we do need another ref for the disk list for tdio */
	dsched_thread_io_ref(tdio);

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	lockmgr(&tdio->diskctx->lock, LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	tdio->flags |= DSCHED_LINKED_DISK_CTX;
	lockmgr(&tdio->diskctx->lock, LK_RELEASE);

	if (tdctx) {
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;

		/* Put the tdio in the tdctx list */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
		tdio->flags |= DSCHED_LINKED_THREAD_CTX;
	}

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
	return tdio;
}


struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}


struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx	*tdctx;
	struct dsched_thread_io	*tdio;
	struct disk	*dp = NULL;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	while ((dp = disk_enumerate(dp))) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
	}

	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}
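
/*
 * Instantiates a policy core on a disk: allocates the per-disk context
 * and creates a tdio on this disk for every known thread context.
 * Called with dsched_lock held (from dsched_set_policy()).
 */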
void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	struct dsched_thread_io *tdio;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
		tdio = dsched_thread_io_alloc(dp, tdctx, pol);
	}
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx); /* from prepare */
	dsched_disk_ctx_unref(diskctx); /* from alloc */

	dsched_set_disk_priv(dp, NULL);
}

void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx	*tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. We don't want this assert because we aren't catching
	 * all threads. mi_startup() is still getting away without a tdctx.
	 */

	/* By now we should have a tdctx. If not, something bad is going on. */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}
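
/*
 * The tdctx reference taken in dsched_new_buf() is released either in
 * dsched_queue() once the buf is dispatched ("acquired on new_buf"), or
 * below if the buf never went through dsched_queue().
 */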
void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx	*tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx	*tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}


void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx	*tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx	*tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}


void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx	*tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}
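
/*
 * Creates a tdio on the given disk for the calling thread's tdctx;
 * intended for policy code that needs a tdio outside the normal
 * new_buf/new_thread path (e.g. a policy's own helper thread).
 */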
struct dsched_thread_io *
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;

	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);
	tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol);

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	return tdio;
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{

}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{

}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats,
	    sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");
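
/*
 * Example userland interaction with the tree above (disk name
 * hypothetical):
 *
 *	sysctl dsched.policies			# list registered policies
 *	sysctl dsched.policy.default="noop"	# set the default policy
 *	sysctl dsched.policy.ad0="noop"		# per-disk node, added below
 */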

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}