/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx,
    char *name);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
    TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
    TAILQ_HEAD_INITIALIZER(dsched_policy_list);
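/*
 * The built-in "noop" policy passes every bio straight through to the
 * device without any reordering (see noop_queue() at the bottom of this
 * file).  It doubles as the fallback whenever no other policy is
 * configured and as a minimal template for the dsched_policy callback
 * interface (prepare/teardown/cancel_all/bio_queue).
 */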
static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a debug output function controlled by the
 * dsched.debug sysctl and tunable; it hands the format string to
 * kvprintf().
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}

/*
 * Called on disk_create().  Tries to read the policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	/* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
	    head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    head_name);
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set &&
	    (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set) {
			dsched_debug(0, "No policy for %s%d specified, "
			    "or policy not found\n", head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}
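/*
 * Example loader.conf(5) settings consumed above, in decreasing order of
 * precedence (per-device, per-driver, global default).  The policy name
 * "fq" is only illustrative; any registered policy name can be used:
 *
 *	dsched.policy.da0="fq"
 *	dsched.policy.da="fq"
 *	dsched.policy.default="fq"
 */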
/*
 * Called from disk_setdiskinfo() (or rather _setdiskinfo()).  Checks
 * whether any policy is associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().  Shuts down the scheduler core and cancels
 * all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}


void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx);	/* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);
	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}
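/*
 * Sketch of how a policy module would plug into dsched_register() below.
 * The "example" policy and its callbacks are hypothetical, but the shape
 * mirrors the noop policy at the top of this file:
 *
 *	static struct dsched_policy example_policy = {
 *		.name = "example",
 *		.prepare = example_prepare,
 *		.teardown = example_teardown,
 *		.cancel_all = example_cancel,
 *		.bio_queue = example_queue
 *	};
 *
 *	error = dsched_register(&example_policy);	(from module_init)
 *	error = dsched_unregister(&example_policy);	(from module_detach)
 */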
/*
 * Called from each policy's module_init or module_attach; registers the
 * policy in the global policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from each policy's module_detach; unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Switches the policy of a disk by first tearing down the old one and
 * then setting up the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Attaches the given policy to the specified disk and initializes the
 * policy's per-disk core.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if the lock is already held; if not, acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, release it again */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}
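/*
 * Looks up a registered policy by name.  Takes dsched_lock unless the
 * caller already holds it exclusively.
 */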
struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if the lock is already held; if not, acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, release it again */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

struct disk *
dsched_find_disk(char *search)
{
	struct disk *dp_found = NULL;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(dp))) {
		if (!strcmp(dp->d_cdev->si_name, search)) {
			dp_found = dp;
			break;
		}
	}

	return dp_found;
}

struct disk *
dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(dp))) {
		if (dp->d_sched_policy == policy)
			return dp;
	}

	return NULL;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally this check shouldn't be needed, but we keep it in, just
	 * in case, to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}
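/*
 * Reference counting for the context structures below.
 * atomic_fetchadd_int() returns the counter value *before* the update, so
 * a return value of 1 in the unref functions means the count just dropped
 * to zero.  The counter is then pushed far into the negative range
 * (-0x400) to mark the object as "in destruction", which lets the
 * KKASSERTs catch late ref/unref calls on a dying object.
 */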
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
		print_backtrace(4);
#endif
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
			dsched_thread_io_unref(tdio);
		}
		lockmgr(&diskctx->lock, LK_RELEASE);
		if (diskctx->dp->d_sched_policy->destroy_diskctx)
			diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
		objcache_put(dsched_diskctx_cache, diskctx);
		atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
	}
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdio (%p) destruction started, trace:\n", tdio);
		print_backtrace(8);
#endif
		diskctx = tdio->diskctx;
		KKASSERT(diskctx != NULL);
		KKASSERT(tdio->qlength == 0);

		if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
			lockmgr(&diskctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;

			lockmgr(&diskctx->lock, LK_RELEASE);
		}

		if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
			tdctx = tdio->tdctx;
			KKASSERT(tdctx != NULL);

			lockmgr(&tdctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;

			lockmgr(&tdctx->lock, LK_RELEASE);
		}
		if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
			tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		objcache_put(dsched_tdio_cache, tdio);
		atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
#if 0
		dsched_disk_ctx_unref(diskctx);
#endif
	}
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
		print_backtrace(8);
#endif
		DSCHED_GLOBAL_THREAD_CTX_LOCK();

		TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
			dsched_thread_io_unref(tdio);
		}
		TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

		DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

		objcache_put(dsched_tdctx_cache, tdctx);
		atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
	}
}
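/*
 * Allocation helpers.  All three context types are backed by the
 * objcaches set up in dsched_init() and are returned with one reference
 * already held for the caller.
 */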
struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	/* XXX: maybe we do need another ref for the disk list for tdio */
	dsched_thread_io_ref(tdio);

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	tdio->flags |= DSCHED_LINKED_DISK_CTX;

	if (tdctx) {
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;

		/* Put the tdio in the tdctx list */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
		tdio->flags |= DSCHED_LINKED_THREAD_CTX;
	}

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
	return tdio;
}


struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}


struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct disk *dp = NULL;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	/* XXX */
	while ((dp = disk_enumerate(dp))) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	struct dsched_thread_io *tdio;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
		tdio = dsched_thread_io_alloc(dp, tdctx, pol);
	}
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx); /* from prepare */
	dsched_disk_ctx_unref(diskctx); /* from alloc */

	dsched_set_disk_priv(dp, NULL);
}
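/*
 * Lifecycle hooks.  dsched_new_buf()/dsched_exit_buf() run when a struct
 * buf is set up and torn down, tagging the buffer with the issuing
 * thread's context; dsched_new_proc()/dsched_new_thread() and their exit
 * counterparts create and drop the per-proc/per-thread contexts
 * themselves.
 */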
void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. we don't want this assert because we aren't catching all
	 * threads. mi_startup() is still getting away without a tdctx.
	 */

	/* by now we should have a tdctx. if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}


void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}


void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}

struct dsched_thread_io *
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);

	tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol);
	return tdio;
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{

}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{

}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}

/*
 * SYSINIT stuff
 */
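/*
 * dsched_init() creates the objcaches backing the context allocators,
 * initializes the global locks, registers the built-in noop policy and
 * finally marks the framework as initialized.  dsched_uninit() is
 * currently a stub.
 */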
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST,
    dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY,
    dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats,
	    sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}
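/*
 * Example sysctl(8) usage of the nodes declared below; the disk name
 * "da0" and policy name "fq" are illustrative:
 *
 *	sysctl dsched.policies			(list available policies)
 *	sysctl dsched.policy.default=fq		(set the default policy)
 *	sysctl dsched.policy.da0=fq		(switch one disk's policy)
 */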
SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}