/*
 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
		TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
		TAILQ_HEAD_INITIALIZER(dsched_policy_list);
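/*
 * Locking overview (descriptive note): dsched_lock serializes policy
 * registration, policy switching and the sysctl/tunable paths below,
 * while dsched_tdctx_lock, presumably taken through the
 * DSCHED_GLOBAL_THREAD_CTX_LOCK() wrappers, protects the global
 * dsched_tdctx_list of per-thread contexts.
 */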
static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a SYSCTL- and TUNABLE-controlled debug output
 * function built on kvprintf().
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}

/*
 * Called on disk_create(); tries to read which policy to use from
 * loader.conf. If none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	/* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
	    head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    head_name);
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set &&
	    (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set) {
			dsched_debug(0, "No policy for %s%d specified, "
			    "or policy not found\n", head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}
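/*
 * Example loader.conf(5) usage for the tunables consulted above, in the
 * order they are tried (per-unit, per-driver head name, then global
 * default). The policy name "fq" is only an illustration; any name
 * known to dsched_find_policy() works:
 *
 *	dsched.policy.da0="fq"		# this unit only
 *	dsched.policy.da="fq"		# all disks with head name "da"
 *	dsched.policy.default="fq"	# fallback for all other disks
 */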
/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo). This checks if
 * there is any policy associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy(); shuts down the scheduler core and cancels
 * all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Entry point for bios: look up the issuing thread's tdio for this disk
 * and hand the bio to the disk's current policy. If the buf carries no
 * thread context, or the policy refuses the bio, fall back to a raw
 * dispatch to the device.
 */
void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx);	/* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);
	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}
/*
 * Called from each policy's module_init or module_attach; registers the
 * policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from each policy's module_detach; unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}
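/*
 * For illustration only: a minimal sketch of an external policy module,
 * assuming the usual four callbacks and a SYSINIT hook (a real policy
 * may instead register from its module event handler). All "example_*"
 * names are hypothetical.
 */
#if 0
static dsched_prepare_t		example_prepare;
static dsched_teardown_t	example_teardown;
static dsched_cancel_t		example_cancel;
static dsched_queue_t		example_queue;

static struct dsched_policy example_policy = {
	.name = "example",

	.prepare = example_prepare,
	.teardown = example_teardown,
	.cancel_all = example_cancel,
	.bio_queue = example_queue
};

static void
example_mod_init(void)
{
	/* fails with EEXIST if the name is already taken */
	dsched_register(&example_policy);
}

SYSINIT(example_dsched, SI_SUB_CREATE_INIT, SI_ORDER_ANY,
    example_mod_init, NULL);
#endif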
/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}

/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}

struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

struct disk *
dsched_find_disk(char *search)
{
	struct disk *dp_found = NULL;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(dp))) {
		if (!strcmp(dp->d_cdev->si_name, search)) {
			dp_found = dp;
			break;
		}
	}

	return dp_found;
}

struct disk *
dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(dp))) {
		if (dp->d_sched_policy == policy)
			return dp;
	}

	return NULL;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case,
	 * we leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
#if 0
	/*
	 * Buffers undergoing device I/O do not need a kvabase/size.
	 */
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
#endif
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
#if 0
	nbp->b_kvabase = NULL;
	nbp->b_kvasize = 0;
#endif
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}
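/*
 * For illustration only: how a policy's bio_queue callback might dispatch
 * a bio with dsched_strategy_async() and complete it from the done
 * callback. The "example_*" names are hypothetical; the pop_bio()/
 * biodone() pairing is assumed to mirror the push_bio() performed above.
 */
#if 0
static void
example_completed(struct bio *bp)
{
	struct bio *obio;

	/* unwind the bio pushed by dsched_strategy_async() and finish it */
	obio = pop_bio(bp);
	biodone(obio);
}

static int
example_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	/* dispatch immediately; a real policy would queue and reorder here */
	dsched_strategy_async(diskctx->dp, bio, example_completed, NULL);
	return 0;
}
#endif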
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);

	KKASSERT(refcount >= 0);
}
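/*
 * Note on the unref functions below: atomic_fetchadd_int() returns the
 * refcount prior to the decrement, so a return value of 1 means the
 * caller dropped the last reference. The winner then subtracts 0x400,
 * pushing the count deeply negative to mark the object as being torn
 * down; the KKASSERTs accept either a normal count (>= 0) or a count
 * carrying that destruction bias (<= -0x400).
 */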
void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
		print_backtrace(4);
#endif
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
			dsched_thread_io_unref(tdio);
		}
		lockmgr(&diskctx->lock, LK_RELEASE);
		if (diskctx->dp->d_sched_policy->destroy_diskctx)
			diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
		objcache_put(dsched_diskctx_cache, diskctx);
		atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
	}
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdio (%p) destruction started, trace:\n", tdio);
		print_backtrace(8);
#endif
		diskctx = tdio->diskctx;
		KKASSERT(diskctx != NULL);
		KKASSERT(tdio->qlength == 0);

		if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
			lockmgr(&diskctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;

			lockmgr(&diskctx->lock, LK_RELEASE);
		}

		if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
			tdctx = tdio->tdctx;
			KKASSERT(tdctx != NULL);

			lockmgr(&tdctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;

			lockmgr(&tdctx->lock, LK_RELEASE);
		}
		if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
			tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		objcache_put(dsched_tdio_cache, tdio);
		atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
#if 0
		dsched_disk_ctx_unref(diskctx);
#endif
	}
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
		print_backtrace(8);
#endif
		DSCHED_GLOBAL_THREAD_CTX_LOCK();

		TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
			dsched_thread_io_unref(tdio);
		}
		TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

		DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

		objcache_put(dsched_tdctx_cache, tdctx);
		atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
	}
}

struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	/* XXX: maybe we do need another ref for the disk list for tdio */
	dsched_thread_io_ref(tdio);

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	tdio->flags |= DSCHED_LINKED_DISK_CTX;

	if (tdctx) {
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;

		/* Put the tdio in the tdctx list */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
		tdio->flags |= DSCHED_LINKED_THREAD_CTX;
	}

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
	return tdio;
}

struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}

struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct disk *dp = NULL;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	/* XXX */
	while ((dp = disk_enumerate(dp))) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}
void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	struct dsched_thread_io *tdio;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
		tdio = dsched_thread_io_alloc(dp, tdctx, pol);
	}
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx);	/* from prepare */
	dsched_disk_ctx_unref(diskctx);	/* from alloc */

	dsched_set_disk_priv(dp, NULL);
}

void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. we don't want this assert because we aren't catching all
	 * threads. mi_startup() is still getting away without an tdctx.
	 */

	/* by now we should have an tdctx. if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}

void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}

void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}
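/*
 * Lifecycle summary (descriptive note): dsched_new_proc() and
 * dsched_new_thread() attach a dsched_thread_ctx to each new proc or
 * thread, dsched_new_buf() tags every buf with the issuer's tdctx,
 * dsched_queue() consumes that tag to find the matching tdio, and
 * dsched_exit_proc()/dsched_exit_thread() mark the context dead and
 * drop the two references taken at setup.
 */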
struct dsched_thread_io *
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);

	tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol);
	return tdio;
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{

}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{

}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats,
	    sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}
static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}
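/*
 * Example runtime usage of the sysctl nodes defined above (policy names
 * other than "noop" are illustrative and must have been registered first):
 *
 *	sysctl dsched.policies			# list registered policies
 *	sysctl dsched.debug=4			# raise debug verbosity
 *	sysctl dsched.policy.default=noop	# set the default policy
 *	sysctl dsched.policy.da0=noop		# switch the policy of da0
 */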