/*
 * Copyright (c) 2009, 2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t default_prepare;
static dsched_teardown_t default_teardown;
static dsched_cancel_t default_cancel;
static dsched_queue_t default_queue;

static d_open_t dsched_dev_open;
static d_close_t dsched_dev_close;
static d_ioctl_t dsched_dev_ioctl;

static int dsched_dev_list_disks(struct dsched_ioctl *data);
static int dsched_dev_list_disk(struct dsched_ioctl *data);
static int dsched_dev_list_policies(struct dsched_ioctl *data);
static int dsched_dev_handle_switch(char *disk, char *policy);

static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);

static int dsched_inited = 0;

struct lock dsched_lock;
static int dsched_debug_enable = 0;
static cdev_t dsched_dev;

struct dsched_stats dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache *dsched_diskctx_cache;
static struct objcache *dsched_tdctx_cache;
static struct objcache *dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx) dsched_tdctx_list =
	TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
	TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_default_policy = {
	.name = "noop",

	.prepare = default_prepare,
	.teardown = default_teardown,
	.cancel_all = default_cancel,
	.bio_queue = default_queue
};

static struct dev_ops dsched_dev_ops = {
	{ "dsched", 0, 0 },
	.d_open = dsched_dev_open,
	.d_close = dsched_dev_close,
	.d_ioctl = dsched_dev_ioctl
};

/*
 * dsched_debug() is a SYSCTL- and TUNABLE-controlled debug output function
 * using kvprintf.
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}

/*
 * Called on disk_create().
 * Tries to read which policy to use from loader.conf; if none is
 * specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	/* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
	    head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    head_name);
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		dsched_debug(0, "No policy for %s%d specified, "
		    "or policy not found\n", head_name, unit);
		dsched_set_policy(dp, &dsched_default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}
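/*
 * Example loader.conf(5) entries matching the tunables probed above, in
 * decreasing order of precedence (disk and policy names are illustrative):
 *
 *	dsched.policy.da0="fq"		# one specific unit
 *	dsched.policy.da="fq"		# a whole driver class
 *	dsched.policy.default="fq"	# fallback for all disks
 */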
/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo). This checks whether
 * there is any policy associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().
 * Shuts down the scheduler core and cancels all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_default_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}


void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);
	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}
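/*
 * Note on dsched_queue() above: a bio can leave it on one of three paths.
 * If the originating thread has no tdctx (e.g. very early kernel threads),
 * or if the policy's bio_queue() returns an error, the bio bypasses
 * scheduling via dsched_strategy_raw(); otherwise it is handed to the
 * per-disk policy through bio_queue() and dispatched at the policy's
 * discretion.
 */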
/*
 * Called from each module_init or module_attach of each policy;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from each module_detach of each policy;
 * unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_default_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if the lock is held already; if not, acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}
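#if 0
/*
 * Minimal sketch of switching a disk's scheduler at runtime using the
 * helpers below; "ad0" and "fq" are placeholder names, and error handling
 * is elided.
 */
static int
example_switch_policy(void)
{
	struct disk *dp = dsched_find_disk("ad0");
	struct dsched_policy *pol = dsched_find_policy("fq");

	if (dp == NULL || pol == NULL)
		return ENOENT;
	return dsched_switch(dp, pol);	/* tears down old, prepares new */
}
#endif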
struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if the lock is held already; if not, acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

struct disk *
dsched_find_disk(char *search)
{
	struct disk *dp_found = NULL;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(dp))) {
		if (!strcmp(dp->d_cdev->si_name, search)) {
			dp_found = dp;
			break;
		}
	}

	return dp_found;
}

struct disk *
dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(dp))) {
		if (dp->d_sched_policy == policy)
			return dp;
	}

	return NULL;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case, we
	 * leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}
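#if 0
/*
 * Hypothetical completion handler for dsched_strategy_async(), sketching
 * the convention set up above: the disk and the per-policy private data
 * are recovered from the pushed bio, and the original bio is popped
 * before completion is propagated. The accessor names assume the usual
 * dsched_get_bio_dp()/dsched_get_bio_priv() counterparts to the setters
 * used above.
 */
static void
example_async_done(struct bio *bio)
{
	struct disk *dp = dsched_get_bio_dp(bio);
	void *priv = dsched_get_bio_priv(bio);

	/* ...per-policy accounting with dp, priv and the timestamp... */

	biodone(pop_bio(bio));	/* undo the push_bio() from strategy_async */
}
#endif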
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
		print_backtrace(4);
#endif
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
			dsched_thread_io_unref(tdio);
		}
		lockmgr(&diskctx->lock, LK_RELEASE);
		if (diskctx->dp->d_sched_policy->destroy_diskctx)
			diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
		objcache_put(dsched_diskctx_cache, diskctx);
		atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
	}
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdio (%p) destruction started, trace:\n", tdio);
		print_backtrace(8);
#endif
		diskctx = tdio->diskctx;
		KKASSERT(diskctx != NULL);
		KKASSERT(tdio->qlength == 0);

		if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
			lockmgr(&diskctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;

			lockmgr(&diskctx->lock, LK_RELEASE);
		}

		if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
			tdctx = tdio->tdctx;
			KKASSERT(tdctx != NULL);

			lockmgr(&tdctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;

			lockmgr(&tdctx->lock, LK_RELEASE);
		}
		if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
			tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		objcache_put(dsched_tdio_cache, tdio);
		atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
#if 0
		dsched_disk_ctx_unref(diskctx);
#endif
	}
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
		print_backtrace(8);
#endif
		DSCHED_GLOBAL_THREAD_CTX_LOCK();

		TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
			dsched_thread_io_unref(tdio);
		}
		TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

		DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

		objcache_put(dsched_tdctx_cache, tdctx);
		atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
	}
}
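/*
 * Note on the unref functions above: atomic_fetchadd_int() returns the
 * refcount *before* the decrement, so "refcount == 1" means the last
 * reference just went away. The destructor then subtracts a further
 * 0x400, pushing the count deep into negative territory as an
 * "in destruction" marker; that is why the assertions accept either a
 * non-negative count or one at or below -0x400.
 */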
struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	/* XXX: maybe we do need another ref for the disk list for tdio */
	dsched_thread_io_ref(tdio);

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	tdio->flags |= DSCHED_LINKED_DISK_CTX;

	if (tdctx) {
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;

		/* Put the tdio in the tdctx list */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
		tdio->flags |= DSCHED_LINKED_THREAD_CTX;
	}

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
	return tdio;
}


struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}


struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct disk *dp = NULL;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	/* XXX */
	while ((dp = disk_enumerate(dp))) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	struct dsched_thread_io *tdio;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
		tdio = dsched_thread_io_alloc(dp, tdctx, pol);
	}
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx); /* from prepare */
	dsched_disk_ctx_unref(diskctx); /* from alloc */

	dsched_set_disk_priv(dp, NULL);
}
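/*
 * The hooks below tie dsched into buf, proc and thread lifecycles: the
 * kernel calls dsched_new_buf()/dsched_exit_buf() around buffer use and
 * the proc/thread variants on creation and exit, so every bio can later
 * be traced back to a dsched_thread_ctx in dsched_queue().
 */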
void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. we don't want this assert because we aren't catching all
	 * threads. mi_startup() is still getting away without a tdctx.
	 */

	/* by now we should have a tdctx. if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}


void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}


void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}

/* DEFAULT NOOP POLICY */

static int
default_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
default_teardown(struct dsched_disk_ctx *diskctx)
{
}

static void
default_cancel(struct dsched_disk_ctx *diskctx)
{
}

static int
default_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, default_completed, NULL);
#endif
	return 0;
}
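#if 0
/*
 * Sketch of a non-noop policy definition mirroring dsched_default_policy
 * above; the callback names are placeholders. A policy module would call
 * dsched_register(&example_policy) from its load handler and
 * dsched_unregister(&example_policy) on unload. The optional hooks
 * (new_tdio, destroy_tdio, new_diskctx, destroy_diskctx) honored by the
 * alloc/unref paths above may also be set.
 */
static struct dsched_policy example_policy = {
	.name = "example",

	.prepare = example_prepare,
	.teardown = example_teardown,
	.cancel_all = example_cancel,
	.bio_queue = example_queue
};
#endif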
/*
 * dsched device stuff
 */

static int
dsched_dev_list_disks(struct dsched_ioctl *data)
{
	struct disk *dp = NULL;
	uint32_t i;

	for (i = 0; (i <= data->num_elem) && (dp = disk_enumerate(dp)); i++)
		;

	if (dp == NULL)
		return -1;

	strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name));

	if (dp->d_sched_policy) {
		strncpy(data->pol_name, dp->d_sched_policy->name,
		    sizeof(data->pol_name));
	} else {
		strncpy(data->pol_name, "N/A (error)", 12);
	}

	return 0;
}

static int
dsched_dev_list_disk(struct dsched_ioctl *data)
{
	struct disk *dp = NULL;
	int found = 0;

	while ((dp = disk_enumerate(dp))) {
		if (!strncmp(dp->d_cdev->si_name, data->dev_name,
		    sizeof(data->dev_name))) {
			KKASSERT(dp->d_sched_policy != NULL);

			found = 1;
			strncpy(data->pol_name, dp->d_sched_policy->name,
			    sizeof(data->pol_name));
			break;
		}
	}
	if (!found)
		return -1;

	return 0;
}

static int
dsched_dev_list_policies(struct dsched_ioctl *data)
{
	struct dsched_policy *pol = NULL;
	uint32_t i;

	for (i = 0; (i <= data->num_elem) && (pol = dsched_policy_enumerate(pol)); i++)
		;

	if (pol == NULL)
		return -1;

	strncpy(data->pol_name, pol->name, sizeof(data->pol_name));
	return 0;
}

static int
dsched_dev_handle_switch(char *disk, char *policy)
{
	struct disk *dp;
	struct dsched_policy *pol;

	dp = dsched_find_disk(disk);
	pol = dsched_find_policy(policy);

	if ((dp == NULL) || (pol == NULL))
		return -1;

	return (dsched_switch(dp, pol));
}

static int
dsched_dev_open(struct dev_open_args *ap)
{
	/*
	 * Only allow read-write access.
	 */
	if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0))
		return(EPERM);

	/*
	 * We don't allow nonblocking access.
	 */
	if ((ap->a_oflags & O_NONBLOCK) != 0) {
		kprintf("dsched_dev: can't do nonblocking access\n");
		return(ENODEV);
	}

	return 0;
}

static int
dsched_dev_close(struct dev_close_args *ap)
{
	return 0;
}

static int
dsched_dev_ioctl(struct dev_ioctl_args *ap)
{
	int error;
	struct dsched_ioctl *data;

	error = 0;
	data = (struct dsched_ioctl *)ap->a_data;

	switch (ap->a_cmd) {
	case DSCHED_SET_DEVICE_POLICY:
		if (dsched_dev_handle_switch(data->dev_name, data->pol_name))
			error = ENOENT; /* No such file or directory */
		break;

	case DSCHED_LIST_DISK:
		if (dsched_dev_list_disk(data) != 0) {
			error = EINVAL; /* Invalid argument */
		}
		break;

	case DSCHED_LIST_DISKS:
		if (dsched_dev_list_disks(data) != 0) {
			error = EINVAL; /* Invalid argument */
		}
		break;

	case DSCHED_LIST_POLICIES:
		if (dsched_dev_list_policies(data) != 0) {
			error = EINVAL; /* Invalid argument */
		}
		break;

	default:
		error = ENOTTY; /* Inappropriate ioctl for device */
		break;
	}

	return(error);
}
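/*
 * Userland reaches the handlers above through /dev/dsched, e.g.
 * (illustrative, error handling elided):
 *
 *	struct dsched_ioctl d;
 *	int fd = open("/dev/dsched", O_RDWR);
 *
 *	strncpy(d.dev_name, "ad0", sizeof(d.dev_name));
 *	strncpy(d.pol_name, "fq", sizeof(d.pol_name));
 *	ioctl(fd, DSCHED_SET_DEVICE_POLICY, &d);
 */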
/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_default_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

static void
dsched_dev_init(void)
{
	dsched_dev = make_dev(&dsched_dev_ops,
	    0,
	    UID_ROOT,
	    GID_WHEEL,
	    0600,
	    "dsched");
}

static void
dsched_dev_uninit(void)
{
	destroy_dev(dsched_dev);
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);
SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL);
SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
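/*
 * The nodes above surface as, e.g. (policy/disk names illustrative):
 *
 *	sysctl dsched.policies		# list registered policies
 *	sysctl dsched.debug=4		# raise dsched_debug() verbosity
 *	sysctl dsched.policy.ad0=fq	# switch a disk via the handler below
 */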
static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}