/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		default_prepare;
static dsched_teardown_t	default_teardown;
static dsched_cancel_t		default_cancel;
static dsched_queue_t		default_queue;

static d_open_t		dsched_dev_open;
static d_close_t	dsched_dev_close;
static d_ioctl_t	dsched_dev_ioctl;

static int	dsched_dev_list_disks(struct dsched_ioctl *data);
static int	dsched_dev_list_disk(struct dsched_ioctl *data);
static int	dsched_dev_list_policies(struct dsched_ioctl *data);
static int	dsched_dev_handle_switch(char *disk, char *policy);

static int	dsched_inited = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;
static cdev_t	dsched_dev;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;
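
/*
 * Note: the objcaches above hand out fixed-size objects
 * (DSCHED_*_MAX_SZ), presumably sized so that a policy's private
 * per-disk, per-thread and per-thread-per-disk state fits inside the
 * generic dsched_disk_ctx / dsched_thread_ctx / dsched_thread_io
 * allocations without a per-policy allocator.
 */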
TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
		TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
		TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_default_policy = {
	.name = "noop",

	.prepare = default_prepare,
	.teardown = default_teardown,
	.cancel_all = default_cancel,
	.bio_queue = default_queue
};

static struct dev_ops dsched_dev_ops = {
	{ "dsched", 0, 0 },
	.d_open = dsched_dev_open,
	.d_close = dsched_dev_close,
	.d_ioctl = dsched_dev_ioctl
};

/*
 * dsched_debug() is a debug output function whose verbosity is
 * controlled by a SYSCTL and a TUNABLE; it prints via kvprintf.
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}

/*
 * Called on disk_create().  Tries to read which policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	/* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
	    head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    head_name);
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		dsched_debug(0, "No policy for %s%d specified, "
		    "or policy not found\n", head_name, unit);
		dsched_set_policy(dp, &dsched_default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	lockmgr(&dsched_lock, LK_RELEASE);
}
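
/*
 * Example loader.conf(5) entries matching the tunable lookup order
 * above (illustrative; device and policy names are placeholders and
 * must name an attached disk and a registered policy):
 *
 *	dsched.policy.da0="noop"	# specific device (name + unit)
 *	dsched.policy.da="noop"		# all devices of one driver
 *	dsched.policy.default="noop"	# global fallback
 */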
/*
 * Called from disk_setdiskinfo() (or rather _setdiskinfo()).  This
 * checks whether a policy is associated with the serial number of
 * the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().  Shuts down the scheduler core and
 * cancels all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_default_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}


void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);
	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}
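
/*
 * Note: the KKASSERT(found == 1) above relies on the invariant that a
 * tdctx carries one tdio per known disk: dsched_thread_ctx_alloc()
 * creates a tdio for every disk, and policy_new() adds a tdio to every
 * existing tdctx when a disk gets a policy attached.
 */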
/*
 * Called from the module_init or module_attach of each policy;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from the module_detach of each policy; unregisters the
 * policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}
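
#if 0
/*
 * Illustrative sketch only: how a loadable policy would register
 * itself with the two functions above.  The "foo" policy and its
 * callbacks are hypothetical; real policies wire up the same
 * dsched_policy callbacks used by dsched_default_policy below.
 */
static struct dsched_policy foo_policy = {
	.name = "foo",
	.prepare = foo_prepare,
	.teardown = foo_teardown,
	.cancel_all = foo_cancel,
	.bio_queue = foo_queue
};

static int
foo_mod_handler(module_t mod, int type, void *unused)
{
	switch (type) {
	case MOD_LOAD:
		/* fails with EEXIST if the name is already taken */
		return (dsched_register(&foo_policy));
	case MOD_UNLOAD:
		/* fails with EBUSY while any disk still uses the policy */
		return (dsched_unregister(&foo_policy));
	default:
		return (EINVAL);
	}
}
#endif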
/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_default_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}

struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

struct disk *
dsched_find_disk(char *search)
{
	struct disk *dp_found = NULL;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(dp))) {
		if (!strcmp(dp->d_cdev->si_name, search)) {
			dp_found = dp;
			break;
		}
	}

	return dp_found;
}

struct disk *
dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(dp))) {
		if (dp->d_sched_policy == policy)
			return dp;
	}

	return NULL;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case,
	 * we leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}
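
#if 0
/*
 * Illustrative sketch only: the shape of a completion callback a
 * policy might hand to dsched_strategy_async().  foo_completed() is
 * hypothetical; the pop_bio() undoes the push_bio() performed at
 * dispatch time above.
 */
static void
foo_completed(struct bio *bio)
{
	struct bio *obio;

	/* the priv pointer stored via dsched_set_bio_priv() is
	 * available here for per-policy accounting */

	/* ... update accounting, kick the next queued bio, etc ... */

	obio = pop_bio(bio);	/* undo the push_bio() from dispatch */
	biodone(obio);
}
#endif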
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
		print_backtrace(4);
#endif
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
			dsched_thread_io_unref(tdio);
		}
		lockmgr(&diskctx->lock, LK_RELEASE);
		if (diskctx->dp->d_sched_policy->destroy_diskctx)
			diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
		objcache_put(dsched_diskctx_cache, diskctx);
		atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
	}
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdio (%p) destruction started, trace:\n", tdio);
		print_backtrace(8);
#endif
		diskctx = tdio->diskctx;
		KKASSERT(diskctx != NULL);
		KKASSERT(tdio->qlength == 0);

		if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
			lockmgr(&diskctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;

			lockmgr(&diskctx->lock, LK_RELEASE);
		}

		if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
			tdctx = tdio->tdctx;
			KKASSERT(tdctx != NULL);

			lockmgr(&tdctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;

			lockmgr(&tdctx->lock, LK_RELEASE);
		}
		if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
			tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		objcache_put(dsched_tdio_cache, tdio);
		atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
#if 0
		dsched_disk_ctx_unref(diskctx);
#endif
	}
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
		print_backtrace(8);
#endif
		DSCHED_GLOBAL_THREAD_CTX_LOCK();

		TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
			dsched_thread_io_unref(tdio);
		}
		TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

		DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

		objcache_put(dsched_tdctx_cache, tdctx);
		atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
	}
}
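
/*
 * Note on the unref functions above: the caller that observes an old
 * refcount of 1 (i.e. it dropped the last reference) atomically
 * subtracts a further 0x400, driving the count deep negative to mark
 * the object as "in destruction".  Any stale ref/unref racing against
 * destruction then operates in the <= -0x400 range, which is exactly
 * what the KKASSERTs tolerate alongside the normal >= 0 range.
 */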

struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	/* XXX: maybe we do need another ref for the disk list for tdio */
	dsched_thread_io_ref(tdio);

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	tdio->flags |= DSCHED_LINKED_DISK_CTX;

	if (tdctx) {
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;

		/* Put the tdio in the tdctx list */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
		tdio->flags |= DSCHED_LINKED_THREAD_CTX;
	}

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
	return tdio;
}


struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}


struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct disk *dp = NULL;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	/* XXX */
	while ((dp = disk_enumerate(dp))) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	struct dsched_thread_io *tdio;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
		tdio = dsched_thread_io_alloc(dp, tdctx, pol);
	}
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx); /* from prepare */
	dsched_disk_ctx_unref(diskctx); /* from alloc */

	dsched_set_disk_priv(dp, NULL);
}
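
/*
 * Note on ref pairing: policy_new() leaves the diskctx with two
 * references (one from dsched_disk_ctx_alloc(), one explicit), and
 * policy_destroy() drops exactly those two, so the diskctx lives as
 * long as a policy is attached to the disk.
 */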

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. we don't want this assert because we aren't catching all
	 * threads. mi_startup() is still getting away without a tdctx.
	 */

	/* by now we should have a tdctx. if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}


void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}


void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}
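
/*
 * Note: dsched_new_proc()/dsched_new_thread() leave the tdctx with two
 * references (one from the allocation, one explicit), and the matching
 * exit hooks drop both.  The 0xDEAD marker makes a use-after-exit of a
 * tdctx easy to spot in a crash dump.
 */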
/* DEFAULT NOOP POLICY */

static int
default_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
default_teardown(struct dsched_disk_ctx *diskctx)
{
}

static void
default_cancel(struct dsched_disk_ctx *diskctx)
{
}

static int
default_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, default_completed, NULL);
#endif
	return 0;
}


/*
 * dsched device stuff
 */

static int
dsched_dev_list_disks(struct dsched_ioctl *data)
{
	struct disk *dp = NULL;
	uint32_t i;

	for (i = 0; (i <= data->num_elem) && (dp = disk_enumerate(dp)); i++)
		;

	if (dp == NULL)
		return -1;

	strncpy(data->dev_name, dp->d_cdev->si_name, sizeof(data->dev_name));

	if (dp->d_sched_policy) {
		strncpy(data->pol_name, dp->d_sched_policy->name,
		    sizeof(data->pol_name));
	} else {
		strncpy(data->pol_name, "N/A (error)", 12);
	}

	return 0;
}

static int
dsched_dev_list_disk(struct dsched_ioctl *data)
{
	struct disk *dp = NULL;
	int found = 0;

	while ((dp = disk_enumerate(dp))) {
		if (!strncmp(dp->d_cdev->si_name, data->dev_name,
		    sizeof(data->dev_name))) {
			KKASSERT(dp->d_sched_policy != NULL);

			found = 1;
			strncpy(data->pol_name, dp->d_sched_policy->name,
			    sizeof(data->pol_name));
			break;
		}
	}
	if (!found)
		return -1;

	return 0;
}

static int
dsched_dev_list_policies(struct dsched_ioctl *data)
{
	struct dsched_policy *pol = NULL;
	uint32_t i;

	for (i = 0; (i <= data->num_elem) && (pol = dsched_policy_enumerate(pol)); i++)
		;

	if (pol == NULL)
		return -1;

	strncpy(data->pol_name, pol->name, sizeof(data->pol_name));
	return 0;
}

static int
dsched_dev_handle_switch(char *disk, char *policy)
{
	struct disk *dp;
	struct dsched_policy *pol;

	dp = dsched_find_disk(disk);
	pol = dsched_find_policy(policy);

	if ((dp == NULL) || (pol == NULL))
		return -1;

	return (dsched_switch(dp, pol));
}

static int
dsched_dev_open(struct dev_open_args *ap)
{
	/*
	 * Only allow read-write access.
	 */
	if (((ap->a_oflags & FWRITE) == 0) || ((ap->a_oflags & FREAD) == 0))
		return(EPERM);

	/*
	 * We don't allow nonblocking access.
	 */
	if ((ap->a_oflags & O_NONBLOCK) != 0) {
		kprintf("dsched_dev: can't do nonblocking access\n");
		return(ENODEV);
	}

	return 0;
}

static int
dsched_dev_close(struct dev_close_args *ap)
{
	return 0;
}

static int
dsched_dev_ioctl(struct dev_ioctl_args *ap)
{
	int error;
	struct dsched_ioctl *data;

	error = 0;
	data = (struct dsched_ioctl *)ap->a_data;

	switch (ap->a_cmd) {
	case DSCHED_SET_DEVICE_POLICY:
		if (dsched_dev_handle_switch(data->dev_name, data->pol_name))
			error = ENOENT; /* No such file or directory */
		break;

	case DSCHED_LIST_DISK:
		if (dsched_dev_list_disk(data) != 0) {
			error = EINVAL; /* Invalid argument */
		}
		break;

	case DSCHED_LIST_DISKS:
		if (dsched_dev_list_disks(data) != 0) {
			error = EINVAL; /* Invalid argument */
		}
		break;

	case DSCHED_LIST_POLICIES:
		if (dsched_dev_list_policies(data) != 0) {
			error = EINVAL; /* Invalid argument */
		}
		break;

	default:
		error = ENOTTY; /* Inappropriate ioctl for device */
		break;
	}

	return(error);
}
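
#if 0
/*
 * Illustrative sketch only (userland, not part of this file): driving
 * the ioctl handler above from a utility.  set_disk_policy() is a
 * hypothetical helper; the struct fields and the ioctl command match
 * the handlers above.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/dsched.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int
set_disk_policy(const char *disk, const char *policy)
{
	struct dsched_ioctl d;
	int fd, ret;

	/* dsched_dev_open() insists on read-write, blocking access */
	fd = open("/dev/dsched", O_RDWR);
	if (fd < 0)
		return (-1);

	memset(&d, 0, sizeof(d));
	strlcpy(d.dev_name, disk, sizeof(d.dev_name));
	strlcpy(d.pol_name, policy, sizeof(d.pol_name));

	ret = ioctl(fd, DSCHED_SET_DEVICE_POLICY, &d);
	close(fd);
	return (ret);
}
#endif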
/*
 * SYSINIT stuff
 */

static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_default_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

static void
dsched_dev_init(void)
{
	dsched_dev = make_dev(&dsched_dev_ops,
	    0,
	    UID_ROOT,
	    GID_WHEEL,
	    0600,
	    "dsched");
}

static void
dsched_dev_uninit(void)
{
	destroy_dev(dsched_dev);
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);
SYSINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_init, NULL);
SYSUNINIT(subr_dsched_dev_register, SI_SUB_DRIVERS, SI_ORDER_ANY, dsched_dev_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats,
	    sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
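
/*
 * Example interaction with the sysctl interface above (illustrative):
 *
 *	$ sysctl dsched.policies	# space-separated policy names
 *	$ sysctl dsched.debug=4		# raise dsched_debug() verbosity
 */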