/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

TAILQ_HEAD(tdio_list_head, dsched_thread_io);

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio);
static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
static void dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx);
static void dsched_thread_io_destroy(struct dsched_thread_io *tdio);
static void dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx);

static struct dsched_thread_io *dsched_thread_io_alloc(
		struct disk *dp, struct dsched_thread_ctx *tdctx,
		struct dsched_policy *pol, int tdctx_locked);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
	TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a SYSCTL- and TUNABLE-controlled debug output function
 * using kvprintf().
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}

/*
 * Called on disk_create().  Tries to read which policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	char *ptr;
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key),
		  "dsched.policy.%s%d", head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key),
		  "dsched.policy.%s", head_name);

	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set &&
	    (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set && bootverbose) {
			dsched_debug(0,
				     "No policy for %s%d specified, "
				     "or policy not found\n",
				     head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
		ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
	else
		ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}
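
/*
 * Example (illustrative only): loader.conf tunables that the lookups in
 * dsched_disk_create_callback() above would match.  The "fq" policy name
 * is just a placeholder for whatever policy module is actually loaded:
 *
 *	dsched.policy.da0="fq"		# per unit (head_name + unit)
 *	dsched.policy.da="fq"		# per driver head
 *	dsched.policy.default="fq"	# global default for all disks
 */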

/*
 * Called from disk_setdiskinfo() (or rather _setdiskinfo()).  This checks
 * whether there is any policy associated with the serial number of the
 * device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
		  info->d_serialno);

	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().  Shuts down the scheduler core and cancels
 * all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}


/*
 * Caller must have dp->diskctx locked
 */
void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;
	int error;

	if (dp->d_sched_policy == &dsched_noop_policy) {
		dsched_clr_buf_priv(bio->bio_buf);
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	error = 0;
	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	/*
	 * XXX:
	 * iterate in reverse to make sure we find the most up-to-date
	 * tdio for a given disk.  After a switch it may take some time
	 * for everything to clean up.
	 */
	TAILQ_FOREACH_REVERSE(tdio, &tdctx->tdio_list, tdio_list_head, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			break;
		}
	}
	if (tdio == NULL) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy, 1);
		dsched_thread_io_ref(tdio);
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx);	/* acquired on new_buf */

	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);

	if (dp->d_sched_policy != &dsched_noop_policy)
		KKASSERT(tdio->debug_policy == dp->d_sched_policy);

	KKASSERT(tdio->debug_inited == 0xF00F1234);

	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}


/*
 * Called from each module_init or module_attach of each policy;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
			     d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from each module_detach of each policy;
 * unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);

	return 0;
}


/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);

	return 0;
}


/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}

struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

/*
 * Returns ref'd disk
 */
struct disk *
dsched_find_disk(char *search)
{
	struct disk marker;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(&marker, dp)) != NULL) {
		if (strcmp(dp->d_cdev->si_name, search) == 0) {
			disk_enumerate_stop(&marker, NULL);
			/* leave ref on dp */
			break;
		}
	}
	return dp;
}

struct disk *
dsched_disk_enumerate(struct disk *marker, struct disk *dp,
		      struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(marker, dp)) != NULL) {
		if (dp->d_sched_policy == policy)
			break;
	}
	return dp;	/* NULL when the enumeration is exhausted */
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case,
	 * we leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
			     "dsched_strategy_raw sees non-NULL bio_track!! "
			     "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
#if 0
	/*
	 * Buffers undergoing device I/O do not need a kvabase/size.
	 */
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
#endif
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
#if 0
	nbp->b_kvabase = NULL;
	nbp->b_kvasize = 0;
#endif
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}

/*
 * A special biodone callback function used by policies that implement
 * request polling.
 */
static void
request_polling_biodone(struct bio *bp)
{
	struct dsched_disk_ctx *diskctx = NULL;
	struct disk *dp = NULL;
	struct bio *obio;
	struct dsched_policy *policy;

	dp = dsched_get_bio_dp(bp);
	policy = dp->d_sched_policy;
	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx && policy);
	dsched_disk_ctx_ref(diskctx);

	/*
	 * XXX:
	 * the bio_done function must not block!
	 */
	if (diskctx->dp->d_sched_policy->bio_done)
		diskctx->dp->d_sched_policy->bio_done(bp);

	obio = pop_bio(bp);
	biodone(obio);

	atomic_subtract_int(&diskctx->current_tag_queue_depth, 1);

	/*
	 * Call the polling function.
	 * XXX:
	 * the polling function must not block!
	 */
	if (policy->polling_func)
		policy->polling_func(diskctx);
	else
		dsched_debug(0, "dsched: the policy uses request polling without a polling function!\n");
	dsched_disk_ctx_unref(diskctx);
}

/*
 * A special dsched strategy used by policies that implement request
 * polling (i.e. that provide a polling function).
 *
 * The strategy is just like dsched_strategy_async(), but the biodone
 * callback is set to a preset one.
 *
 * If the policy needs its own biodone callback, it should register it
 * in the policy structure (bio_done field).
 *
 * The current_tag_queue_depth is maintained by this function and the
 * request_polling_biodone() function.
 */
void
dsched_strategy_request_polling(struct disk *dp, struct bio *bio, struct dsched_disk_ctx *diskctx)
{
	atomic_add_int(&diskctx->current_tag_queue_depth, 1);
	dsched_strategy_async(dp, bio, request_polling_biodone, dsched_get_bio_priv(bio));
}

/*
 * Ref and deref various structures.  The 1->0 transition of the reference
 * count actually transitions 1->0x80000000 and causes the object to be
 * destroyed.  It is possible for transitory references to occur on the
 * object while it is being destroyed.  We use bit 31 to indicate that
 * destruction is in progress and to prevent nested destructions.
 */
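
/*
 * Illustrative sketch of the refcount encoding (values are examples only):
 *
 *	refs 2          -> 1		normal deref
 *	refs 1          -> 0x80000000	last deref; destruction starts
 *	refs 0x80000002 -> 0x80000001	transitory ref released during destroy
 *	refs 0x80000001 -> 0x80000000	handled normally; no nested destroy
 */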
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for diskctx and nested destruction
	 * recursions.  If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = diskctx->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs)) {
			dsched_disk_ctx_destroy(diskctx);
			break;
		}
	}
}

static
void
dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio;
	int refs;
	int nrefs;

#if 0
	kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
	print_backtrace(4);
#endif
	lockmgr(&diskctx->lock, LK_EXCLUSIVE);
	while ((tdio = TAILQ_FIRST(&diskctx->tdio_list)) != NULL) {
		KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
		TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
		tdio->diskctx = NULL;
		lockmgr(&diskctx->lock, LK_RELEASE);
		lockmgr(&tdio->lock, LK_EXCLUSIVE);
		dsched_thread_io_unref_destroy(tdio);
		lockmgr(&tdio->lock, LK_RELEASE);
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
	}
	lockmgr(&diskctx->lock, LK_RELEASE);

	/*
	 * Expect diskctx->refcount to be 0x80000000.  If it isn't someone
	 * else still has a temporary ref on the diskctx and we have to
	 * transition it back to an undestroyed-state (albeit without any
	 * associations), so the other user destroys it properly when the
	 * ref is released.
	 */
	while ((refs = diskctx->refcount) != 0x80000000) {
		kprintf("dsched_disk_ctx: destroy race diskctx=%p\n", diskctx);
		cpu_ccfence();
		KKASSERT(refs & 0x80000000);
		nrefs = refs & 0x7FFFFFFF;
		if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
			return;
	}

	/*
	 * Really for sure now.
	 */
	if (diskctx->dp->d_sched_policy->destroy_diskctx)
		diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
	objcache_put(dsched_diskctx_cache, diskctx);
	atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for tdio and nested destruction
	 * recursions.  If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = tdio->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
			dsched_thread_io_destroy(tdio);
			break;
		}
	}
}

/*
 * Unref and destroy the tdio even if additional refs are present.
 */
static
void
dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio)
{
	int refs;
	int nrefs;

	/*
	 * If not already transitioned to destroy-in-progress we transition
	 * to destroy-in-progress, cleanup our ref, and destroy the tdio.
	 */
	for (;;) {
		refs = tdio->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs & 0x80000000) {
			if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs |= 0x80000000;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
			dsched_thread_io_destroy(tdio);
			break;
		}
	}
}

static void
dsched_async_thread_io_destroy(struct bio *bio)
{
	struct bio *obio;
	void *ident = dsched_get_bio_priv(bio);

	obio = pop_bio(bio);
	biodone(obio);
	wakeup(ident);
}


static void
dsched_thread_io_drain(struct dsched_thread_io *tdio)
{
	struct bio *bio;
	struct bio *nbio;

	while (tdio->qlength != 0) {
		bio = TAILQ_LAST(&tdio->queue, tdio_queue);
		TAILQ_REMOVE(&tdio->queue, bio, link);

		nbio = push_bio(bio);
		nbio->bio_done = &dsched_async_thread_io_destroy;
		nbio->bio_offset = bio->bio_offset;

		dsched_set_bio_dp(nbio, tdio->dp);
		dsched_set_bio_priv(nbio, (void *)tdio);
		TAILQ_INSERT_TAIL(&tdio->queue, nbio, link);

		lksleep((void *)tdio, &tdio->lock, 0, "tdiow", 0);
	}
}

static void
dsched_thread_io_destroy(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refs;
	int nrefs;

#if 0
	kprintf("tdio (%p) destruction started, trace:\n", tdio);
	print_backtrace(8);
#endif
	KKASSERT(tdio->qlength == 0);

	while ((diskctx = tdio->diskctx) != NULL) {
		dsched_disk_ctx_ref(diskctx);
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		if (diskctx != tdio->diskctx) {
			lockmgr(&diskctx->lock, LK_RELEASE);
			dsched_disk_ctx_unref(diskctx);
			continue;
		}
		KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
		if (diskctx->dp->d_sched_policy->destroy_tdio)
			diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
		tdio->diskctx = NULL;
		dsched_thread_io_unref(tdio);
		lockmgr(&diskctx->lock, LK_RELEASE);
		dsched_disk_ctx_unref(diskctx);
	}
	while ((tdctx = tdio->tdctx) != NULL) {
		dsched_thread_ctx_ref(tdctx);
		lockmgr(&tdctx->lock, LK_EXCLUSIVE);
		if (tdctx != tdio->tdctx) {
			lockmgr(&tdctx->lock, LK_RELEASE);
			dsched_thread_ctx_unref(tdctx);
			continue;
		}
		KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
		TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		tdio->tdctx = NULL;
		dsched_thread_io_unref(tdio);
		lockmgr(&tdctx->lock, LK_RELEASE);
		dsched_thread_ctx_unref(tdctx);
	}

	/*
	 * Expect tdio->refcount to be 0x80000000.  If it isn't someone else
	 * still has a temporary ref on the tdio and we have to transition
	 * it back to an undestroyed-state (albeit without any associations)
	 * so the other user destroys it properly when the ref is released.
	 */
	while ((refs = tdio->refcount) != 0x80000000) {
		kprintf("dsched_thread_io: destroy race tdio=%p\n", tdio);
		cpu_ccfence();
		KKASSERT(refs & 0x80000000);
		nrefs = refs & 0x7FFFFFFF;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
			return;
	}

	/*
	 * Really for sure now.
	 */
	objcache_put(dsched_tdio_cache, tdio);
	atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for tdctx and nested destruction
	 * recursions.  If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = tdctx->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs)) {
			dsched_thread_ctx_destroy(tdctx);
			break;
		}
	}
}

static void
dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio;

	lockmgr(&tdctx->lock, LK_EXCLUSIVE);

	while ((tdio = TAILQ_FIRST(&tdctx->tdio_list)) != NULL) {
		KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
		lockmgr(&tdio->lock, LK_EXCLUSIVE);
		dsched_thread_io_drain(tdio);
		TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		tdio->tdctx = NULL;
		lockmgr(&tdio->lock, LK_RELEASE);
		lockmgr(&tdctx->lock, LK_RELEASE);	/* avoid deadlock */
		dsched_thread_io_unref_destroy(tdio);
		lockmgr(&tdctx->lock, LK_EXCLUSIVE);
	}
	KKASSERT(tdctx->refcount == 0x80000000);

	lockmgr(&tdctx->lock, LK_RELEASE);

	objcache_put(dsched_tdctx_cache, tdctx);
	atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
}

/*
 * Ensures that a tdio is assigned to tdctx and disk.
 */
static
struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
		       struct dsched_policy *pol, int tdctx_locked)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_INTWAIT);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	dsched_thread_io_ref(tdio);	/* prevent ripout */
	dsched_thread_io_ref(tdio);	/* for diskctx ref */

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	DSCHED_DISK_CTX_LOCK(tdio->diskctx);
	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	atomic_set_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
	DSCHED_DISK_CTX_UNLOCK(tdio->diskctx);

	if (tdctx) {
		/*
		 * Put the tdio in the tdctx list.  Inherit the temporary
		 * ref (one ref for each list).
		 */
		if (tdctx_locked == 0)
			DSCHED_THREAD_CTX_LOCK(tdctx);
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		atomic_set_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		if (tdctx_locked == 0)
			DSCHED_THREAD_CTX_UNLOCK(tdctx);
	} else {
		dsched_thread_io_unref(tdio);
	}

	tdio->debug_policy = pol;
	tdio->debug_inited = 0xF00F1234;

	atomic_add_int(&dsched_stats.tdio_allocations, 1);

	return (tdio);
}


struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);
	/*
	 * XXX: magic number 32: most devices have a tag queue of depth 32.
	 * It would be better to retrieve a more precise value from the
	 * driver.
	 */
	diskctx->max_tag_queue_depth = 32;
	diskctx->current_tag_queue_depth = 0;

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}


struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */

	return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);
}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx);	/* from prepare */
	dsched_disk_ctx_unref(diskctx);	/* from alloc */

	dsched_set_disk_priv(dp, NULL);
}

void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack.  We don't want this assert because we aren't catching
	 * all threads.  mi_startup() is still getting away without a tdctx.
	 */

	/* by now we should have a tdctx; if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}


void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}


void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, 0);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}

/*
 * Allocates a ref'd tdio for the current thread on the given diskctx.
 *
 * The tdio may have additional refs for the diskctx and tdctx it resides on.
 */
void
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
			      struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);
	dsched_thread_io_alloc(diskctx->dp, tdctx, pol, 0);
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{

}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{

}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
	   struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
			NULL, NULL, NULL,
			objcache_malloc_alloc,
			objcache_malloc_free,
			&dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
			NULL, NULL, NULL,
			objcache_malloc_alloc,
			objcache_malloc_free,
			&dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
			NULL, NULL, NULL,
			objcache_malloc_alloc,
			objcache_malloc_free,
			&dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT - 1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT - 1, SI_ORDER_ANY, dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}
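
#if 0
/*
 * Illustrative sketch (not compiled): how an external policy module could
 * hook into this framework, modeled on the noop policy above.  The
 * "example" name and the handler bodies are hypothetical; only the
 * dsched_policy fields and the dsched_register()/dsched_unregister()
 * entry points are taken from this file.
 */
static int
example_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
example_teardown(struct dsched_disk_ctx *diskctx)
{
}

static void
example_cancel(struct dsched_disk_ctx *diskctx)
{
}

static int
example_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
	      struct bio *bio)
{
	/* a real policy would queue and reorder bios here; just pass through */
	dsched_strategy_raw(diskctx->dp, bio);
	return 0;
}

static struct dsched_policy example_policy = {
	.name = "example",

	.prepare = example_prepare,
	.teardown = example_teardown,
	.cancel_all = example_cancel,
	.bio_queue = example_queue
};

/*
 * The module's attach/detach handlers would then simply call
 * dsched_register(&example_policy) and dsched_unregister(&example_policy).
 */
#endif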