/*-
 * BSD LICENSE
 *
 * Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 * Copyright (c) Intel Corporation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"
#include "spdk/conf.h"

#include "spdk/env.h"
#include "spdk/event.h"
#include "spdk/io_channel.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/nvme_spec.h"
#include "spdk/scsi_spec.h"
#include "spdk/util.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/log.h"
#include "spdk/string.h"

#ifdef SPDK_CONFIG_VTUNE
#include "ittnotify.h"
#include "ittnotify_types.h"
int __itt_init_ittlib(const char *, __itt_group_id);
#endif

#define SPDK_BDEV_IO_POOL_SIZE			(64 * 1024)
#define SPDK_BDEV_IO_CACHE_SIZE			256
#define BUF_SMALL_POOL_SIZE			8192
#define BUF_LARGE_POOL_SIZE			1024
#define NOMEM_THRESHOLD_COUNT			8
#define ZERO_BUFFER_SIZE			0x100000
#define SPDK_BDEV_QOS_TIMESLICE_IN_USEC		1000
#define SPDK_BDEV_SEC_TO_USEC			1000000ULL
#define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE	1
#define SPDK_BDEV_QOS_MIN_IOS_PER_SEC		10000

struct spdk_bdev_mgr {
	struct spdk_mempool *bdev_io_pool;

	struct spdk_mempool *buf_small_pool;
	struct spdk_mempool *buf_large_pool;

	void *zero_buffer;

	TAILQ_HEAD(, spdk_bdev_module) bdev_modules;

	TAILQ_HEAD(, spdk_bdev) bdevs;

	bool init_complete;
	bool module_init_complete;

#ifdef SPDK_CONFIG_VTUNE
	__itt_domain *domain;
#endif
};

static struct spdk_bdev_mgr g_bdev_mgr = {
	.bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
	.bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
	.init_complete = false,
	.module_init_complete = false,
};

static spdk_bdev_init_cb g_init_cb_fn = NULL;
static void *g_init_cb_arg = NULL;

static spdk_bdev_fini_cb g_fini_cb_fn = NULL;
static void *g_fini_cb_arg = NULL;
static struct spdk_thread *g_fini_thread = NULL;
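
/*
 * Worked example of the QoS timeslice math used below: with the minimum rate of
 * SPDK_BDEV_QOS_MIN_IOS_PER_SEC (10000 I/O per second) and a timeslice of
 * SPDK_BDEV_QOS_TIMESLICE_IN_USEC (1000 usec), spdk_bdev_qos_update_max_ios_per_timeslice()
 * computes 10000 * 1000 / 1000000 = 10, i.e. at most 10 I/O may be submitted per 1 ms
 * timeslice; any further I/O is queued until the poller starts the next timeslice.
 */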

struct spdk_bdev_qos {
	/** Rate limit, in I/O per second */
	uint64_t rate_limit;

	/** The channel that all I/O are funneled through */
	struct spdk_bdev_channel *ch;

	/** The thread on which the poller is running. */
	struct spdk_thread *thread;

	/** Queue of I/O waiting to be issued. */
	bdev_io_tailq_t queued;

	/** Maximum allowed IOs to be issued in one timeslice (e.g., 1ms) and
	 *  only valid for the master channel which manages the outstanding IOs. */
	uint64_t max_ios_per_timeslice;

	/** Submitted IO in one timeslice (e.g., 1ms) */
	uint64_t io_submitted_this_timeslice;

	/** Poller that processes queued I/O commands each time slice. */
	struct spdk_poller *poller;
};

struct spdk_bdev_mgmt_channel {
	bdev_io_stailq_t need_buf_small;
	bdev_io_stailq_t need_buf_large;

	/*
	 * Each thread keeps a cache of bdev_io - this allows
	 * bdev threads which are *not* DPDK threads to still
	 * benefit from a per-thread bdev_io cache. Without
	 * this, non-DPDK threads fetching from the mempool
	 * incur a cmpxchg on get and put.
	 */
	bdev_io_stailq_t per_thread_cache;
	uint32_t per_thread_cache_count;

	TAILQ_HEAD(, spdk_bdev_shared_resource) shared_resources;
};

/*
 * Per-module (or per-io_device) data. Multiple bdevs built on the same io_device
 * will queue here their IO that awaits retry. It makes it possible to retry sending
 * IO to one bdev after IO from other bdev completes.
 */
struct spdk_bdev_shared_resource {
	/* The bdev management channel */
	struct spdk_bdev_mgmt_channel *mgmt_ch;

	/*
	 * Count of I/O submitted to bdev module and waiting for completion.
	 * Incremented before submit_request() is called on an spdk_bdev_io.
	 */
	uint64_t io_outstanding;

	/*
	 * Queue of IO awaiting retry because of a previous NOMEM status returned
	 * on this channel.
	 */
	bdev_io_tailq_t nomem_io;

	/*
	 * Threshold which io_outstanding must drop to before retrying nomem_io.
	 */
	uint64_t nomem_threshold;

	/* I/O channel allocated by a bdev module */
	struct spdk_io_channel *shared_ch;

	/* Refcount of bdev channels using this resource */
	uint32_t ref;

	TAILQ_ENTRY(spdk_bdev_shared_resource) link;
};

#define BDEV_CH_RESET_IN_PROGRESS	(1 << 0)
#define BDEV_CH_QOS_ENABLED		(1 << 1)

struct spdk_bdev_channel {
	struct spdk_bdev *bdev;

	/* The channel for the underlying device */
	struct spdk_io_channel *channel;

	/* Per io_device per thread data */
	struct spdk_bdev_shared_resource *shared_resource;

	struct spdk_bdev_io_stat stat;

	/*
	 * Count of I/O submitted through this channel and waiting for completion.
	 * Incremented before submit_request() is called on an spdk_bdev_io.
	 */
	uint64_t io_outstanding;

	bdev_io_tailq_t queued_resets;

	uint32_t flags;

#ifdef SPDK_CONFIG_VTUNE
	uint64_t start_tsc;
	uint64_t interval_tsc;
	__itt_string_handle *handle;
#endif

};

struct spdk_bdev_desc {
	struct spdk_bdev *bdev;
	spdk_bdev_remove_cb_t remove_cb;
	void *remove_ctx;
	bool write;
	TAILQ_ENTRY(spdk_bdev_desc) link;
};

#define __bdev_to_io_dev(bdev)		(((char *)bdev) + 1)
#define __bdev_from_io_dev(io_dev)	((struct spdk_bdev *)(((char *)io_dev) - 1))

static void spdk_bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);

struct spdk_bdev *
spdk_bdev_first(void)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
	if (bdev) {
		SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_NEXT(prev, link);
	if (bdev) {
		SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

static struct spdk_bdev *
_bdev_next_leaf(struct spdk_bdev *bdev)
{
	while (bdev != NULL) {
		if (bdev->claim_module == NULL) {
			return bdev;
		} else {
			bdev = TAILQ_NEXT(bdev, link);
		}
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_first_leaf(void)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));

	if (bdev) {
		SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next_leaf(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_NEXT(prev, link));

	if (bdev) {
		SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_get_by_name(const char *bdev_name)
{
	struct spdk_bdev_alias *tmp;
	struct spdk_bdev *bdev = spdk_bdev_first();

	while (bdev != NULL) {
		if (strcmp(bdev_name, bdev->name) == 0) {
			return bdev;
		}

		TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
			if (strcmp(bdev_name, tmp->alias) == 0) {
				return bdev;
			}
		}

		bdev = spdk_bdev_next(bdev);
	}

	return NULL;
}

static void
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf)
{
	assert(bdev_io->get_buf_cb != NULL);
	assert(buf != NULL);
	assert(bdev_io->u.bdev.iovs != NULL);

	bdev_io->buf = buf;
	bdev_io->u.bdev.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.bdev.iovs[0].iov_len = bdev_io->buf_len;
	bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io);
}

static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_mempool *pool;
	struct spdk_bdev_io *tmp;
	void *buf;
	bdev_io_stailq_t *stailq;
	struct spdk_bdev_mgmt_channel *ch;

	assert(bdev_io->u.bdev.iovcnt == 1);

	buf = bdev_io->buf;
	ch = bdev_io->ch->shared_resource->mgmt_ch;

	if (bdev_io->buf_len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		stailq = &ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		stailq = &ch->need_buf_large;
	}

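	/*
	 * If another I/O is already waiting for a buffer of this size class, hand the
	 * buffer directly to that I/O instead of returning it to the pool.
	 */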
	if (STAILQ_EMPTY(stailq)) {
		spdk_mempool_put(pool, buf);
	} else {
		tmp = STAILQ_FIRST(stailq);
		STAILQ_REMOVE_HEAD(stailq, buf_link);
		spdk_bdev_io_set_buf(tmp, buf);
	}
}

void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
{
	struct spdk_mempool *pool;
	bdev_io_stailq_t *stailq;
	void *buf = NULL;
	struct spdk_bdev_mgmt_channel *mgmt_ch;

	assert(cb != NULL);
	assert(bdev_io->u.bdev.iovs != NULL);

	if (spdk_unlikely(bdev_io->u.bdev.iovs[0].iov_base != NULL)) {
		/* Buffer already present */
		cb(bdev_io->ch->channel, bdev_io);
		return;
	}

	assert(len <= SPDK_BDEV_LARGE_BUF_MAX_SIZE);
	mgmt_ch = bdev_io->ch->shared_resource->mgmt_ch;

	bdev_io->buf_len = len;
	bdev_io->get_buf_cb = cb;
	if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		stailq = &mgmt_ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		stailq = &mgmt_ch->need_buf_large;
	}

	buf = spdk_mempool_get(pool);

	if (!buf) {
		STAILQ_INSERT_TAIL(stailq, bdev_io, buf_link);
	} else {
		spdk_bdev_io_set_buf(bdev_io, buf);
	}
}

static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module *bdev_module;
	int max_bdev_module_size = 0;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	return max_bdev_module_size;
}

void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
}

void
spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w)
{
	struct spdk_bdev_module *bdev_module;
	struct spdk_bdev *bdev;

	assert(w != NULL);

	spdk_json_write_array_begin(w);

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->config_json) {
			bdev_module->config_json(w);
		}
	}

	TAILQ_FOREACH(bdev, &g_bdev_mgr.bdevs, link) {
		spdk_bdev_config_json(bdev, w);
	}

	spdk_json_write_array_end(w);
}

static int
spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;

	STAILQ_INIT(&ch->need_buf_small);
	STAILQ_INIT(&ch->need_buf_large);

	STAILQ_INIT(&ch->per_thread_cache);
	ch->per_thread_cache_count = 0;

	TAILQ_INIT(&ch->shared_resources);

	return 0;
}

static void
spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;
	struct spdk_bdev_io *bdev_io;

	if (!STAILQ_EMPTY(&ch->need_buf_small) || !STAILQ_EMPTY(&ch->need_buf_large)) {
		SPDK_ERRLOG("Pending I/O list wasn't empty on mgmt channel free\n");
	}

	if (!TAILQ_EMPTY(&ch->shared_resources)) {
		SPDK_ERRLOG("Module channel list wasn't empty on mgmt channel free\n");
	}

	while (!STAILQ_EMPTY(&ch->per_thread_cache)) {
		bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
		STAILQ_REMOVE_HEAD(&ch->per_thread_cache, buf_link);
		ch->per_thread_cache_count--;
		spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
	}

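	/* Every cached bdev_io has been returned to the global pool at this point. */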
	assert(ch->per_thread_cache_count == 0);
}

static void
spdk_bdev_init_complete(int rc)
{
	spdk_bdev_init_cb cb_fn = g_init_cb_fn;
	void *cb_arg = g_init_cb_arg;
	struct spdk_bdev_module *m;

	g_bdev_mgr.init_complete = true;
	g_init_cb_fn = NULL;
	g_init_cb_arg = NULL;

	/*
	 * For modules that need to know when subsystem init is complete,
	 * inform them now.
	 */
	TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
		if (m->init_complete) {
			m->init_complete();
		}
	}

	cb_fn(cb_arg, rc);
}

static void
spdk_bdev_module_action_complete(void)
{
	struct spdk_bdev_module *m;

	/*
	 * Don't finish bdev subsystem initialization if
	 * module pre-initialization is still in progress, or
	 * the subsystem has already been initialized.
	 */
	if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) {
		return;
	}

	/*
	 * Check all bdev modules for inits/examinations in progress. If any
	 * exist, return immediately since we cannot finish bdev subsystem
	 * initialization until all are completed.
	 */
	TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
		if (m->action_in_progress > 0) {
			return;
		}
	}

	/*
	 * Modules already finished initialization - now that all
	 * the bdev modules have finished their asynchronous I/O
	 * processing, the entire bdev layer can be marked as complete.
	 */
	spdk_bdev_init_complete(0);
}

static void
spdk_bdev_module_action_done(struct spdk_bdev_module *module)
{
	assert(module->action_in_progress > 0);
	module->action_in_progress--;
	spdk_bdev_module_action_complete();
}

void
spdk_bdev_module_init_done(struct spdk_bdev_module *module)
{
	spdk_bdev_module_action_done(module);
}

void
spdk_bdev_module_examine_done(struct spdk_bdev_module *module)
{
	spdk_bdev_module_action_done(module);
}

static int
spdk_bdev_modules_init(void)
{
	struct spdk_bdev_module *module;
	int rc = 0;

	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
		rc = module->module_init();
		if (rc != 0) {
			break;
		}
	}

	g_bdev_mgr.module_init_complete = true;
	return rc;
}

void
spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
{
	int cache_size;
	int rc = 0;
	char mempool_name[32];

	assert(cb_fn != NULL);

	g_init_cb_fn = cb_fn;
	g_init_cb_arg = cb_arg;

	snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());

	g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
				  SPDK_BDEV_IO_POOL_SIZE,
				  sizeof(struct spdk_bdev_io) +
				  spdk_bdev_module_get_max_ctx_size(),
				  0,
				  SPDK_ENV_SOCKET_ID_ANY);

	if (g_bdev_mgr.bdev_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		spdk_bdev_init_complete(-1);
		return;
	}

	/**
	 * Ensure no more than half of the total buffers end up in local caches, by
	 * using spdk_env_get_core_count() to determine how many local caches we need
	 * to account for.
	 */
	cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
	snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid());

	g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name,
				    BUF_SMALL_POOL_SIZE,
				    SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_small_pool) {
		SPDK_ERRLOG("create rbuf small pool failed\n");
		spdk_bdev_init_complete(-1);
		return;
	}

	cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
	snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid());

	g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name,
				    BUF_LARGE_POOL_SIZE,
				    SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_large_pool) {
		SPDK_ERRLOG("create rbuf large pool failed\n");
		spdk_bdev_init_complete(-1);
		return;
	}

	g_bdev_mgr.zero_buffer = spdk_dma_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE,
				 NULL);
	if (!g_bdev_mgr.zero_buffer) {
		SPDK_ERRLOG("create bdev zero buffer failed\n");
		spdk_bdev_init_complete(-1);
		return;
	}

#ifdef SPDK_CONFIG_VTUNE
	g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
#endif

	spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create,
				spdk_bdev_mgmt_channel_destroy,
				sizeof(struct spdk_bdev_mgmt_channel));

	rc = spdk_bdev_modules_init();
	if (rc != 0) {
		SPDK_ERRLOG("bdev modules init failed\n");
		spdk_bdev_init_complete(-1);
		return;
	}

	spdk_bdev_module_action_complete();
}

static void
spdk_bdev_mgr_unregister_cb(void *io_device)
{
	spdk_bdev_fini_cb cb_fn = g_fini_cb_fn;

	if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) {
		SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
			    SPDK_BDEV_IO_POOL_SIZE);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
		SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_small_pool),
			    BUF_SMALL_POOL_SIZE);
		assert(false);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
		SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_large_pool),
			    BUF_LARGE_POOL_SIZE);
		assert(false);
	}

	spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
	spdk_mempool_free(g_bdev_mgr.buf_small_pool);
	spdk_mempool_free(g_bdev_mgr.buf_large_pool);
	spdk_dma_free(g_bdev_mgr.zero_buffer);

	cb_fn(g_fini_cb_arg);
	g_fini_cb_fn = NULL;
	g_fini_cb_arg = NULL;
}

static struct spdk_bdev_module *g_resume_bdev_module = NULL;

static void
spdk_bdev_module_finish_iter(void *arg)
{
	struct spdk_bdev_module *bdev_module;

	/* Start iterating from the last touched module */
	if (!g_resume_bdev_module) {
		bdev_module = TAILQ_FIRST(&g_bdev_mgr.bdev_modules);
	} else {
		bdev_module = TAILQ_NEXT(g_resume_bdev_module, tailq);
	}

	while (bdev_module) {
		if (bdev_module->async_fini) {
			/* Save our place so we can resume later. We must
			 * save the variable here, before calling module_fini()
			 * below, because in some cases the module may immediately
			 * call spdk_bdev_module_finish_done() and re-enter
			 * this function to continue iterating.
			 */
			g_resume_bdev_module = bdev_module;
		}

		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}

		if (bdev_module->async_fini) {
			return;
		}

		bdev_module = TAILQ_NEXT(bdev_module, tailq);
	}

	g_resume_bdev_module = NULL;
	spdk_io_device_unregister(&g_bdev_mgr, spdk_bdev_mgr_unregister_cb);
}

void
spdk_bdev_module_finish_done(void)
{
	if (spdk_get_thread() != g_fini_thread) {
		spdk_thread_send_msg(g_fini_thread, spdk_bdev_module_finish_iter, NULL);
	} else {
		spdk_bdev_module_finish_iter(NULL);
	}
}

static void
_spdk_bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
{
	struct spdk_bdev *bdev = cb_arg;

	if (bdeverrno && bdev) {
		SPDK_WARNLOG("Unable to unregister bdev '%s' during spdk_bdev_finish()\n",
			     bdev->name);

		/*
		 * Since the call to spdk_bdev_unregister() failed, we have no way to free this
		 * bdev; try to continue by manually removing this bdev from the list and continue
		 * with the next bdev in the list.
		 */
		TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
	}

	if (TAILQ_EMPTY(&g_bdev_mgr.bdevs)) {
		SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Done unregistering bdevs\n");
		/*
		 * Bdev module finish needs to be deferred as we might be in the middle of some context
		 * (like bdev part free) that will use this bdev (or private bdev driver ctx data)
		 * after returning.
		 */
		spdk_thread_send_msg(spdk_get_thread(), spdk_bdev_module_finish_iter, NULL);
		return;
	}

	/*
	 * Unregister the first bdev in the list.
	 *
	 * spdk_bdev_unregister() will handle the case where the bdev has open descriptors by
	 * calling the remove_cb of the descriptors first.
	 *
	 * Once this bdev and all of its open descriptors have been cleaned up, this function
	 * will be called again via the unregister completion callback to continue the cleanup
	 * process with the next bdev.
	 */
	bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
	SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Unregistering bdev '%s'\n", bdev->name);
	spdk_bdev_unregister(bdev, _spdk_bdev_finish_unregister_bdevs_iter, bdev);
}

void
spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
{
	assert(cb_fn != NULL);

	g_fini_thread = spdk_get_thread();

	g_fini_cb_fn = cb_fn;
	g_fini_cb_arg = cb_arg;

	_spdk_bdev_finish_unregister_bdevs_iter(NULL, 0);
}

static struct spdk_bdev_io *
spdk_bdev_get_io(struct spdk_bdev_channel *channel)
{
	struct spdk_bdev_mgmt_channel *ch = channel->shared_resource->mgmt_ch;
	struct spdk_bdev_io *bdev_io;

	if (ch->per_thread_cache_count > 0) {
		bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
		STAILQ_REMOVE_HEAD(&ch->per_thread_cache, buf_link);
		ch->per_thread_cache_count--;
	} else {
		bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
		if (!bdev_io) {
			SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
			return NULL;
		}
	}

	return bdev_io;
}

static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_mgmt_channel *ch = bdev_io->ch->shared_resource->mgmt_ch;

	if (bdev_io->buf != NULL) {
		spdk_bdev_io_put_buf(bdev_io);
	}

	if (ch->per_thread_cache_count < SPDK_BDEV_IO_CACHE_SIZE) {
		ch->per_thread_cache_count++;
		STAILQ_INSERT_TAIL(&ch->per_thread_cache, bdev_io, buf_link);
	} else {
		spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
	}
}

static void
_spdk_bdev_qos_io_submit(struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_io *bdev_io = NULL;
	struct spdk_bdev *bdev = ch->bdev;
	struct spdk_bdev_qos *qos = bdev->qos;
	struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;

	while (!TAILQ_EMPTY(&qos->queued)) {
		if (qos->io_submitted_this_timeslice < qos->max_ios_per_timeslice) {
			bdev_io = TAILQ_FIRST(&qos->queued);
			TAILQ_REMOVE(&qos->queued, bdev_io, link);
			qos->io_submitted_this_timeslice++;
			ch->io_outstanding++;
			shared_resource->io_outstanding++;
			bdev->fn_table->submit_request(ch->channel, bdev_io);
		} else {
			break;
		}
	}
}

static void
_spdk_bdev_io_submit(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_bdev_channel *bdev_ch = bdev_io->ch;
	struct spdk_io_channel *ch = bdev_ch->channel;
	struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;

	bdev_io->submit_tsc = spdk_get_ticks();
	bdev_ch->io_outstanding++;
	shared_resource->io_outstanding++;
	bdev_io->in_submit_request = true;
	if (spdk_likely(bdev_ch->flags == 0)) {
		if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) {
			bdev->fn_table->submit_request(ch, bdev_io);
		} else {
			bdev_ch->io_outstanding--;
			shared_resource->io_outstanding--;
			TAILQ_INSERT_TAIL(&shared_resource->nomem_io, bdev_io, link);
		}
	} else if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	} else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
		bdev_ch->io_outstanding--;
		shared_resource->io_outstanding--;
		TAILQ_INSERT_TAIL(&bdev->qos->queued, bdev_io, link);
		_spdk_bdev_qos_io_submit(bdev_ch);
	} else {
		SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
	bdev_io->in_submit_request = false;
}

static void
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);

	if (bdev_io->ch->flags & BDEV_CH_QOS_ENABLED) {
		bdev_io->io_submit_ch = bdev_io->ch;
		bdev_io->ch = bdev->qos->ch;
		spdk_thread_send_msg(bdev->qos->thread, _spdk_bdev_io_submit, bdev_io);
	} else {
		_spdk_bdev_io_submit(bdev_io);
	}
}

static void
spdk_bdev_io_submit_reset(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_bdev_channel *bdev_ch = bdev_io->ch;
	struct spdk_io_channel *ch = bdev_ch->channel;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);

	bdev_io->in_submit_request = true;
	bdev->fn_table->submit_request(ch, bdev_io);
	bdev_io->in_submit_request = false;
}

static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->in_submit_request = false;
	bdev_io->buf = NULL;
	bdev_io->io_submit_ch = NULL;
}

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
{
	return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
}

int
spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	if (bdev->fn_table->dump_info_json) {
		return bdev->fn_table->dump_info_json(bdev->ctxt, w);
	}

	return 0;
}

void
spdk_bdev_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	assert(bdev != NULL);
	assert(w != NULL);

	if (bdev->fn_table->write_config_json) {
		bdev->fn_table->write_config_json(bdev, w);
	} else {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_string(w, "name", bdev->name);
		spdk_json_write_object_end(w);
	}
}

static void
spdk_bdev_qos_update_max_ios_per_timeslice(struct spdk_bdev_qos *qos)
{
	uint64_t max_ios_per_timeslice = 0;

	max_ios_per_timeslice = qos->rate_limit * SPDK_BDEV_QOS_TIMESLICE_IN_USEC /
				SPDK_BDEV_SEC_TO_USEC;
	qos->max_ios_per_timeslice = spdk_max(max_ios_per_timeslice,
					      SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE);
}

static int
spdk_bdev_channel_poll_qos(void *arg)
{
	struct spdk_bdev_qos *qos = arg;

	/* Reset for next round of rate limiting */
	qos->io_submitted_this_timeslice = 0;

	_spdk_bdev_qos_io_submit(qos->ch);

	return -1;
}

static void
_spdk_bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_shared_resource *shared_resource;

	if (!ch) {
		return;
	}

	if (ch->channel) {
		spdk_put_io_channel(ch->channel);
	}

	assert(ch->io_outstanding == 0);

	shared_resource = ch->shared_resource;
	if (shared_resource) {
		assert(ch->io_outstanding == 0);
		assert(shared_resource->ref > 0);
		shared_resource->ref--;
		if (shared_resource->ref == 0) {
			assert(shared_resource->io_outstanding == 0);
			spdk_put_io_channel(spdk_io_channel_from_ctx(shared_resource->mgmt_ch));
			TAILQ_REMOVE(&shared_resource->mgmt_ch->shared_resources, shared_resource, link);
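			/* No bdev channels reference this shared resource any more, so release it. */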
			free(shared_resource);
		}
	}
}

/* Caller must hold bdev->mutex. */
static int
_spdk_bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_qos *qos = bdev->qos;

	/* Rate limiting on this bdev enabled */
	if (qos) {
		if (qos->ch == NULL) {
			struct spdk_io_channel *io_ch;

			SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Selecting channel %p as QoS channel for bdev %s on thread %p\n", ch,
				      bdev->name, spdk_get_thread());

			/* No qos channel has been selected, so set one up */

			/* Take another reference to ch */
			io_ch = spdk_get_io_channel(__bdev_to_io_dev(bdev));
			qos->ch = ch;

			qos->thread = spdk_io_channel_get_thread(io_ch);

			TAILQ_INIT(&qos->queued);
			spdk_bdev_qos_update_max_ios_per_timeslice(qos);
			qos->io_submitted_this_timeslice = 0;

			qos->poller = spdk_poller_register(spdk_bdev_channel_poll_qos,
							   qos,
							   SPDK_BDEV_QOS_TIMESLICE_IN_USEC);
		}

		ch->flags |= BDEV_CH_QOS_ENABLED;
	}

	return 0;
}

static int
spdk_bdev_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
	struct spdk_bdev_channel *ch = ctx_buf;
	struct spdk_io_channel *mgmt_io_ch;
	struct spdk_bdev_mgmt_channel *mgmt_ch;
	struct spdk_bdev_shared_resource *shared_resource;

	ch->bdev = bdev;
	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
	if (!ch->channel) {
		return -1;
	}

	mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
	if (!mgmt_io_ch) {
		return -1;
	}

	mgmt_ch = spdk_io_channel_get_ctx(mgmt_io_ch);
	TAILQ_FOREACH(shared_resource, &mgmt_ch->shared_resources, link) {
		if (shared_resource->shared_ch == ch->channel) {
			spdk_put_io_channel(mgmt_io_ch);
			shared_resource->ref++;
			break;
		}
	}

	if (shared_resource == NULL) {
		shared_resource = calloc(1, sizeof(*shared_resource));
		if (shared_resource == NULL) {
			spdk_put_io_channel(mgmt_io_ch);
			return -1;
		}

		shared_resource->mgmt_ch = mgmt_ch;
		shared_resource->io_outstanding = 0;
		TAILQ_INIT(&shared_resource->nomem_io);
		shared_resource->nomem_threshold = 0;
		shared_resource->shared_ch = ch->channel;
		shared_resource->ref = 1;
		TAILQ_INSERT_TAIL(&mgmt_ch->shared_resources, shared_resource, link);
	}

	memset(&ch->stat, 0, sizeof(ch->stat));
	ch->io_outstanding = 0;
	TAILQ_INIT(&ch->queued_resets);
	ch->flags = 0;
	ch->shared_resource = shared_resource;

#ifdef SPDK_CONFIG_VTUNE
	{
		char *name;
		__itt_init_ittlib(NULL, 0);
		name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
		if (!name) {
			_spdk_bdev_channel_destroy_resource(ch);
			return -1;
		}
		ch->handle = __itt_string_handle_create(name);
		free(name);
		ch->start_tsc = spdk_get_ticks();
		ch->interval_tsc = spdk_get_ticks_hz() / 100;
	}
#endif

	pthread_mutex_lock(&bdev->mutex);

	if (_spdk_bdev_enable_qos(bdev, ch)) {
		_spdk_bdev_channel_destroy_resource(ch);
		pthread_mutex_unlock(&bdev->mutex);
		return -1;
	}

	pthread_mutex_unlock(&bdev->mutex);

	return 0;
}

/*
 * Abort I/O that are waiting on a data buffer. These types of I/O are
 * linked using the spdk_bdev_io buf_link TAILQ_ENTRY.
 */
static void
_spdk_bdev_abort_buf_io(bdev_io_stailq_t *queue, struct spdk_bdev_channel *ch)
{
	bdev_io_stailq_t tmp;
	struct spdk_bdev_io *bdev_io;

	STAILQ_INIT(&tmp);

	while (!STAILQ_EMPTY(queue)) {
		bdev_io = STAILQ_FIRST(queue);
		STAILQ_REMOVE_HEAD(queue, buf_link);
		if (bdev_io->ch == ch) {
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		} else {
			STAILQ_INSERT_TAIL(&tmp, bdev_io, buf_link);
		}
	}

	STAILQ_SWAP(&tmp, queue, spdk_bdev_io);
}

/*
 * Abort I/O that are queued waiting for submission. These types of I/O are
 * linked using the spdk_bdev_io link TAILQ_ENTRY.
 */
static void
_spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_io *bdev_io, *tmp;

	TAILQ_FOREACH_SAFE(bdev_io, queue, link, tmp) {
		if (bdev_io->ch == ch) {
			TAILQ_REMOVE(queue, bdev_io, link);
			/*
			 * spdk_bdev_io_complete() assumes that the completed I/O had
			 * been submitted to the bdev module. Since in this case it
			 * hadn't, bump io_outstanding to account for the decrement
			 * that spdk_bdev_io_complete() will do.
			 */
			if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) {
				ch->io_outstanding++;
				ch->shared_resource->io_outstanding++;
			}
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

static void
spdk_bdev_qos_channel_destroy(void *cb_arg)
{
	struct spdk_bdev_qos *qos = cb_arg;

	spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
	spdk_poller_unregister(&qos->poller);

	SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Free QoS %p.\n", qos);

	free(qos);
}

static int
spdk_bdev_qos_destroy(struct spdk_bdev *bdev)
{
	/*
	 * Cleanly shutting down the QoS poller is tricky, because
	 * during the asynchronous operation the user could open
	 * a new descriptor and create a new channel, spawning
	 * a new QoS poller.
	 *
	 * The strategy is to create a new QoS structure here and swap it
	 * in. The shutdown path then continues to refer to the old one
	 * until it completes and then releases it.
	 */
	struct spdk_bdev_qos *new_qos, *old_qos;

	old_qos = bdev->qos;

	new_qos = calloc(1, sizeof(*new_qos));
	if (!new_qos) {
		SPDK_ERRLOG("Unable to allocate memory to shut down QoS.\n");
		return -ENOMEM;
	}

	/* Copy the old QoS data into the newly allocated structure */
	memcpy(new_qos, old_qos, sizeof(*new_qos));

	/* Zero out the key parts of the QoS structure */
	new_qos->ch = NULL;
	new_qos->thread = NULL;
	new_qos->max_ios_per_timeslice = 0;
	new_qos->io_submitted_this_timeslice = 0;
	new_qos->poller = NULL;
	TAILQ_INIT(&new_qos->queued);

	bdev->qos = new_qos;

	spdk_thread_send_msg(old_qos->thread, spdk_bdev_qos_channel_destroy,
			     old_qos);

	/* It is safe to continue with destroying the bdev even though the QoS channel hasn't
	 * been destroyed yet. The destruction path will end up waiting for the final
	 * channel to be put before it releases resources.
	 */

	return 0;
}

static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_channel *ch = ctx_buf;
	struct spdk_bdev_mgmt_channel *mgmt_ch;
	struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;

	SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name,
		      spdk_get_thread());

	mgmt_ch = shared_resource->mgmt_ch;

	_spdk_bdev_abort_queued_io(&ch->queued_resets, ch);
	_spdk_bdev_abort_queued_io(&shared_resource->nomem_io, ch);
	_spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_small, ch);
	_spdk_bdev_abort_buf_io(&mgmt_ch->need_buf_large, ch);

	_spdk_bdev_channel_destroy_resource(ch);
}

int
spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias)
{
	struct spdk_bdev_alias *tmp;

	if (alias == NULL) {
		SPDK_ERRLOG("Empty alias passed\n");
		return -EINVAL;
	}

	if (spdk_bdev_get_by_name(alias)) {
		SPDK_ERRLOG("Bdev name/alias: %s already exists\n", alias);
		return -EEXIST;
	}

	tmp = calloc(1, sizeof(*tmp));
	if (tmp == NULL) {
		SPDK_ERRLOG("Unable to allocate alias\n");
		return -ENOMEM;
	}

	tmp->alias = strdup(alias);
	if (tmp->alias == NULL) {
		free(tmp);
		SPDK_ERRLOG("Unable to allocate alias\n");
		return -ENOMEM;
	}

	TAILQ_INSERT_TAIL(&bdev->aliases, tmp, tailq);

	return 0;
}

int
spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias)
{
	struct spdk_bdev_alias *tmp;

	TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
		if (strcmp(alias, tmp->alias) == 0) {
			TAILQ_REMOVE(&bdev->aliases, tmp, tailq);
			free(tmp->alias);
			free(tmp);
			return 0;
		}
	}

	SPDK_INFOLOG(SPDK_LOG_BDEV, "Alias %s does not exist\n", alias);

	return -ENOENT;
}

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
{
	return spdk_get_io_channel(__bdev_to_io_dev(desc->bdev));
}

const char *
spdk_bdev_get_name(const struct spdk_bdev *bdev)
{
	return bdev->name;
}

const char *
spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
{
	return bdev->product_name;
}

const struct spdk_bdev_aliases_list *
spdk_bdev_get_aliases(const struct spdk_bdev *bdev)
{
	return &bdev->aliases;
}

uint32_t
spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
{
	return bdev->blocklen;
}

uint64_t
spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
{
	return bdev->blockcnt;
}

uint64_t
spdk_bdev_get_qos_ios_per_sec(struct spdk_bdev *bdev)
{
	uint64_t rate_limit = 0;

	pthread_mutex_lock(&bdev->mutex);
	if (bdev->qos) {
		rate_limit = bdev->qos->rate_limit;
	}
	pthread_mutex_unlock(&bdev->mutex);

	return rate_limit;
}

size_t
spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
{
	/* TODO: push this logic down to the bdev modules */
	if (bdev->need_aligned_buffer) {
		return bdev->blocklen;
	}

	return 1;
}

uint32_t
spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
{
	return bdev->optimal_io_boundary;
}

bool
spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
{
	return bdev->write_cache;
}

const struct spdk_uuid *
spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
{
	return &bdev->uuid;
}

int
spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size)
{
	int ret;

	pthread_mutex_lock(&bdev->mutex);

	/* bdev has open descriptors */
	if (!TAILQ_EMPTY(&bdev->open_descs) &&
	    bdev->blockcnt > size) {
		ret = -EBUSY;
	} else {
		bdev->blockcnt = size;
		ret = 0;
	}

	pthread_mutex_unlock(&bdev->mutex);

	return ret;
}

/*
 * Convert I/O offset and length from bytes to blocks.
 *
 * Returns zero on success or non-zero if the byte parameters aren't divisible by the block size.
 */
static uint64_t
spdk_bdev_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *offset_blocks,
			  uint64_t num_bytes, uint64_t *num_blocks)
{
	uint32_t block_size = bdev->blocklen;

	*offset_blocks = offset_bytes / block_size;
	*num_blocks = num_bytes / block_size;

	return (offset_bytes % block_size) | (num_bytes % block_size);
}

static bool
spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks)
{
	/* Return failure if offset_blocks + num_blocks is less than offset_blocks; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if (offset_blocks + num_blocks < offset_blocks) {
		return false;
	}

	/* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */
	if (offset_blocks + num_blocks > bdev->blockcnt) {
		return false;
	}

	return true;
}

int
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		      void *buf, uint64_t offset_blocks, uint64_t num_blocks,
		      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io(channel);
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.bdev.iov.iov_base = buf;
	bdev_io->u.bdev.iov.iov_len = num_blocks * bdev->blocklen;
	bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
}

int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   struct iovec *iov, int iovcnt,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io(channel);
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.bdev.iovs = iov;
	bdev_io->u.bdev.iovcnt = iovcnt;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       void *buf, uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io(channel);
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.bdev.iov.iov_base = buf;
	bdev_io->u.bdev.iov.iov_len = num_blocks * bdev->blocklen;
	bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			struct iovec *iov, int iovcnt,
			uint64_t offset_blocks, uint64_t num_blocks,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io(channel);
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.bdev.iovs = iov;
	bdev_io->u.bdev.iovcnt = iovcnt;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset, uint64_t len,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			      uint64_t offset_blocks, uint64_t num_blocks,
			      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	uint64_t len;
	bool split_request = false;

	if (num_blocks > UINT64_MAX / spdk_bdev_get_block_size(bdev)) {
		SPDK_ERRLOG("length argument out of range in write_zeroes\n");
		return -ERANGE;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io(channel);

	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write_zeroes\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->u.bdev.offset_blocks = offset_blocks;

	if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
		bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
		bdev_io->u.bdev.num_blocks = num_blocks;
		bdev_io->u.bdev.iovs = NULL;
		bdev_io->u.bdev.iovcnt = 0;

	} else {
		assert(spdk_bdev_get_block_size(bdev) <= ZERO_BUFFER_SIZE);

		len = spdk_bdev_get_block_size(bdev) * num_blocks;

		if (len > ZERO_BUFFER_SIZE) {
			split_request = true;
			len = ZERO_BUFFER_SIZE;
		}

		bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
		bdev_io->u.bdev.iov.iov_base = g_bdev_mgr.zero_buffer;
		bdev_io->u.bdev.iov.iov_len = len;
		bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
		bdev_io->u.bdev.iovcnt = 1;
		bdev_io->u.bdev.num_blocks = len / spdk_bdev_get_block_size(bdev);
		bdev_io->u.bdev.split_remaining_num_blocks = num_blocks - bdev_io->u.bdev.num_blocks;
		bdev_io->u.bdev.split_current_offset_blocks = offset_blocks + bdev_io->u.bdev.num_blocks;
	}

	if (split_request) {
		bdev_io->u.bdev.stored_user_cb = cb;
		spdk_bdev_io_init(bdev_io, bdev, cb_arg, spdk_bdev_write_zeroes_split);
	} else {
		spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);
	}
	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	if (num_blocks == 0) {
		SPDK_ERRLOG("Can't unmap 0 bytes\n");
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io(channel);
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.bdev.iov.iov_base = NULL;
	bdev_io->u.bdev.iov.iov_len = 0;
	bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	bdev_io->u.bdev.num_blocks = num_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, length, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io(channel);
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.bdev.iovs = NULL;
	bdev_io->u.bdev.iovcnt = 0;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	bdev_io->u.bdev.num_blocks = num_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

static void
_spdk_bdev_reset_dev(struct spdk_io_channel_iter *i, int status)
{
	struct spdk_bdev_channel *ch = spdk_io_channel_iter_get_ctx(i);
	struct spdk_bdev_io *bdev_io;

	bdev_io = TAILQ_FIRST(&ch->queued_resets);
	TAILQ_REMOVE(&ch->queued_resets, bdev_io, link);
	spdk_bdev_io_submit_reset(bdev_io);
}

static void
_spdk_bdev_reset_freeze_channel(struct spdk_io_channel_iter *i)
{
	struct spdk_io_channel *ch;
	struct spdk_bdev_channel *channel;
	struct spdk_bdev_mgmt_channel *mgmt_channel;
	struct spdk_bdev_shared_resource *shared_resource;
	bdev_io_tailq_t tmp_queued;

	TAILQ_INIT(&tmp_queued);

	ch = spdk_io_channel_iter_get_channel(i);
	channel = spdk_io_channel_get_ctx(ch);
	shared_resource = channel->shared_resource;
	mgmt_channel = shared_resource->mgmt_ch;

	channel->flags |= BDEV_CH_RESET_IN_PROGRESS;

	if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) {
		/* The QoS object is always valid and readable while
		 * the channel flag is set, so the lock here should not
		 * be necessary. We're not in the fast path though, so
		 * just take it anyway. */
		pthread_mutex_lock(&channel->bdev->mutex);
		if (channel->bdev->qos->ch == channel) {
			TAILQ_SWAP(&channel->bdev->qos->queued, &tmp_queued, spdk_bdev_io, link);
		}
		pthread_mutex_unlock(&channel->bdev->mutex);
	}

	_spdk_bdev_abort_queued_io(&shared_resource->nomem_io, channel);
	_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel);
	_spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel);
	_spdk_bdev_abort_queued_io(&tmp_queued, channel);

	spdk_for_each_channel_continue(i, 0);
}

static void
_spdk_bdev_start_reset(void *ctx)
{
	struct spdk_bdev_channel *ch = ctx;

	spdk_for_each_channel(__bdev_to_io_dev(ch->bdev), _spdk_bdev_reset_freeze_channel,
			      ch, _spdk_bdev_reset_dev);
}

static void
_spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
{
	struct spdk_bdev *bdev = ch->bdev;

	assert(!TAILQ_EMPTY(&ch->queued_resets));

	pthread_mutex_lock(&bdev->mutex);
	if (bdev->reset_in_progress == NULL) {
		bdev->reset_in_progress = TAILQ_FIRST(&ch->queued_resets);
		/*
		 * Take a channel reference for the target bdev for the life of this
		 * reset. This guards against the channel getting destroyed while
		 * spdk_for_each_channel() calls related to this reset IO are in
		 * progress. We will release the reference when this reset is
		 * completed.
1919 */ 1920 bdev->reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(__bdev_to_io_dev(bdev)); 1921 _spdk_bdev_start_reset(ch); 1922 } 1923 pthread_mutex_unlock(&bdev->mutex); 1924 } 1925 1926 int 1927 spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1928 spdk_bdev_io_completion_cb cb, void *cb_arg) 1929 { 1930 struct spdk_bdev *bdev = desc->bdev; 1931 struct spdk_bdev_io *bdev_io; 1932 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1933 1934 bdev_io = spdk_bdev_get_io(channel); 1935 if (!bdev_io) { 1936 SPDK_ERRLOG("bdev_io memory allocation failed duing reset\n"); 1937 return -ENOMEM; 1938 } 1939 1940 bdev_io->ch = channel; 1941 bdev_io->type = SPDK_BDEV_IO_TYPE_RESET; 1942 bdev_io->u.reset.ch_ref = NULL; 1943 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1944 1945 pthread_mutex_lock(&bdev->mutex); 1946 TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, link); 1947 pthread_mutex_unlock(&bdev->mutex); 1948 1949 _spdk_bdev_channel_start_reset(channel); 1950 1951 return 0; 1952 } 1953 1954 void 1955 spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 1956 struct spdk_bdev_io_stat *stat) 1957 { 1958 #ifdef SPDK_CONFIG_VTUNE 1959 SPDK_ERRLOG("Calling spdk_bdev_get_io_stat is not allowed when VTune integration is enabled.\n"); 1960 memset(stat, 0, sizeof(*stat)); 1961 return; 1962 #endif 1963 1964 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1965 1966 channel->stat.ticks_rate = spdk_get_ticks_hz(); 1967 *stat = channel->stat; 1968 memset(&channel->stat, 0, sizeof(channel->stat)); 1969 } 1970 1971 int 1972 spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1973 const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, 1974 spdk_bdev_io_completion_cb cb, void *cb_arg) 1975 { 1976 struct spdk_bdev *bdev = desc->bdev; 1977 struct spdk_bdev_io *bdev_io; 1978 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1979 1980 if (!desc->write) { 1981 return -EBADF; 1982 } 1983 1984 bdev_io = spdk_bdev_get_io(channel); 1985 if (!bdev_io) { 1986 SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n"); 1987 return -ENOMEM; 1988 } 1989 1990 bdev_io->ch = channel; 1991 bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN; 1992 bdev_io->u.nvme_passthru.cmd = *cmd; 1993 bdev_io->u.nvme_passthru.buf = buf; 1994 bdev_io->u.nvme_passthru.nbytes = nbytes; 1995 bdev_io->u.nvme_passthru.md_buf = NULL; 1996 bdev_io->u.nvme_passthru.md_len = 0; 1997 1998 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1999 2000 spdk_bdev_io_submit(bdev_io); 2001 return 0; 2002 } 2003 2004 int 2005 spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 2006 const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, 2007 spdk_bdev_io_completion_cb cb, void *cb_arg) 2008 { 2009 struct spdk_bdev *bdev = desc->bdev; 2010 struct spdk_bdev_io *bdev_io; 2011 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 2012 2013 if (!desc->write) { 2014 /* 2015 * Do not try to parse the NVMe command - we could maybe use bits in the opcode 2016 * to easily determine if the command is a read or write, but for now just 2017 * do not allow io_passthru with a read-only descriptor. 
                 */
                return -EBADF;
        }

        bdev_io = spdk_bdev_get_io(channel);
        if (!bdev_io) {
                SPDK_ERRLOG("bdev_io memory allocation failed during nvme_io_passthru\n");
                return -ENOMEM;
        }

        bdev_io->ch = channel;
        bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
        bdev_io->u.nvme_passthru.cmd = *cmd;
        bdev_io->u.nvme_passthru.buf = buf;
        bdev_io->u.nvme_passthru.nbytes = nbytes;
        bdev_io->u.nvme_passthru.md_buf = NULL;
        bdev_io->u.nvme_passthru.md_len = 0;

        spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

        spdk_bdev_io_submit(bdev_io);
        return 0;
}

int
spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
                              const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len,
                              spdk_bdev_io_completion_cb cb, void *cb_arg)
{
        struct spdk_bdev *bdev = desc->bdev;
        struct spdk_bdev_io *bdev_io;
        struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

        if (!desc->write) {
                /*
                 * Do not try to parse the NVMe command - we could maybe use bits in the opcode
                 * to easily determine if the command is a read or write, but for now just
                 * do not allow io_passthru with a read-only descriptor.
                 */
                return -EBADF;
        }

        bdev_io = spdk_bdev_get_io(channel);
        if (!bdev_io) {
                SPDK_ERRLOG("bdev_io memory allocation failed during nvme_io_passthru_md\n");
                return -ENOMEM;
        }

        bdev_io->ch = channel;
        bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD;
        bdev_io->u.nvme_passthru.cmd = *cmd;
        bdev_io->u.nvme_passthru.buf = buf;
        bdev_io->u.nvme_passthru.nbytes = nbytes;
        bdev_io->u.nvme_passthru.md_buf = md_buf;
        bdev_io->u.nvme_passthru.md_len = md_len;

        spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

        spdk_bdev_io_submit(bdev_io);
        return 0;
}

int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
        if (!bdev_io) {
                SPDK_ERRLOG("bdev_io is NULL\n");
                return -1;
        }

        if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
                SPDK_ERRLOG("bdev_io is in pending state\n");
                assert(false);
                return -1;
        }

        spdk_bdev_put_io(bdev_io);

        return 0;
}

static void
_spdk_bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
{
        struct spdk_bdev *bdev = bdev_ch->bdev;
        struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
        struct spdk_bdev_io *bdev_io;

        if (shared_resource->io_outstanding > shared_resource->nomem_threshold) {
                /*
                 * Allow some more I/O to complete before retrying the nomem_io queue.
                 * Some drivers (such as nvme) cannot immediately take a new I/O in
                 * the context of a completion, because the resources for the I/O are
                 * not released until control returns to the bdev poller. Also, we
                 * may require several small I/O to complete before a larger I/O
                 * (that requires splitting) can be submitted.
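                 * The threshold compared against here is recalculated in
                 * spdk_bdev_io_complete() each time a request comes back with
                 * SPDK_BDEV_IO_STATUS_NOMEM.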
2114 */ 2115 return; 2116 } 2117 2118 while (!TAILQ_EMPTY(&shared_resource->nomem_io)) { 2119 bdev_io = TAILQ_FIRST(&shared_resource->nomem_io); 2120 TAILQ_REMOVE(&shared_resource->nomem_io, bdev_io, link); 2121 bdev_io->ch->io_outstanding++; 2122 shared_resource->io_outstanding++; 2123 bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING; 2124 bdev->fn_table->submit_request(bdev_io->ch->channel, bdev_io); 2125 if (bdev_io->status == SPDK_BDEV_IO_STATUS_NOMEM) { 2126 break; 2127 } 2128 } 2129 } 2130 2131 static inline void 2132 _spdk_bdev_io_complete(void *ctx) 2133 { 2134 struct spdk_bdev_io *bdev_io = ctx; 2135 2136 if (spdk_unlikely(bdev_io->in_submit_request || bdev_io->io_submit_ch)) { 2137 /* 2138 * Send the completion to the thread that originally submitted the I/O, 2139 * which may not be the current thread in the case of QoS. 2140 */ 2141 if (bdev_io->io_submit_ch) { 2142 bdev_io->ch = bdev_io->io_submit_ch; 2143 bdev_io->io_submit_ch = NULL; 2144 } 2145 2146 /* 2147 * Defer completion to avoid potential infinite recursion if the 2148 * user's completion callback issues a new I/O. 2149 */ 2150 spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->ch->channel), 2151 _spdk_bdev_io_complete, bdev_io); 2152 return; 2153 } 2154 2155 if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) { 2156 switch (bdev_io->type) { 2157 case SPDK_BDEV_IO_TYPE_READ: 2158 bdev_io->ch->stat.bytes_read += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen; 2159 bdev_io->ch->stat.num_read_ops++; 2160 bdev_io->ch->stat.read_latency_ticks += (spdk_get_ticks() - bdev_io->submit_tsc); 2161 break; 2162 case SPDK_BDEV_IO_TYPE_WRITE: 2163 bdev_io->ch->stat.bytes_written += bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen; 2164 bdev_io->ch->stat.num_write_ops++; 2165 bdev_io->ch->stat.write_latency_ticks += (spdk_get_ticks() - bdev_io->submit_tsc); 2166 break; 2167 default: 2168 break; 2169 } 2170 } 2171 2172 #ifdef SPDK_CONFIG_VTUNE 2173 uint64_t now_tsc = spdk_get_ticks(); 2174 if (now_tsc > (bdev_io->ch->start_tsc + bdev_io->ch->interval_tsc)) { 2175 uint64_t data[5]; 2176 2177 data[0] = bdev_io->ch->stat.num_read_ops; 2178 data[1] = bdev_io->ch->stat.bytes_read; 2179 data[2] = bdev_io->ch->stat.num_write_ops; 2180 data[3] = bdev_io->ch->stat.bytes_written; 2181 data[4] = bdev_io->bdev->fn_table->get_spin_time ? 
2182 bdev_io->bdev->fn_table->get_spin_time(bdev_io->ch->channel) : 0; 2183 2184 __itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->ch->handle, 2185 __itt_metadata_u64, 5, data); 2186 2187 memset(&bdev_io->ch->stat, 0, sizeof(bdev_io->ch->stat)); 2188 bdev_io->ch->start_tsc = now_tsc; 2189 } 2190 #endif 2191 2192 assert(bdev_io->cb != NULL); 2193 assert(spdk_get_thread() == spdk_io_channel_get_thread(bdev_io->ch->channel)); 2194 2195 bdev_io->cb(bdev_io, bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS, 2196 bdev_io->caller_ctx); 2197 } 2198 2199 static void 2200 _spdk_bdev_reset_complete(struct spdk_io_channel_iter *i, int status) 2201 { 2202 struct spdk_bdev_io *bdev_io = spdk_io_channel_iter_get_ctx(i); 2203 2204 if (bdev_io->u.reset.ch_ref != NULL) { 2205 spdk_put_io_channel(bdev_io->u.reset.ch_ref); 2206 bdev_io->u.reset.ch_ref = NULL; 2207 } 2208 2209 _spdk_bdev_io_complete(bdev_io); 2210 } 2211 2212 static void 2213 _spdk_bdev_unfreeze_channel(struct spdk_io_channel_iter *i) 2214 { 2215 struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i); 2216 struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch); 2217 2218 ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS; 2219 if (!TAILQ_EMPTY(&ch->queued_resets)) { 2220 _spdk_bdev_channel_start_reset(ch); 2221 } 2222 2223 spdk_for_each_channel_continue(i, 0); 2224 } 2225 2226 void 2227 spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status) 2228 { 2229 struct spdk_bdev *bdev = bdev_io->bdev; 2230 struct spdk_bdev_channel *bdev_ch = bdev_io->ch; 2231 struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource; 2232 2233 bdev_io->status = status; 2234 2235 if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) { 2236 bool unlock_channels = false; 2237 2238 if (status == SPDK_BDEV_IO_STATUS_NOMEM) { 2239 SPDK_ERRLOG("NOMEM returned for reset\n"); 2240 } 2241 pthread_mutex_lock(&bdev->mutex); 2242 if (bdev_io == bdev->reset_in_progress) { 2243 bdev->reset_in_progress = NULL; 2244 unlock_channels = true; 2245 } 2246 pthread_mutex_unlock(&bdev->mutex); 2247 2248 if (unlock_channels) { 2249 spdk_for_each_channel(__bdev_to_io_dev(bdev), _spdk_bdev_unfreeze_channel, 2250 bdev_io, _spdk_bdev_reset_complete); 2251 return; 2252 } 2253 } else { 2254 assert(bdev_ch->io_outstanding > 0); 2255 assert(shared_resource->io_outstanding > 0); 2256 bdev_ch->io_outstanding--; 2257 shared_resource->io_outstanding--; 2258 2259 if (spdk_unlikely(status == SPDK_BDEV_IO_STATUS_NOMEM)) { 2260 TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, link); 2261 /* 2262 * Wait for some of the outstanding I/O to complete before we 2263 * retry any of the nomem_io. Normally we will wait for 2264 * NOMEM_THRESHOLD_COUNT I/O to complete but for low queue 2265 * depth channels we will instead wait for half to complete. 
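                         * For example, with NOMEM_THRESHOLD_COUNT == 8 and 100 I/O still
                         * outstanding, the threshold becomes spdk_max(50, 92) == 92, so we
                         * retry after 8 more completions; with only 10 outstanding it becomes
                         * spdk_max(5, 2) == 5, i.e. after half of them have completed.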
2266 */ 2267 shared_resource->nomem_threshold = spdk_max((int64_t)shared_resource->io_outstanding / 2, 2268 (int64_t)shared_resource->io_outstanding - NOMEM_THRESHOLD_COUNT); 2269 return; 2270 } 2271 2272 if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) { 2273 _spdk_bdev_ch_retry_io(bdev_ch); 2274 } 2275 } 2276 2277 _spdk_bdev_io_complete(bdev_io); 2278 } 2279 2280 void 2281 spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc, 2282 enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq) 2283 { 2284 if (sc == SPDK_SCSI_STATUS_GOOD) { 2285 bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS; 2286 } else { 2287 bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR; 2288 bdev_io->error.scsi.sc = sc; 2289 bdev_io->error.scsi.sk = sk; 2290 bdev_io->error.scsi.asc = asc; 2291 bdev_io->error.scsi.ascq = ascq; 2292 } 2293 2294 spdk_bdev_io_complete(bdev_io, bdev_io->status); 2295 } 2296 2297 void 2298 spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io, 2299 int *sc, int *sk, int *asc, int *ascq) 2300 { 2301 assert(sc != NULL); 2302 assert(sk != NULL); 2303 assert(asc != NULL); 2304 assert(ascq != NULL); 2305 2306 switch (bdev_io->status) { 2307 case SPDK_BDEV_IO_STATUS_SUCCESS: 2308 *sc = SPDK_SCSI_STATUS_GOOD; 2309 *sk = SPDK_SCSI_SENSE_NO_SENSE; 2310 *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; 2311 *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; 2312 break; 2313 case SPDK_BDEV_IO_STATUS_NVME_ERROR: 2314 spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq); 2315 break; 2316 case SPDK_BDEV_IO_STATUS_SCSI_ERROR: 2317 *sc = bdev_io->error.scsi.sc; 2318 *sk = bdev_io->error.scsi.sk; 2319 *asc = bdev_io->error.scsi.asc; 2320 *ascq = bdev_io->error.scsi.ascq; 2321 break; 2322 default: 2323 *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; 2324 *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND; 2325 *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; 2326 *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; 2327 break; 2328 } 2329 } 2330 2331 void 2332 spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc) 2333 { 2334 if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) { 2335 bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS; 2336 } else { 2337 bdev_io->error.nvme.sct = sct; 2338 bdev_io->error.nvme.sc = sc; 2339 bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR; 2340 } 2341 2342 spdk_bdev_io_complete(bdev_io, bdev_io->status); 2343 } 2344 2345 void 2346 spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc) 2347 { 2348 assert(sct != NULL); 2349 assert(sc != NULL); 2350 2351 if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) { 2352 *sct = bdev_io->error.nvme.sct; 2353 *sc = bdev_io->error.nvme.sc; 2354 } else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) { 2355 *sct = SPDK_NVME_SCT_GENERIC; 2356 *sc = SPDK_NVME_SC_SUCCESS; 2357 } else { 2358 *sct = SPDK_NVME_SCT_GENERIC; 2359 *sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 2360 } 2361 } 2362 2363 struct spdk_thread * 2364 spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io) 2365 { 2366 return spdk_io_channel_get_thread(bdev_io->ch->channel); 2367 } 2368 2369 static void 2370 _spdk_bdev_qos_config(struct spdk_bdev *bdev) 2371 { 2372 struct spdk_conf_section *sp = NULL; 2373 const char *val = NULL; 2374 uint64_t ios_per_sec = 0; 2375 int i = 0; 2376 2377 sp = spdk_conf_find_section(NULL, "QoS"); 2378 if (!sp) { 2379 return; 2380 } 2381 2382 while (true) { 2383 val = spdk_conf_section_get_nmval(sp, "Limit_IOPS", i, 0); 2384 if (!val) { 2385 break; 2386 } 2387 2388 if (strcmp(bdev->name, 
                    val) != 0) {
                        i++;
                        continue;
                }

                val = spdk_conf_section_get_nmval(sp, "Limit_IOPS", i, 1);
                if (!val) {
                        return;
                }

                ios_per_sec = strtoull(val, NULL, 10);
                if (ios_per_sec > 0) {
                        if (ios_per_sec % SPDK_BDEV_QOS_MIN_IOS_PER_SEC) {
                                SPDK_ERRLOG("Assigned IOPS %" PRIu64 " on bdev %s is not a multiple of %u\n",
                                            ios_per_sec, bdev->name, SPDK_BDEV_QOS_MIN_IOS_PER_SEC);
                                SPDK_ERRLOG("Failed to enable QoS on bdev %s\n", bdev->name);
                        } else {
                                bdev->qos = calloc(1, sizeof(*bdev->qos));
                                if (!bdev->qos) {
                                        SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n");
                                        return;
                                }
                                bdev->qos->rate_limit = ios_per_sec;
                                SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Bdev:%s QoS:%" PRIu64 "\n",
                                              bdev->name, bdev->qos->rate_limit);
                        }
                }

                return;
        }
}

static int
spdk_bdev_init(struct spdk_bdev *bdev)
{
        assert(bdev->module != NULL);

        if (!bdev->name) {
                SPDK_ERRLOG("Bdev name is NULL\n");
                return -EINVAL;
        }

        if (spdk_bdev_get_by_name(bdev->name)) {
                SPDK_ERRLOG("Bdev name:%s already exists\n", bdev->name);
                return -EEXIST;
        }

        bdev->status = SPDK_BDEV_STATUS_READY;

        TAILQ_INIT(&bdev->open_descs);

        TAILQ_INIT(&bdev->aliases);

        bdev->reset_in_progress = NULL;

        _spdk_bdev_qos_config(bdev);

        spdk_io_device_register(__bdev_to_io_dev(bdev),
                                spdk_bdev_channel_create, spdk_bdev_channel_destroy,
                                sizeof(struct spdk_bdev_channel));

        pthread_mutex_init(&bdev->mutex, NULL);
        return 0;
}

static void
spdk_bdev_destroy_cb(void *io_device)
{
        int rc;
        struct spdk_bdev *bdev;
        spdk_bdev_unregister_cb cb_fn;
        void *cb_arg;

        bdev = __bdev_from_io_dev(io_device);
        cb_fn = bdev->unregister_cb;
        cb_arg = bdev->unregister_ctx;

        rc = bdev->fn_table->destruct(bdev->ctxt);
        if (rc < 0) {
                SPDK_ERRLOG("destruct failed\n");
        }
        if (rc <= 0 && cb_fn != NULL) {
                cb_fn(cb_arg, rc);
        }
}

static void
spdk_bdev_fini(struct spdk_bdev *bdev)
{
        pthread_mutex_destroy(&bdev->mutex);

        free(bdev->qos);

        spdk_io_device_unregister(__bdev_to_io_dev(bdev), spdk_bdev_destroy_cb);
}

static void
spdk_bdev_start(struct spdk_bdev *bdev)
{
        struct spdk_bdev_module *module;

        SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Inserting bdev %s into list\n", bdev->name);
        TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);

        TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
                if (module->examine) {
                        module->action_in_progress++;
                        module->examine(bdev);
                }
        }
}

int
spdk_bdev_register(struct spdk_bdev *bdev)
{
        int rc = spdk_bdev_init(bdev);

        if (rc == 0) {
                spdk_bdev_start(bdev);
        }

        return rc;
}

static void
spdk_vbdev_remove_base_bdevs(struct spdk_bdev *vbdev)
{
        struct spdk_bdev **bdevs;
        struct spdk_bdev *base;
        size_t i, j, k;
        bool found;

        /* Iterate over base bdevs to remove vbdev from them.
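         * For each base bdev this does a linear search of its vbdevs array, shifts
         * the remaining entries down over the matched slot, and then shrinks the
         * array with realloc() (or frees it once it becomes empty).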
         */
        for (i = 0; i < vbdev->base_bdevs_cnt; i++) {
                found = false;
                base = vbdev->base_bdevs[i];

                for (j = 0; j < base->vbdevs_cnt; j++) {
                        if (base->vbdevs[j] != vbdev) {
                                continue;
                        }

                        for (k = j; k + 1 < base->vbdevs_cnt; k++) {
                                base->vbdevs[k] = base->vbdevs[k + 1];
                        }

                        base->vbdevs_cnt--;
                        if (base->vbdevs_cnt > 0) {
                                bdevs = realloc(base->vbdevs, base->vbdevs_cnt * sizeof(bdevs[0]));
                                /* It would be odd if shrinking a memory block failed. */
                                assert(bdevs);
                                base->vbdevs = bdevs;
                        } else {
                                free(base->vbdevs);
                                base->vbdevs = NULL;
                        }

                        found = true;
                        break;
                }

                if (!found) {
                        SPDK_WARNLOG("Bdev '%s' is not base bdev of '%s'.\n", base->name, vbdev->name);
                }
        }

        free(vbdev->base_bdevs);
        vbdev->base_bdevs = NULL;
        vbdev->base_bdevs_cnt = 0;
}

static int
spdk_vbdev_set_base_bdevs(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, size_t cnt)
{
        struct spdk_bdev **vbdevs;
        struct spdk_bdev *base;
        size_t i;

        /* Adding base bdevs isn't supported (yet?). */
        assert(vbdev->base_bdevs_cnt == 0);

        vbdev->base_bdevs = malloc(cnt * sizeof(vbdev->base_bdevs[0]));
        if (!vbdev->base_bdevs) {
                SPDK_ERRLOG("%s - malloc() failed\n", vbdev->name);
                return -ENOMEM;
        }

        memcpy(vbdev->base_bdevs, base_bdevs, cnt * sizeof(vbdev->base_bdevs[0]));
        vbdev->base_bdevs_cnt = cnt;

        /* Iterate over base bdevs to add this vbdev to them. */
        for (i = 0; i < cnt; i++) {
                base = vbdev->base_bdevs[i];

                assert(base != NULL);
                assert(base->claim_module != NULL);

                vbdevs = realloc(base->vbdevs, (base->vbdevs_cnt + 1) * sizeof(vbdevs[0]));
                if (!vbdevs) {
                        SPDK_ERRLOG("%s - realloc() failed\n", base->name);
                        spdk_vbdev_remove_base_bdevs(vbdev);
                        return -ENOMEM;
                }

                vbdevs[base->vbdevs_cnt] = vbdev;
                base->vbdevs = vbdevs;
                base->vbdevs_cnt++;
        }

        return 0;
}

int
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
        int rc;

        rc = spdk_bdev_init(vbdev);
        if (rc) {
                return rc;
        }

        if (base_bdev_count == 0) {
                spdk_bdev_start(vbdev);
                return 0;
        }

        rc = spdk_vbdev_set_base_bdevs(vbdev, base_bdevs, base_bdev_count);
        if (rc) {
                spdk_bdev_fini(vbdev);
                return rc;
        }

        spdk_bdev_start(vbdev);
        return 0;
}

void
spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno)
{
        if (bdev->unregister_cb != NULL) {
                bdev->unregister_cb(bdev->unregister_ctx, bdeverrno);
        }
}

static void
_remove_notify(void *arg)
{
        struct spdk_bdev_desc *desc = arg;

        desc->remove_cb(desc->remove_ctx);
}

void
spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
{
        struct spdk_bdev_desc *desc, *tmp;
        bool do_destruct = true;
        struct spdk_thread *thread;

        SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Removing bdev %s from list\n", bdev->name);

        thread = spdk_get_thread();
        if (!thread) {
                /* The user called this from a non-SPDK thread.
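                 * Unregistering requires sending messages to SPDK threads (for example
                 * the deferred remove_cb notifications below use spdk_thread_send_msg()),
                 * so all we can do here is fail the request with -ENOTSUP.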
*/ 2655 cb_fn(cb_arg, -ENOTSUP); 2656 return; 2657 } 2658 2659 pthread_mutex_lock(&bdev->mutex); 2660 2661 spdk_vbdev_remove_base_bdevs(bdev); 2662 2663 bdev->status = SPDK_BDEV_STATUS_REMOVING; 2664 bdev->unregister_cb = cb_fn; 2665 bdev->unregister_ctx = cb_arg; 2666 2667 TAILQ_FOREACH_SAFE(desc, &bdev->open_descs, link, tmp) { 2668 if (desc->remove_cb) { 2669 do_destruct = false; 2670 /* 2671 * Defer invocation of the remove_cb to a separate message that will 2672 * run later on this thread. This ensures this context unwinds and 2673 * we don't recursively unregister this bdev again if the remove_cb 2674 * immediately closes its descriptor. 2675 */ 2676 spdk_thread_send_msg(thread, _remove_notify, desc); 2677 } 2678 } 2679 2680 if (!do_destruct) { 2681 pthread_mutex_unlock(&bdev->mutex); 2682 return; 2683 } 2684 2685 TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link); 2686 pthread_mutex_unlock(&bdev->mutex); 2687 2688 spdk_bdev_fini(bdev); 2689 } 2690 2691 int 2692 spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, 2693 void *remove_ctx, struct spdk_bdev_desc **_desc) 2694 { 2695 struct spdk_bdev_desc *desc; 2696 2697 desc = calloc(1, sizeof(*desc)); 2698 if (desc == NULL) { 2699 SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n"); 2700 return -ENOMEM; 2701 } 2702 2703 SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Opening descriptor %p for bdev %s on thread %p\n", desc, bdev->name, 2704 spdk_get_thread()); 2705 2706 pthread_mutex_lock(&bdev->mutex); 2707 2708 if (write && bdev->claim_module) { 2709 SPDK_ERRLOG("Could not open %s - already claimed\n", bdev->name); 2710 free(desc); 2711 pthread_mutex_unlock(&bdev->mutex); 2712 return -EPERM; 2713 } 2714 2715 TAILQ_INSERT_TAIL(&bdev->open_descs, desc, link); 2716 2717 desc->bdev = bdev; 2718 desc->remove_cb = remove_cb; 2719 desc->remove_ctx = remove_ctx; 2720 desc->write = write; 2721 *_desc = desc; 2722 2723 pthread_mutex_unlock(&bdev->mutex); 2724 2725 return 0; 2726 } 2727 2728 void 2729 spdk_bdev_close(struct spdk_bdev_desc *desc) 2730 { 2731 struct spdk_bdev *bdev = desc->bdev; 2732 bool do_unregister = false; 2733 2734 SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name, 2735 spdk_get_thread()); 2736 2737 pthread_mutex_lock(&bdev->mutex); 2738 2739 TAILQ_REMOVE(&bdev->open_descs, desc, link); 2740 free(desc); 2741 2742 /* If no more descriptors, kill QoS channel */ 2743 if (bdev->qos && TAILQ_EMPTY(&bdev->open_descs)) { 2744 SPDK_DEBUGLOG(SPDK_LOG_BDEV, "Closed last descriptor for bdev %s on thread %p. Stopping QoS.\n", 2745 bdev->name, spdk_get_thread()); 2746 2747 if (spdk_bdev_qos_destroy(bdev)) { 2748 /* There isn't anything we can do to recover here. Just let the 2749 * old QoS poller keep running. The QoS handling won't change 2750 * cores when the user allocates a new channel, but it won't break. */ 2751 SPDK_ERRLOG("Unable to shut down QoS poller. 
It will continue running on the current thread.\n"); 2752 } 2753 } 2754 2755 if (bdev->status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->open_descs)) { 2756 do_unregister = true; 2757 } 2758 pthread_mutex_unlock(&bdev->mutex); 2759 2760 if (do_unregister == true) { 2761 spdk_bdev_unregister(bdev, bdev->unregister_cb, bdev->unregister_ctx); 2762 } 2763 } 2764 2765 int 2766 spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 2767 struct spdk_bdev_module *module) 2768 { 2769 if (bdev->claim_module != NULL) { 2770 SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name, 2771 bdev->claim_module->name); 2772 return -EPERM; 2773 } 2774 2775 if (desc && !desc->write) { 2776 desc->write = true; 2777 } 2778 2779 bdev->claim_module = module; 2780 return 0; 2781 } 2782 2783 void 2784 spdk_bdev_module_release_bdev(struct spdk_bdev *bdev) 2785 { 2786 assert(bdev->claim_module != NULL); 2787 bdev->claim_module = NULL; 2788 } 2789 2790 struct spdk_bdev * 2791 spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc) 2792 { 2793 return desc->bdev; 2794 } 2795 2796 void 2797 spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp) 2798 { 2799 struct iovec *iovs; 2800 int iovcnt; 2801 2802 if (bdev_io == NULL) { 2803 return; 2804 } 2805 2806 switch (bdev_io->type) { 2807 case SPDK_BDEV_IO_TYPE_READ: 2808 iovs = bdev_io->u.bdev.iovs; 2809 iovcnt = bdev_io->u.bdev.iovcnt; 2810 break; 2811 case SPDK_BDEV_IO_TYPE_WRITE: 2812 iovs = bdev_io->u.bdev.iovs; 2813 iovcnt = bdev_io->u.bdev.iovcnt; 2814 break; 2815 default: 2816 iovs = NULL; 2817 iovcnt = 0; 2818 break; 2819 } 2820 2821 if (iovp) { 2822 *iovp = iovs; 2823 } 2824 if (iovcntp) { 2825 *iovcntp = iovcnt; 2826 } 2827 } 2828 2829 void 2830 spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module) 2831 { 2832 2833 if (spdk_bdev_module_list_find(bdev_module->name)) { 2834 SPDK_ERRLOG("ERROR: module '%s' already registered.\n", bdev_module->name); 2835 assert(false); 2836 } 2837 2838 if (bdev_module->async_init) { 2839 bdev_module->action_in_progress = 1; 2840 } 2841 2842 /* 2843 * Modules with examine callbacks must be initialized first, so they are 2844 * ready to handle examine callbacks from later modules that will 2845 * register physical bdevs. 2846 */ 2847 if (bdev_module->examine != NULL) { 2848 TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, tailq); 2849 } else { 2850 TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq); 2851 } 2852 } 2853 2854 struct spdk_bdev_module * 2855 spdk_bdev_module_list_find(const char *name) 2856 { 2857 struct spdk_bdev_module *bdev_module; 2858 2859 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 2860 if (strcmp(name, bdev_module->name) == 0) { 2861 break; 2862 } 2863 } 2864 2865 return bdev_module; 2866 } 2867 2868 static void 2869 spdk_bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 2870 { 2871 uint64_t len; 2872 2873 if (!success) { 2874 bdev_io->cb = bdev_io->u.bdev.stored_user_cb; 2875 _spdk_bdev_io_complete(bdev_io); 2876 return; 2877 } 2878 2879 /* no need to perform the error checking from write_zeroes_blocks because this request already passed those checks. 
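         * Each round writes at most ZERO_BUFFER_SIZE bytes; with a 512-byte block size
         * that is 0x100000 / 512 == 2048 blocks per submission.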
*/ 2880 len = spdk_min(spdk_bdev_get_block_size(bdev_io->bdev) * bdev_io->u.bdev.split_remaining_num_blocks, 2881 ZERO_BUFFER_SIZE); 2882 2883 bdev_io->u.bdev.offset_blocks = bdev_io->u.bdev.split_current_offset_blocks; 2884 bdev_io->u.bdev.iov.iov_len = len; 2885 bdev_io->u.bdev.num_blocks = len / spdk_bdev_get_block_size(bdev_io->bdev); 2886 bdev_io->u.bdev.split_remaining_num_blocks -= bdev_io->u.bdev.num_blocks; 2887 bdev_io->u.bdev.split_current_offset_blocks += bdev_io->u.bdev.num_blocks; 2888 2889 /* if this round completes the i/o, change the callback to be the original user callback */ 2890 if (bdev_io->u.bdev.split_remaining_num_blocks == 0) { 2891 spdk_bdev_io_init(bdev_io, bdev_io->bdev, cb_arg, bdev_io->u.bdev.stored_user_cb); 2892 } else { 2893 spdk_bdev_io_init(bdev_io, bdev_io->bdev, cb_arg, spdk_bdev_write_zeroes_split); 2894 } 2895 spdk_bdev_io_submit(bdev_io); 2896 } 2897 2898 struct set_qos_limit_ctx { 2899 void (*cb_fn)(void *cb_arg, int status); 2900 void *cb_arg; 2901 struct spdk_bdev *bdev; 2902 }; 2903 2904 static void 2905 _spdk_bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status) 2906 { 2907 pthread_mutex_lock(&ctx->bdev->mutex); 2908 ctx->bdev->qos_mod_in_progress = false; 2909 pthread_mutex_unlock(&ctx->bdev->mutex); 2910 2911 ctx->cb_fn(ctx->cb_arg, status); 2912 free(ctx); 2913 } 2914 2915 static void 2916 _spdk_bdev_disable_qos_done(void *cb_arg) 2917 { 2918 struct set_qos_limit_ctx *ctx = cb_arg; 2919 struct spdk_bdev *bdev = ctx->bdev; 2920 struct spdk_bdev_qos *qos; 2921 2922 pthread_mutex_lock(&bdev->mutex); 2923 qos = bdev->qos; 2924 bdev->qos = NULL; 2925 pthread_mutex_unlock(&bdev->mutex); 2926 2927 _spdk_bdev_abort_queued_io(&qos->queued, qos->ch); 2928 spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch)); 2929 spdk_poller_unregister(&qos->poller); 2930 2931 free(qos); 2932 2933 _spdk_bdev_set_qos_limit_done(ctx, 0); 2934 } 2935 2936 static void 2937 _spdk_bdev_disable_qos_msg_done(struct spdk_io_channel_iter *i, int status) 2938 { 2939 void *io_device = spdk_io_channel_iter_get_io_device(i); 2940 struct spdk_bdev *bdev = __bdev_from_io_dev(io_device); 2941 struct set_qos_limit_ctx *ctx = spdk_io_channel_iter_get_ctx(i); 2942 struct spdk_thread *thread; 2943 2944 pthread_mutex_lock(&bdev->mutex); 2945 thread = bdev->qos->thread; 2946 pthread_mutex_unlock(&bdev->mutex); 2947 2948 spdk_thread_send_msg(thread, _spdk_bdev_disable_qos_done, ctx); 2949 } 2950 2951 static void 2952 _spdk_bdev_disable_qos_msg(struct spdk_io_channel_iter *i) 2953 { 2954 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 2955 struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch); 2956 2957 bdev_ch->flags &= ~BDEV_CH_QOS_ENABLED; 2958 2959 spdk_for_each_channel_continue(i, 0); 2960 } 2961 2962 static void 2963 _spdk_bdev_update_qos_limit_iops_msg(void *cb_arg) 2964 { 2965 struct set_qos_limit_ctx *ctx = cb_arg; 2966 struct spdk_bdev *bdev = ctx->bdev; 2967 2968 pthread_mutex_lock(&bdev->mutex); 2969 spdk_bdev_qos_update_max_ios_per_timeslice(bdev->qos); 2970 pthread_mutex_unlock(&bdev->mutex); 2971 2972 _spdk_bdev_set_qos_limit_done(ctx, 0); 2973 } 2974 2975 static void 2976 _spdk_bdev_enable_qos_msg(struct spdk_io_channel_iter *i) 2977 { 2978 void *io_device = spdk_io_channel_iter_get_io_device(i); 2979 struct spdk_bdev *bdev = __bdev_from_io_dev(io_device); 2980 struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i); 2981 struct spdk_bdev_channel *bdev_ch = spdk_io_channel_get_ctx(ch); 2982 int rc; 2983 2984 
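        /*
         * Runs once on each channel of this bdev's io_device. _spdk_bdev_enable_qos()
         * is called with the bdev mutex held, and its return code is handed to
         * spdk_for_each_channel_continue(), which reports any failure to
         * _spdk_bdev_enable_qos_done().
         */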
pthread_mutex_lock(&bdev->mutex); 2985 rc = _spdk_bdev_enable_qos(bdev, bdev_ch); 2986 pthread_mutex_unlock(&bdev->mutex); 2987 spdk_for_each_channel_continue(i, rc); 2988 } 2989 2990 static void 2991 _spdk_bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status) 2992 { 2993 struct set_qos_limit_ctx *ctx = spdk_io_channel_iter_get_ctx(i); 2994 2995 _spdk_bdev_set_qos_limit_done(ctx, status); 2996 } 2997 2998 void 2999 spdk_bdev_set_qos_limit_iops(struct spdk_bdev *bdev, uint64_t ios_per_sec, 3000 void (*cb_fn)(void *cb_arg, int status), void *cb_arg) 3001 { 3002 struct set_qos_limit_ctx *ctx; 3003 3004 if (ios_per_sec > 0 && ios_per_sec % SPDK_BDEV_QOS_MIN_IOS_PER_SEC) { 3005 SPDK_ERRLOG("Requested ios_per_sec limit %" PRIu64 " is not a multiple of %u\n", 3006 ios_per_sec, SPDK_BDEV_QOS_MIN_IOS_PER_SEC); 3007 cb_fn(cb_arg, -EINVAL); 3008 return; 3009 } 3010 3011 ctx = calloc(1, sizeof(*ctx)); 3012 if (ctx == NULL) { 3013 cb_fn(cb_arg, -ENOMEM); 3014 return; 3015 } 3016 3017 ctx->cb_fn = cb_fn; 3018 ctx->cb_arg = cb_arg; 3019 ctx->bdev = bdev; 3020 3021 pthread_mutex_lock(&bdev->mutex); 3022 if (bdev->qos_mod_in_progress) { 3023 pthread_mutex_unlock(&bdev->mutex); 3024 free(ctx); 3025 cb_fn(cb_arg, -EAGAIN); 3026 return; 3027 } 3028 bdev->qos_mod_in_progress = true; 3029 3030 if (ios_per_sec > 0) { 3031 if (bdev->qos == NULL) { 3032 /* Enabling */ 3033 bdev->qos = calloc(1, sizeof(*bdev->qos)); 3034 if (!bdev->qos) { 3035 pthread_mutex_unlock(&bdev->mutex); 3036 SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n"); 3037 free(ctx); 3038 cb_fn(cb_arg, -ENOMEM); 3039 return; 3040 } 3041 3042 bdev->qos->rate_limit = ios_per_sec; 3043 spdk_for_each_channel(__bdev_to_io_dev(bdev), 3044 _spdk_bdev_enable_qos_msg, ctx, 3045 _spdk_bdev_enable_qos_done); 3046 } else { 3047 /* Updating */ 3048 bdev->qos->rate_limit = ios_per_sec; 3049 spdk_thread_send_msg(bdev->qos->thread, _spdk_bdev_update_qos_limit_iops_msg, ctx); 3050 } 3051 } else { 3052 if (bdev->qos != NULL) { 3053 /* Disabling */ 3054 spdk_for_each_channel(__bdev_to_io_dev(bdev), 3055 _spdk_bdev_disable_qos_msg, ctx, 3056 _spdk_bdev_disable_qos_msg_done); 3057 } else { 3058 pthread_mutex_unlock(&bdev->mutex); 3059 _spdk_bdev_set_qos_limit_done(ctx, 0); 3060 return; 3061 } 3062 } 3063 3064 pthread_mutex_unlock(&bdev->mutex); 3065 } 3066 3067 SPDK_LOG_REGISTER_COMPONENT("bdev", SPDK_LOG_BDEV) 3068
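/*
 * Illustrative sketch only (not part of this file's implementation): one way a
 * caller running on an SPDK thread might use spdk_bdev_set_qos_limit_iops()
 * defined above. The callback name and the 20000 IOPS value are invented for
 * the example; the limit must be 0 (to disable QoS) or a multiple of
 * SPDK_BDEV_QOS_MIN_IOS_PER_SEC.
 *
 *	static void
 *	example_qos_set_done(void *cb_arg, int status)
 *	{
 *		if (status != 0) {
 *			SPDK_ERRLOG("Setting QoS rate limit failed: %d\n", status);
 *		}
 *	}
 *
 *	spdk_bdev_set_qos_limit_iops(bdev, 20000, example_qos_set_done, NULL);
 */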