1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>. 5 * Copyright (c) Intel Corporation. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * * Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * * Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * * Neither the name of Intel Corporation nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #include "spdk/stdinc.h" 36 37 #include "spdk/bdev.h" 38 39 #include "spdk/env.h" 40 #include "spdk/io_channel.h" 41 #include "spdk/likely.h" 42 #include "spdk/queue.h" 43 #include "spdk/nvme_spec.h" 44 #include "spdk/scsi_spec.h" 45 #include "spdk/util.h" 46 47 #include "spdk_internal/bdev.h" 48 #include "spdk_internal/log.h" 49 #include "spdk/string.h" 50 51 #ifdef SPDK_CONFIG_VTUNE 52 #include "ittnotify.h" 53 #include "ittnotify_types.h" 54 int __itt_init_ittlib(const char *, __itt_group_id); 55 #endif 56 57 #define SPDK_BDEV_IO_POOL_SIZE (64 * 1024) 58 #define BUF_SMALL_POOL_SIZE 8192 59 #define BUF_LARGE_POOL_SIZE 1024 60 #define NOMEM_THRESHOLD_COUNT 8 61 62 typedef TAILQ_HEAD(, spdk_bdev_io) bdev_io_tailq_t; 63 64 struct spdk_bdev_mgr { 65 struct spdk_mempool *bdev_io_pool; 66 67 struct spdk_mempool *buf_small_pool; 68 struct spdk_mempool *buf_large_pool; 69 70 TAILQ_HEAD(, spdk_bdev_module_if) bdev_modules; 71 72 TAILQ_HEAD(, spdk_bdev) bdevs; 73 74 spdk_bdev_poller_start_cb start_poller_fn; 75 spdk_bdev_poller_stop_cb stop_poller_fn; 76 77 bool init_complete; 78 bool module_init_complete; 79 80 #ifdef SPDK_CONFIG_VTUNE 81 __itt_domain *domain; 82 #endif 83 }; 84 85 static struct spdk_bdev_mgr g_bdev_mgr = { 86 .bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules), 87 .bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs), 88 .start_poller_fn = NULL, 89 .stop_poller_fn = NULL, 90 .init_complete = false, 91 .module_init_complete = false, 92 }; 93 94 static spdk_bdev_init_cb g_cb_fn = NULL; 95 static void *g_cb_arg = NULL; 96 97 98 struct spdk_bdev_mgmt_channel { 99 bdev_io_tailq_t need_buf_small; 100 bdev_io_tailq_t need_buf_large; 101 }; 102 103 struct spdk_bdev_desc { 104 struct spdk_bdev *bdev; 105 spdk_bdev_remove_cb_t remove_cb; 106 void *remove_ctx; 107 bool write; 108 
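	/* Entry on the parent bdev's open_descs list; see spdk_bdev_open() and spdk_bdev_close(). */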
TAILQ_ENTRY(spdk_bdev_desc) link; 109 }; 110 111 #define BDEV_CH_RESET_IN_PROGRESS (1 << 0) 112 113 struct spdk_bdev_channel { 114 struct spdk_bdev *bdev; 115 116 /* The channel for the underlying device */ 117 struct spdk_io_channel *channel; 118 119 /* Channel for the bdev manager */ 120 struct spdk_io_channel *mgmt_channel; 121 122 struct spdk_bdev_io_stat stat; 123 124 /* 125 * Count of I/O submitted to bdev module and waiting for completion. 126 * Incremented before submit_request() is called on an spdk_bdev_io. 127 */ 128 uint64_t io_outstanding; 129 130 bdev_io_tailq_t queued_resets; 131 132 /* 133 * Queue of IO awaiting retry because of a previous NOMEM status returned 134 * on this channel. 135 */ 136 bdev_io_tailq_t nomem_io; 137 138 /* 139 * Threshold which io_outstanding must drop to before retrying nomem_io. 140 */ 141 uint64_t nomem_threshold; 142 143 uint32_t flags; 144 145 #ifdef SPDK_CONFIG_VTUNE 146 uint64_t start_tsc; 147 uint64_t interval_tsc; 148 __itt_string_handle *handle; 149 #endif 150 151 }; 152 153 struct spdk_bdev * 154 spdk_bdev_first(void) 155 { 156 struct spdk_bdev *bdev; 157 158 bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs); 159 if (bdev) { 160 SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Starting bdev iteration at %s\n", bdev->name); 161 } 162 163 return bdev; 164 } 165 166 struct spdk_bdev * 167 spdk_bdev_next(struct spdk_bdev *prev) 168 { 169 struct spdk_bdev *bdev; 170 171 bdev = TAILQ_NEXT(prev, link); 172 if (bdev) { 173 SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Continuing bdev iteration at %s\n", bdev->name); 174 } 175 176 return bdev; 177 } 178 179 static struct spdk_bdev * 180 _bdev_next_leaf(struct spdk_bdev *bdev) 181 { 182 while (bdev != NULL) { 183 if (TAILQ_EMPTY(&bdev->vbdevs)) { 184 return bdev; 185 } else { 186 bdev = TAILQ_NEXT(bdev, link); 187 } 188 } 189 190 return bdev; 191 } 192 193 struct spdk_bdev * 194 spdk_bdev_first_leaf(void) 195 { 196 struct spdk_bdev *bdev; 197 198 bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs)); 199 200 if (bdev) { 201 SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Starting bdev iteration at %s\n", bdev->name); 202 } 203 204 return bdev; 205 } 206 207 struct spdk_bdev * 208 spdk_bdev_next_leaf(struct spdk_bdev *prev) 209 { 210 struct spdk_bdev *bdev; 211 212 bdev = _bdev_next_leaf(TAILQ_NEXT(prev, link)); 213 214 if (bdev) { 215 SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Continuing bdev iteration at %s\n", bdev->name); 216 } 217 218 return bdev; 219 } 220 221 struct spdk_bdev * 222 spdk_bdev_get_by_name(const char *bdev_name) 223 { 224 struct spdk_bdev *bdev = spdk_bdev_first(); 225 226 while (bdev != NULL) { 227 if (strcmp(bdev_name, bdev->name) == 0) { 228 return bdev; 229 } 230 bdev = spdk_bdev_next(bdev); 231 } 232 233 return NULL; 234 } 235 236 static void 237 spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf) 238 { 239 assert(bdev_io->get_buf_cb != NULL); 240 assert(buf != NULL); 241 assert(bdev_io->u.bdev.iovs != NULL); 242 243 bdev_io->buf = buf; 244 bdev_io->u.bdev.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL); 245 bdev_io->u.bdev.iovs[0].iov_len = bdev_io->buf_len; 246 bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io); 247 } 248 249 static void 250 spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) 251 { 252 struct spdk_mempool *pool; 253 struct spdk_bdev_io *tmp; 254 void *buf; 255 bdev_io_tailq_t *tailq; 256 struct spdk_bdev_mgmt_channel *ch; 257 258 assert(bdev_io->u.bdev.iovcnt == 1); 259 260 buf = bdev_io->buf; 261 ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel); 262 263 if (bdev_io->buf_len <= 
SPDK_BDEV_SMALL_BUF_MAX_SIZE) { 264 pool = g_bdev_mgr.buf_small_pool; 265 tailq = &ch->need_buf_small; 266 } else { 267 pool = g_bdev_mgr.buf_large_pool; 268 tailq = &ch->need_buf_large; 269 } 270 271 if (TAILQ_EMPTY(tailq)) { 272 spdk_mempool_put(pool, buf); 273 } else { 274 tmp = TAILQ_FIRST(tailq); 275 TAILQ_REMOVE(tailq, tmp, buf_link); 276 spdk_bdev_io_set_buf(tmp, buf); 277 } 278 } 279 280 void 281 spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len) 282 { 283 struct spdk_mempool *pool; 284 bdev_io_tailq_t *tailq; 285 void *buf = NULL; 286 struct spdk_bdev_mgmt_channel *ch; 287 288 assert(cb != NULL); 289 assert(bdev_io->u.bdev.iovs != NULL); 290 291 if (spdk_unlikely(bdev_io->u.bdev.iovs[0].iov_base != NULL)) { 292 /* Buffer already present */ 293 cb(bdev_io->ch->channel, bdev_io); 294 return; 295 } 296 297 assert(len <= SPDK_BDEV_LARGE_BUF_MAX_SIZE); 298 ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel); 299 300 bdev_io->buf_len = len; 301 bdev_io->get_buf_cb = cb; 302 if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) { 303 pool = g_bdev_mgr.buf_small_pool; 304 tailq = &ch->need_buf_small; 305 } else { 306 pool = g_bdev_mgr.buf_large_pool; 307 tailq = &ch->need_buf_large; 308 } 309 310 buf = spdk_mempool_get(pool); 311 312 if (!buf) { 313 TAILQ_INSERT_TAIL(tailq, bdev_io, buf_link); 314 } else { 315 spdk_bdev_io_set_buf(bdev_io, buf); 316 } 317 } 318 319 static int 320 spdk_bdev_module_get_max_ctx_size(void) 321 { 322 struct spdk_bdev_module_if *bdev_module; 323 int max_bdev_module_size = 0; 324 325 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 326 if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) { 327 max_bdev_module_size = bdev_module->get_ctx_size(); 328 } 329 } 330 331 return max_bdev_module_size; 332 } 333 334 void 335 spdk_bdev_config_text(FILE *fp) 336 { 337 struct spdk_bdev_module_if *bdev_module; 338 339 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 340 if (bdev_module->config_text) { 341 bdev_module->config_text(fp); 342 } 343 } 344 } 345 346 static int 347 spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf) 348 { 349 struct spdk_bdev_mgmt_channel *ch = ctx_buf; 350 351 TAILQ_INIT(&ch->need_buf_small); 352 TAILQ_INIT(&ch->need_buf_large); 353 354 return 0; 355 } 356 357 static void 358 spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf) 359 { 360 struct spdk_bdev_mgmt_channel *ch = ctx_buf; 361 362 if (!TAILQ_EMPTY(&ch->need_buf_small) || !TAILQ_EMPTY(&ch->need_buf_large)) { 363 SPDK_ERRLOG("Pending I/O list wasn't empty on channel destruction\n"); 364 } 365 } 366 367 static void 368 spdk_bdev_init_complete(int rc) 369 { 370 spdk_bdev_init_cb cb_fn = g_cb_fn; 371 void *cb_arg = g_cb_arg; 372 373 g_bdev_mgr.init_complete = true; 374 g_cb_fn = NULL; 375 g_cb_arg = NULL; 376 377 cb_fn(cb_arg, rc); 378 } 379 380 static void 381 spdk_bdev_module_action_complete(void) 382 { 383 struct spdk_bdev_module_if *m; 384 385 /* 386 * Don't finish bdev subsystem initialization if 387 * module pre-initialization is still in progress, or 388 * the subsystem been already initialized. 389 */ 390 if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) { 391 return; 392 } 393 394 /* 395 * Check all bdev modules for inits/examinations in progress. If any 396 * exist, return immediately since we cannot finish bdev subsystem 397 * initialization until all are completed. 
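	 * The count is incremented when an asynchronous examine is started (see
	 * _spdk_bdev_register()) and decremented again by
	 * spdk_bdev_module_init_done() and spdk_bdev_module_examine_done(), both
	 * of which re-run this check via spdk_bdev_module_action_done().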
398 */ 399 TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) { 400 if (m->action_in_progress > 0) { 401 return; 402 } 403 } 404 405 /* 406 * Modules already finished initialization - now that all 407 * the bdev modules have finished their asynchronous I/O 408 * processing, the entire bdev layer can be marked as complete. 409 */ 410 spdk_bdev_init_complete(0); 411 } 412 413 static void 414 spdk_bdev_module_action_done(struct spdk_bdev_module_if *module) 415 { 416 assert(module->action_in_progress > 0); 417 module->action_in_progress--; 418 spdk_bdev_module_action_complete(); 419 } 420 421 void 422 spdk_bdev_module_init_done(struct spdk_bdev_module_if *module) 423 { 424 spdk_bdev_module_action_done(module); 425 } 426 427 void 428 spdk_bdev_module_examine_done(struct spdk_bdev_module_if *module) 429 { 430 spdk_bdev_module_action_done(module); 431 } 432 433 static int 434 spdk_bdev_modules_init(void) 435 { 436 struct spdk_bdev_module_if *module; 437 int rc = 0; 438 439 TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) { 440 rc = module->module_init(); 441 if (rc != 0) { 442 break; 443 } 444 } 445 446 g_bdev_mgr.module_init_complete = true; 447 return rc; 448 } 449 450 void 451 spdk_bdev_poller_start(struct spdk_bdev_poller **ppoller, 452 spdk_bdev_poller_fn fn, 453 void *arg, 454 uint32_t lcore, 455 uint64_t period_microseconds) 456 { 457 g_bdev_mgr.start_poller_fn(ppoller, fn, arg, lcore, period_microseconds); 458 } 459 460 void 461 spdk_bdev_poller_stop(struct spdk_bdev_poller **ppoller) 462 { 463 g_bdev_mgr.stop_poller_fn(ppoller); 464 } 465 466 void 467 spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg, 468 spdk_bdev_poller_start_cb start_poller_fn, 469 spdk_bdev_poller_stop_cb stop_poller_fn) 470 { 471 int cache_size; 472 int rc = 0; 473 char mempool_name[32]; 474 475 assert(cb_fn != NULL); 476 477 g_cb_fn = cb_fn; 478 g_cb_arg = cb_arg; 479 480 g_bdev_mgr.start_poller_fn = start_poller_fn; 481 g_bdev_mgr.stop_poller_fn = stop_poller_fn; 482 483 snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid()); 484 485 g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name, 486 SPDK_BDEV_IO_POOL_SIZE, 487 sizeof(struct spdk_bdev_io) + 488 spdk_bdev_module_get_max_ctx_size(), 489 64, 490 SPDK_ENV_SOCKET_ID_ANY); 491 492 if (g_bdev_mgr.bdev_io_pool == NULL) { 493 SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n"); 494 spdk_bdev_init_complete(-1); 495 return; 496 } 497 498 /** 499 * Ensure no more than half of the total buffers end up local caches, by 500 * using spdk_env_get_core_count() to determine how many local caches we need 501 * to account for. 
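	 * For example, with BUF_SMALL_POOL_SIZE of 8192 and four cores, each
	 * per-core cache is capped at 8192 / (2 * 4) = 1024 buffers.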
502 */ 503 cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count()); 504 snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid()); 505 506 g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name, 507 BUF_SMALL_POOL_SIZE, 508 SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512, 509 cache_size, 510 SPDK_ENV_SOCKET_ID_ANY); 511 if (!g_bdev_mgr.buf_small_pool) { 512 SPDK_ERRLOG("create rbuf small pool failed\n"); 513 spdk_bdev_init_complete(-1); 514 return; 515 } 516 517 cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count()); 518 snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid()); 519 520 g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name, 521 BUF_LARGE_POOL_SIZE, 522 SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512, 523 cache_size, 524 SPDK_ENV_SOCKET_ID_ANY); 525 if (!g_bdev_mgr.buf_large_pool) { 526 SPDK_ERRLOG("create rbuf large pool failed\n"); 527 spdk_bdev_init_complete(-1); 528 return; 529 } 530 531 #ifdef SPDK_CONFIG_VTUNE 532 g_bdev_mgr.domain = __itt_domain_create("spdk_bdev"); 533 #endif 534 535 spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create, 536 spdk_bdev_mgmt_channel_destroy, 537 sizeof(struct spdk_bdev_mgmt_channel)); 538 539 rc = spdk_bdev_modules_init(); 540 if (rc != 0) { 541 SPDK_ERRLOG("bdev modules init failed\n"); 542 spdk_bdev_init_complete(-1); 543 return; 544 } 545 546 spdk_bdev_module_action_complete(); 547 } 548 549 void 550 spdk_bdev_finish(void) 551 { 552 struct spdk_bdev_module_if *bdev_module; 553 554 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 555 if (bdev_module->module_fini) { 556 bdev_module->module_fini(); 557 } 558 } 559 560 if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) { 561 SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n", 562 spdk_mempool_count(g_bdev_mgr.bdev_io_pool), 563 SPDK_BDEV_IO_POOL_SIZE); 564 } 565 566 if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) { 567 SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n", 568 spdk_mempool_count(g_bdev_mgr.buf_small_pool), 569 BUF_SMALL_POOL_SIZE); 570 assert(false); 571 } 572 573 if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) { 574 SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n", 575 spdk_mempool_count(g_bdev_mgr.buf_large_pool), 576 BUF_LARGE_POOL_SIZE); 577 assert(false); 578 } 579 580 spdk_mempool_free(g_bdev_mgr.bdev_io_pool); 581 spdk_mempool_free(g_bdev_mgr.buf_small_pool); 582 spdk_mempool_free(g_bdev_mgr.buf_large_pool); 583 584 spdk_io_device_unregister(&g_bdev_mgr, NULL); 585 } 586 587 struct spdk_bdev_io * 588 spdk_bdev_get_io(void) 589 { 590 struct spdk_bdev_io *bdev_io; 591 592 bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool); 593 if (!bdev_io) { 594 SPDK_ERRLOG("Unable to get spdk_bdev_io\n"); 595 abort(); 596 } 597 598 memset(bdev_io, 0, offsetof(struct spdk_bdev_io, u)); 599 600 return bdev_io; 601 } 602 603 static void 604 spdk_bdev_put_io(struct spdk_bdev_io *bdev_io) 605 { 606 if (bdev_io->buf != NULL) { 607 spdk_bdev_io_put_buf(bdev_io); 608 } 609 610 spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io); 611 } 612 613 static void 614 spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io) 615 { 616 struct spdk_bdev *bdev = bdev_io->bdev; 617 struct spdk_bdev_channel *bdev_ch = bdev_io->ch; 618 struct spdk_io_channel *ch = bdev_ch->channel; 619 620 assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING); 621 622 bdev_ch->io_outstanding++; 623 bdev_io->in_submit_request = true; 624 if 
(spdk_likely(bdev_ch->flags == 0)) { 625 if (spdk_likely(TAILQ_EMPTY(&bdev_ch->nomem_io))) { 626 bdev->fn_table->submit_request(ch, bdev_io); 627 } else { 628 bdev_ch->io_outstanding--; 629 TAILQ_INSERT_TAIL(&bdev_ch->nomem_io, bdev_io, link); 630 } 631 } else if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) { 632 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 633 } else { 634 SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags); 635 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 636 } 637 bdev_io->in_submit_request = false; 638 } 639 640 static void 641 spdk_bdev_io_submit_reset(struct spdk_bdev_io *bdev_io) 642 { 643 struct spdk_bdev *bdev = bdev_io->bdev; 644 struct spdk_bdev_channel *bdev_ch = bdev_io->ch; 645 struct spdk_io_channel *ch = bdev_ch->channel; 646 647 assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING); 648 649 bdev_io->in_submit_request = true; 650 bdev->fn_table->submit_request(ch, bdev_io); 651 bdev_io->in_submit_request = false; 652 } 653 654 static void 655 spdk_bdev_io_init(struct spdk_bdev_io *bdev_io, 656 struct spdk_bdev *bdev, void *cb_arg, 657 spdk_bdev_io_completion_cb cb) 658 { 659 bdev_io->bdev = bdev; 660 bdev_io->caller_ctx = cb_arg; 661 bdev_io->cb = cb; 662 bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING; 663 bdev_io->in_submit_request = false; 664 } 665 666 bool 667 spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type) 668 { 669 return bdev->fn_table->io_type_supported(bdev->ctxt, io_type); 670 } 671 672 int 673 spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 674 { 675 if (bdev->fn_table->dump_config_json) { 676 return bdev->fn_table->dump_config_json(bdev->ctxt, w); 677 } 678 679 return 0; 680 } 681 682 static int 683 spdk_bdev_channel_create(void *io_device, void *ctx_buf) 684 { 685 struct spdk_bdev *bdev = io_device; 686 struct spdk_bdev_channel *ch = ctx_buf; 687 688 ch->bdev = io_device; 689 ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt); 690 if (!ch->channel) { 691 return -1; 692 } 693 694 ch->mgmt_channel = spdk_get_io_channel(&g_bdev_mgr); 695 if (!ch->mgmt_channel) { 696 spdk_put_io_channel(ch->channel); 697 return -1; 698 } 699 700 memset(&ch->stat, 0, sizeof(ch->stat)); 701 ch->io_outstanding = 0; 702 TAILQ_INIT(&ch->queued_resets); 703 TAILQ_INIT(&ch->nomem_io); 704 ch->nomem_threshold = 0; 705 ch->flags = 0; 706 707 #ifdef SPDK_CONFIG_VTUNE 708 { 709 char *name; 710 __itt_init_ittlib(NULL, 0); 711 name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch); 712 if (!name) { 713 spdk_put_io_channel(ch->channel); 714 spdk_put_io_channel(ch->mgmt_channel); 715 return -1; 716 } 717 ch->handle = __itt_string_handle_create(name); 718 free(name); 719 ch->start_tsc = spdk_get_ticks(); 720 ch->interval_tsc = spdk_get_ticks_hz() / 100; 721 } 722 #endif 723 724 return 0; 725 } 726 727 /* 728 * Abort I/O that are waiting on a data buffer. These types of I/O are 729 * linked using the spdk_bdev_io buf_link TAILQ_ENTRY. 730 */ 731 static void 732 _spdk_bdev_abort_buf_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch) 733 { 734 struct spdk_bdev_io *bdev_io, *tmp; 735 736 TAILQ_FOREACH_SAFE(bdev_io, queue, buf_link, tmp) { 737 if (bdev_io->ch == ch) { 738 TAILQ_REMOVE(queue, bdev_io, buf_link); 739 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 740 } 741 } 742 } 743 744 /* 745 * Abort I/O that are queued waiting for submission. These types of I/O are 746 * linked using the spdk_bdev_io link TAILQ_ENTRY. 
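 * In this file that means I/O sitting on a channel's queued_resets or
 * nomem_io lists, which has not yet been handed to the bdev module.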
747 */ 748 static void 749 _spdk_bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch) 750 { 751 struct spdk_bdev_io *bdev_io, *tmp; 752 753 TAILQ_FOREACH_SAFE(bdev_io, queue, link, tmp) { 754 if (bdev_io->ch == ch) { 755 TAILQ_REMOVE(queue, bdev_io, link); 756 /* 757 * spdk_bdev_io_complete() assumes that the completed I/O had 758 * been submitted to the bdev module. Since in this case it 759 * hadn't, bump io_outstanding to account for the decrement 760 * that spdk_bdev_io_complete() will do. 761 */ 762 if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) { 763 ch->io_outstanding++; 764 } 765 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 766 } 767 } 768 } 769 770 static void 771 spdk_bdev_channel_destroy(void *io_device, void *ctx_buf) 772 { 773 struct spdk_bdev_channel *ch = ctx_buf; 774 struct spdk_bdev_mgmt_channel *mgmt_channel; 775 776 mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel); 777 778 _spdk_bdev_abort_queued_io(&ch->queued_resets, ch); 779 _spdk_bdev_abort_queued_io(&ch->nomem_io, ch); 780 _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, ch); 781 _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, ch); 782 783 spdk_put_io_channel(ch->channel); 784 spdk_put_io_channel(ch->mgmt_channel); 785 assert(ch->io_outstanding == 0); 786 } 787 788 struct spdk_io_channel * 789 spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc) 790 { 791 return spdk_get_io_channel(desc->bdev); 792 } 793 794 const char * 795 spdk_bdev_get_name(const struct spdk_bdev *bdev) 796 { 797 return bdev->name; 798 } 799 800 const char * 801 spdk_bdev_get_product_name(const struct spdk_bdev *bdev) 802 { 803 return bdev->product_name; 804 } 805 806 uint32_t 807 spdk_bdev_get_block_size(const struct spdk_bdev *bdev) 808 { 809 return bdev->blocklen; 810 } 811 812 uint64_t 813 spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) 814 { 815 return bdev->blockcnt; 816 } 817 818 size_t 819 spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) 820 { 821 /* TODO: push this logic down to the bdev modules */ 822 if (bdev->need_aligned_buffer) { 823 return bdev->blocklen; 824 } 825 826 return 1; 827 } 828 829 uint32_t 830 spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev) 831 { 832 return bdev->optimal_io_boundary; 833 } 834 835 bool 836 spdk_bdev_has_write_cache(const struct spdk_bdev *bdev) 837 { 838 return bdev->write_cache; 839 } 840 841 /* 842 * Convert I/O offset and length from bytes to blocks. 843 * 844 * Returns zero on success or non-zero if the byte parameters aren't divisible by the block size. 
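 * For example, with a 512-byte block size, offset 4096 and length 8192 map
 * to offset_blocks 8 and num_blocks 16, while an offset of 4097 would cause
 * the caller to return -EINVAL.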
845 */ 846 static uint64_t 847 spdk_bdev_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *offset_blocks, 848 uint64_t num_bytes, uint64_t *num_blocks) 849 { 850 uint32_t block_size = bdev->blocklen; 851 852 *offset_blocks = offset_bytes / block_size; 853 *num_blocks = num_bytes / block_size; 854 855 return (offset_bytes % block_size) | (num_bytes % block_size); 856 } 857 858 static bool 859 spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks) 860 { 861 /* Return failure if offset_blocks + num_blocks is less than offset_blocks; indicates there 862 * has been an overflow and hence the offset has been wrapped around */ 863 if (offset_blocks + num_blocks < offset_blocks) { 864 return false; 865 } 866 867 /* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */ 868 if (offset_blocks + num_blocks > bdev->blockcnt) { 869 return false; 870 } 871 872 return true; 873 } 874 875 int 876 spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 877 void *buf, uint64_t offset, uint64_t nbytes, 878 spdk_bdev_io_completion_cb cb, void *cb_arg) 879 { 880 uint64_t offset_blocks, num_blocks; 881 882 if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { 883 return -EINVAL; 884 } 885 886 return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg); 887 } 888 889 int 890 spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 891 void *buf, uint64_t offset_blocks, uint64_t num_blocks, 892 spdk_bdev_io_completion_cb cb, void *cb_arg) 893 { 894 struct spdk_bdev *bdev = desc->bdev; 895 struct spdk_bdev_io *bdev_io; 896 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 897 898 if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { 899 return -EINVAL; 900 } 901 902 bdev_io = spdk_bdev_get_io(); 903 if (!bdev_io) { 904 SPDK_ERRLOG("spdk_bdev_io memory allocation failed duing read\n"); 905 return -ENOMEM; 906 } 907 908 bdev_io->ch = channel; 909 bdev_io->type = SPDK_BDEV_IO_TYPE_READ; 910 bdev_io->u.bdev.iov.iov_base = buf; 911 bdev_io->u.bdev.iov.iov_len = num_blocks * bdev->blocklen; 912 bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov; 913 bdev_io->u.bdev.iovcnt = 1; 914 bdev_io->u.bdev.num_blocks = num_blocks; 915 bdev_io->u.bdev.offset_blocks = offset_blocks; 916 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 917 918 spdk_bdev_io_submit(bdev_io); 919 return 0; 920 } 921 922 int 923 spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 924 struct iovec *iov, int iovcnt, 925 uint64_t offset, uint64_t nbytes, 926 spdk_bdev_io_completion_cb cb, void *cb_arg) 927 { 928 uint64_t offset_blocks, num_blocks; 929 930 if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { 931 return -EINVAL; 932 } 933 934 return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg); 935 } 936 937 int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 938 struct iovec *iov, int iovcnt, 939 uint64_t offset_blocks, uint64_t num_blocks, 940 spdk_bdev_io_completion_cb cb, void *cb_arg) 941 { 942 struct spdk_bdev *bdev = desc->bdev; 943 struct spdk_bdev_io *bdev_io; 944 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 945 946 if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { 947 return -EINVAL; 948 } 949 950 bdev_io = spdk_bdev_get_io(); 951 if (!bdev_io) { 952 SPDK_ERRLOG("spdk_bdev_io 
memory allocation failed during read\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.bdev.iovs = iov;
	bdev_io->u.bdev.iovcnt = iovcnt;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       void *buf, uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.bdev.iov.iov_base = buf;
	bdev_io->u.bdev.iov.iov_len = num_blocks * bdev->blocklen;
	bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov;
	bdev_io->u.bdev.iovcnt = 1;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			struct iovec *iov, int iovcnt,
			uint64_t offset_blocks, uint64_t num_blocks,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.bdev.iovs = iov;
	bdev_io->u.bdev.iovcnt = iovcnt;
	bdev_io->u.bdev.num_blocks = num_blocks;
	bdev_io->u.bdev.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

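	/*
	 * If earlier I/O on this channel completed with NOMEM and are still queued,
	 * spdk_bdev_io_submit() places this request on the channel's nomem_io list
	 * instead of calling the module; _spdk_bdev_ch_retry_io() resubmits it later.
	 */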
spdk_bdev_io_submit(bdev_io); 1067 return 0; 1068 } 1069 1070 int 1071 spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1072 uint64_t offset, uint64_t len, 1073 spdk_bdev_io_completion_cb cb, void *cb_arg) 1074 { 1075 uint64_t offset_blocks, num_blocks; 1076 1077 if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) { 1078 return -EINVAL; 1079 } 1080 1081 return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg); 1082 } 1083 1084 int 1085 spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1086 uint64_t offset_blocks, uint64_t num_blocks, 1087 spdk_bdev_io_completion_cb cb, void *cb_arg) 1088 { 1089 struct spdk_bdev *bdev = desc->bdev; 1090 struct spdk_bdev_io *bdev_io; 1091 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1092 1093 if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { 1094 return -EINVAL; 1095 } 1096 1097 bdev_io = spdk_bdev_get_io(); 1098 if (!bdev_io) { 1099 SPDK_ERRLOG("bdev_io memory allocation failed duing write_zeroes\n"); 1100 return -ENOMEM; 1101 } 1102 1103 bdev_io->ch = channel; 1104 bdev_io->u.bdev.iovs = NULL; 1105 bdev_io->u.bdev.iovcnt = 0; 1106 bdev_io->u.bdev.num_blocks = num_blocks; 1107 bdev_io->u.bdev.offset_blocks = offset_blocks; 1108 bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES; 1109 1110 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1111 1112 spdk_bdev_io_submit(bdev_io); 1113 return 0; 1114 } 1115 1116 int 1117 spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1118 uint64_t offset, uint64_t nbytes, 1119 spdk_bdev_io_completion_cb cb, void *cb_arg) 1120 { 1121 uint64_t offset_blocks, num_blocks; 1122 1123 if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) { 1124 return -EINVAL; 1125 } 1126 1127 return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg); 1128 } 1129 1130 int 1131 spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1132 uint64_t offset_blocks, uint64_t num_blocks, 1133 spdk_bdev_io_completion_cb cb, void *cb_arg) 1134 { 1135 struct spdk_bdev *bdev = desc->bdev; 1136 struct spdk_bdev_io *bdev_io; 1137 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1138 1139 if (!desc->write) { 1140 return -EBADF; 1141 } 1142 1143 if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { 1144 return -EINVAL; 1145 } 1146 1147 if (num_blocks == 0) { 1148 SPDK_ERRLOG("Can't unmap 0 bytes\n"); 1149 return -EINVAL; 1150 } 1151 1152 bdev_io = spdk_bdev_get_io(); 1153 if (!bdev_io) { 1154 SPDK_ERRLOG("bdev_io memory allocation failed duing unmap\n"); 1155 return -ENOMEM; 1156 } 1157 1158 bdev_io->ch = channel; 1159 bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP; 1160 bdev_io->u.bdev.iovs = &bdev_io->u.bdev.iov; 1161 bdev_io->u.bdev.iovcnt = 1; 1162 bdev_io->u.bdev.offset_blocks = offset_blocks; 1163 bdev_io->u.bdev.num_blocks = num_blocks; 1164 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1165 1166 spdk_bdev_io_submit(bdev_io); 1167 return 0; 1168 } 1169 1170 int 1171 spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1172 uint64_t offset, uint64_t length, 1173 spdk_bdev_io_completion_cb cb, void *cb_arg) 1174 { 1175 uint64_t offset_blocks, num_blocks; 1176 1177 if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, length, &num_blocks) != 0) { 1178 return -EINVAL; 1179 } 1180 1181 return spdk_bdev_flush_blocks(desc, ch, offset_blocks, 
num_blocks, cb, cb_arg); 1182 } 1183 1184 int 1185 spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1186 uint64_t offset_blocks, uint64_t num_blocks, 1187 spdk_bdev_io_completion_cb cb, void *cb_arg) 1188 { 1189 struct spdk_bdev *bdev = desc->bdev; 1190 struct spdk_bdev_io *bdev_io; 1191 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1192 1193 if (!desc->write) { 1194 return -EBADF; 1195 } 1196 1197 if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { 1198 return -EINVAL; 1199 } 1200 1201 bdev_io = spdk_bdev_get_io(); 1202 if (!bdev_io) { 1203 SPDK_ERRLOG("bdev_io memory allocation failed duing flush\n"); 1204 return -ENOMEM; 1205 } 1206 1207 bdev_io->ch = channel; 1208 bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH; 1209 bdev_io->u.bdev.iovs = NULL; 1210 bdev_io->u.bdev.iovcnt = 0; 1211 bdev_io->u.bdev.offset_blocks = offset_blocks; 1212 bdev_io->u.bdev.num_blocks = num_blocks; 1213 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1214 1215 spdk_bdev_io_submit(bdev_io); 1216 return 0; 1217 } 1218 1219 static void 1220 _spdk_bdev_reset_dev(void *io_device, void *ctx) 1221 { 1222 struct spdk_bdev_channel *ch = ctx; 1223 struct spdk_bdev_io *bdev_io; 1224 1225 bdev_io = TAILQ_FIRST(&ch->queued_resets); 1226 TAILQ_REMOVE(&ch->queued_resets, bdev_io, link); 1227 spdk_bdev_io_submit_reset(bdev_io); 1228 } 1229 1230 static void 1231 _spdk_bdev_reset_abort_channel(void *io_device, struct spdk_io_channel *ch, 1232 void *ctx) 1233 { 1234 struct spdk_bdev_channel *channel; 1235 struct spdk_bdev_mgmt_channel *mgmt_channel; 1236 1237 channel = spdk_io_channel_get_ctx(ch); 1238 mgmt_channel = spdk_io_channel_get_ctx(channel->mgmt_channel); 1239 1240 channel->flags |= BDEV_CH_RESET_IN_PROGRESS; 1241 1242 _spdk_bdev_abort_queued_io(&channel->nomem_io, channel); 1243 _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_small, channel); 1244 _spdk_bdev_abort_buf_io(&mgmt_channel->need_buf_large, channel); 1245 } 1246 1247 static void 1248 _spdk_bdev_start_reset(void *ctx) 1249 { 1250 struct spdk_bdev_channel *ch = ctx; 1251 1252 spdk_for_each_channel(ch->bdev, _spdk_bdev_reset_abort_channel, 1253 ch, _spdk_bdev_reset_dev); 1254 } 1255 1256 static void 1257 _spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch) 1258 { 1259 struct spdk_bdev *bdev = ch->bdev; 1260 1261 assert(!TAILQ_EMPTY(&ch->queued_resets)); 1262 1263 pthread_mutex_lock(&bdev->mutex); 1264 if (bdev->reset_in_progress == NULL) { 1265 bdev->reset_in_progress = TAILQ_FIRST(&ch->queued_resets); 1266 /* 1267 * Take a channel reference for the target bdev for the life of this 1268 * reset. This guards against the channel getting destroyed while 1269 * spdk_for_each_channel() calls related to this reset IO are in 1270 * progress. We will release the reference when this reset is 1271 * completed. 
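		 * The matching spdk_put_io_channel() call is made from
		 * spdk_bdev_io_complete() when the reset I/O finishes.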
1272 */ 1273 bdev->reset_in_progress->u.reset.ch_ref = spdk_get_io_channel(bdev); 1274 _spdk_bdev_start_reset(ch); 1275 } 1276 pthread_mutex_unlock(&bdev->mutex); 1277 } 1278 1279 static void 1280 _spdk_bdev_complete_reset_channel(void *io_device, struct spdk_io_channel *_ch, void *ctx) 1281 { 1282 struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch); 1283 1284 ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS; 1285 if (!TAILQ_EMPTY(&ch->queued_resets)) { 1286 _spdk_bdev_channel_start_reset(ch); 1287 } 1288 } 1289 1290 int 1291 spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1292 spdk_bdev_io_completion_cb cb, void *cb_arg) 1293 { 1294 struct spdk_bdev *bdev = desc->bdev; 1295 struct spdk_bdev_io *bdev_io; 1296 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1297 1298 bdev_io = spdk_bdev_get_io(); 1299 if (!bdev_io) { 1300 SPDK_ERRLOG("bdev_io memory allocation failed duing reset\n"); 1301 return -ENOMEM; 1302 } 1303 1304 bdev_io->ch = channel; 1305 bdev_io->type = SPDK_BDEV_IO_TYPE_RESET; 1306 bdev_io->u.reset.ch_ref = NULL; 1307 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1308 1309 pthread_mutex_lock(&bdev->mutex); 1310 TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, link); 1311 pthread_mutex_unlock(&bdev->mutex); 1312 1313 _spdk_bdev_channel_start_reset(channel); 1314 1315 return 0; 1316 } 1317 1318 void 1319 spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 1320 struct spdk_bdev_io_stat *stat) 1321 { 1322 #ifdef SPDK_CONFIG_VTUNE 1323 SPDK_ERRLOG("Calling spdk_bdev_get_io_stat is not allowed when VTune integration is enabled.\n"); 1324 memset(stat, 0, sizeof(*stat)); 1325 return; 1326 #endif 1327 1328 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1329 1330 *stat = channel->stat; 1331 memset(&channel->stat, 0, sizeof(channel->stat)); 1332 } 1333 1334 int 1335 spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1336 const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, 1337 spdk_bdev_io_completion_cb cb, void *cb_arg) 1338 { 1339 struct spdk_bdev *bdev = desc->bdev; 1340 struct spdk_bdev_io *bdev_io; 1341 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1342 1343 if (!desc->write) { 1344 return -EBADF; 1345 } 1346 1347 bdev_io = spdk_bdev_get_io(); 1348 if (!bdev_io) { 1349 SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n"); 1350 return -ENOMEM; 1351 } 1352 1353 bdev_io->ch = channel; 1354 bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN; 1355 bdev_io->u.nvme_passthru.cmd = *cmd; 1356 bdev_io->u.nvme_passthru.buf = buf; 1357 bdev_io->u.nvme_passthru.nbytes = nbytes; 1358 1359 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1360 1361 spdk_bdev_io_submit(bdev_io); 1362 return 0; 1363 } 1364 1365 int 1366 spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, 1367 const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, 1368 spdk_bdev_io_completion_cb cb, void *cb_arg) 1369 { 1370 struct spdk_bdev *bdev = desc->bdev; 1371 struct spdk_bdev_io *bdev_io; 1372 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 1373 1374 if (!desc->write) { 1375 /* 1376 * Do not try to parse the NVMe command - we could maybe use bits in the opcode 1377 * to easily determine if the command is a read or write, but for now just 1378 * do not allow io_passthru with a read-only descriptor. 
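		 * Callers that need passthru on such a descriptor must open the
		 * bdev with write == true instead (see spdk_bdev_open()).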
1379 */ 1380 return -EBADF; 1381 } 1382 1383 bdev_io = spdk_bdev_get_io(); 1384 if (!bdev_io) { 1385 SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n"); 1386 return -ENOMEM; 1387 } 1388 1389 bdev_io->ch = channel; 1390 bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO; 1391 bdev_io->u.nvme_passthru.cmd = *cmd; 1392 bdev_io->u.nvme_passthru.buf = buf; 1393 bdev_io->u.nvme_passthru.nbytes = nbytes; 1394 1395 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 1396 1397 spdk_bdev_io_submit(bdev_io); 1398 return 0; 1399 } 1400 1401 int 1402 spdk_bdev_free_io(struct spdk_bdev_io *bdev_io) 1403 { 1404 if (!bdev_io) { 1405 SPDK_ERRLOG("bdev_io is NULL\n"); 1406 return -1; 1407 } 1408 1409 if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) { 1410 SPDK_ERRLOG("bdev_io is in pending state\n"); 1411 assert(false); 1412 return -1; 1413 } 1414 1415 spdk_bdev_put_io(bdev_io); 1416 1417 return 0; 1418 } 1419 1420 static void 1421 _spdk_bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch) 1422 { 1423 struct spdk_bdev *bdev = bdev_ch->bdev; 1424 struct spdk_bdev_io *bdev_io; 1425 1426 if (bdev_ch->io_outstanding > bdev_ch->nomem_threshold) { 1427 /* 1428 * Allow some more I/O to complete before retrying the nomem_io queue. 1429 * Some drivers (such as nvme) cannot immediately take a new I/O in 1430 * the context of a completion, because the resources for the I/O are 1431 * not released until control returns to the bdev poller. Also, we 1432 * may require several small I/O to complete before a larger I/O 1433 * (that requires splitting) can be submitted. 1434 */ 1435 return; 1436 } 1437 1438 while (!TAILQ_EMPTY(&bdev_ch->nomem_io)) { 1439 bdev_io = TAILQ_FIRST(&bdev_ch->nomem_io); 1440 TAILQ_REMOVE(&bdev_ch->nomem_io, bdev_io, link); 1441 bdev_ch->io_outstanding++; 1442 bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING; 1443 bdev->fn_table->submit_request(bdev_ch->channel, bdev_io); 1444 if (bdev_io->status == SPDK_BDEV_IO_STATUS_NOMEM) { 1445 break; 1446 } 1447 } 1448 } 1449 1450 static void 1451 _spdk_bdev_io_complete(void *ctx) 1452 { 1453 struct spdk_bdev_io *bdev_io = ctx; 1454 1455 assert(bdev_io->cb != NULL); 1456 bdev_io->cb(bdev_io, bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS, bdev_io->caller_ctx); 1457 } 1458 1459 void 1460 spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status) 1461 { 1462 struct spdk_bdev *bdev = bdev_io->bdev; 1463 struct spdk_bdev_channel *bdev_ch = bdev_io->ch; 1464 1465 bdev_io->status = status; 1466 1467 if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) { 1468 if (status == SPDK_BDEV_IO_STATUS_NOMEM) { 1469 SPDK_ERRLOG("NOMEM returned for reset\n"); 1470 } 1471 pthread_mutex_lock(&bdev->mutex); 1472 if (bdev_io == bdev->reset_in_progress) { 1473 bdev->reset_in_progress = NULL; 1474 } 1475 pthread_mutex_unlock(&bdev->mutex); 1476 if (bdev_io->u.reset.ch_ref != NULL) { 1477 spdk_put_io_channel(bdev_io->u.reset.ch_ref); 1478 } 1479 spdk_for_each_channel(bdev, _spdk_bdev_complete_reset_channel, NULL, NULL); 1480 } else { 1481 assert(bdev_ch->io_outstanding > 0); 1482 bdev_ch->io_outstanding--; 1483 if (spdk_likely(status != SPDK_BDEV_IO_STATUS_NOMEM)) { 1484 if (spdk_unlikely(!TAILQ_EMPTY(&bdev_ch->nomem_io))) { 1485 _spdk_bdev_ch_retry_io(bdev_ch); 1486 } 1487 } else { 1488 TAILQ_INSERT_HEAD(&bdev_ch->nomem_io, bdev_io, link); 1489 /* 1490 * Wait for some of the outstanding I/O to complete before we 1491 * retry any of the nomem_io. 
Normally we will wait for 1492 * NOMEM_THRESHOLD_COUNT I/O to complete but for low queue 1493 * depth channels we will instead wait for half to complete. 1494 */ 1495 bdev_ch->nomem_threshold = spdk_max(bdev_ch->io_outstanding / 2, 1496 bdev_ch->io_outstanding - NOMEM_THRESHOLD_COUNT); 1497 return; 1498 } 1499 } 1500 1501 if (status == SPDK_BDEV_IO_STATUS_SUCCESS) { 1502 switch (bdev_io->type) { 1503 case SPDK_BDEV_IO_TYPE_READ: 1504 bdev_ch->stat.bytes_read += bdev_io->u.bdev.num_blocks * bdev->blocklen; 1505 bdev_ch->stat.num_read_ops++; 1506 break; 1507 case SPDK_BDEV_IO_TYPE_WRITE: 1508 bdev_ch->stat.bytes_written += bdev_io->u.bdev.num_blocks * bdev->blocklen; 1509 bdev_ch->stat.num_write_ops++; 1510 break; 1511 default: 1512 break; 1513 } 1514 } 1515 1516 #ifdef SPDK_CONFIG_VTUNE 1517 uint64_t now_tsc = spdk_get_ticks(); 1518 if (now_tsc > (bdev_ch->start_tsc + bdev_ch->interval_tsc)) { 1519 uint64_t data[5]; 1520 1521 data[0] = bdev_ch->stat.num_read_ops; 1522 data[1] = bdev_ch->stat.bytes_read; 1523 data[2] = bdev_ch->stat.num_write_ops; 1524 data[3] = bdev_ch->stat.bytes_written; 1525 data[4] = bdev->fn_table->get_spin_time ? 1526 bdev->fn_table->get_spin_time(bdev_ch->channel) : 0; 1527 1528 __itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_ch->handle, 1529 __itt_metadata_u64, 5, data); 1530 1531 memset(&bdev_ch->stat, 0, sizeof(bdev_ch->stat)); 1532 bdev_ch->start_tsc = now_tsc; 1533 } 1534 #endif 1535 1536 if (bdev_io->in_submit_request || bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) { 1537 /* 1538 * Defer completion to avoid potential infinite recursion if the 1539 * user's completion callback issues a new I/O. 1540 */ 1541 spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_ch->channel), 1542 _spdk_bdev_io_complete, bdev_io); 1543 } else { 1544 _spdk_bdev_io_complete(bdev_io); 1545 } 1546 } 1547 1548 void 1549 spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc, 1550 enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq) 1551 { 1552 if (sc == SPDK_SCSI_STATUS_GOOD) { 1553 bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS; 1554 } else { 1555 bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR; 1556 bdev_io->error.scsi.sc = sc; 1557 bdev_io->error.scsi.sk = sk; 1558 bdev_io->error.scsi.asc = asc; 1559 bdev_io->error.scsi.ascq = ascq; 1560 } 1561 1562 spdk_bdev_io_complete(bdev_io, bdev_io->status); 1563 } 1564 1565 void 1566 spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io, 1567 int *sc, int *sk, int *asc, int *ascq) 1568 { 1569 assert(sc != NULL); 1570 assert(sk != NULL); 1571 assert(asc != NULL); 1572 assert(ascq != NULL); 1573 1574 switch (bdev_io->status) { 1575 case SPDK_BDEV_IO_STATUS_SUCCESS: 1576 *sc = SPDK_SCSI_STATUS_GOOD; 1577 *sk = SPDK_SCSI_SENSE_NO_SENSE; 1578 *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; 1579 *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; 1580 break; 1581 case SPDK_BDEV_IO_STATUS_NVME_ERROR: 1582 spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq); 1583 break; 1584 case SPDK_BDEV_IO_STATUS_SCSI_ERROR: 1585 *sc = bdev_io->error.scsi.sc; 1586 *sk = bdev_io->error.scsi.sk; 1587 *asc = bdev_io->error.scsi.asc; 1588 *ascq = bdev_io->error.scsi.ascq; 1589 break; 1590 default: 1591 *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; 1592 *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND; 1593 *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; 1594 *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; 1595 break; 1596 } 1597 } 1598 1599 void 1600 spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc) 1601 { 1602 if (sct == 
SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) { 1603 bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS; 1604 } else { 1605 bdev_io->error.nvme.sct = sct; 1606 bdev_io->error.nvme.sc = sc; 1607 bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR; 1608 } 1609 1610 spdk_bdev_io_complete(bdev_io, bdev_io->status); 1611 } 1612 1613 void 1614 spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc) 1615 { 1616 assert(sct != NULL); 1617 assert(sc != NULL); 1618 1619 if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) { 1620 *sct = bdev_io->error.nvme.sct; 1621 *sc = bdev_io->error.nvme.sc; 1622 } else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) { 1623 *sct = SPDK_NVME_SCT_GENERIC; 1624 *sc = SPDK_NVME_SC_SUCCESS; 1625 } else { 1626 *sct = SPDK_NVME_SCT_GENERIC; 1627 *sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1628 } 1629 } 1630 1631 static void 1632 _spdk_bdev_register(struct spdk_bdev *bdev) 1633 { 1634 struct spdk_bdev_module_if *module; 1635 1636 assert(bdev->module != NULL); 1637 1638 bdev->status = SPDK_BDEV_STATUS_READY; 1639 1640 TAILQ_INIT(&bdev->open_descs); 1641 1642 TAILQ_INIT(&bdev->vbdevs); 1643 TAILQ_INIT(&bdev->base_bdevs); 1644 1645 bdev->reset_in_progress = NULL; 1646 1647 spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy, 1648 sizeof(struct spdk_bdev_channel)); 1649 1650 pthread_mutex_init(&bdev->mutex, NULL); 1651 SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Inserting bdev %s into list\n", bdev->name); 1652 TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link); 1653 1654 TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) { 1655 if (module->examine) { 1656 module->action_in_progress++; 1657 module->examine(bdev); 1658 } 1659 } 1660 } 1661 1662 void 1663 spdk_bdev_register(struct spdk_bdev *bdev) 1664 { 1665 _spdk_bdev_register(bdev); 1666 } 1667 1668 void 1669 spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count) 1670 { 1671 int i; 1672 1673 _spdk_bdev_register(vbdev); 1674 for (i = 0; i < base_bdev_count; i++) { 1675 assert(base_bdevs[i] != NULL); 1676 TAILQ_INSERT_TAIL(&vbdev->base_bdevs, base_bdevs[i], base_bdev_link); 1677 TAILQ_INSERT_TAIL(&base_bdevs[i]->vbdevs, vbdev, vbdev_link); 1678 } 1679 } 1680 1681 void 1682 spdk_bdev_unregister(struct spdk_bdev *bdev) 1683 { 1684 struct spdk_bdev_desc *desc, *tmp; 1685 int rc; 1686 bool do_destruct = true; 1687 1688 SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Removing bdev %s from list\n", bdev->name); 1689 1690 pthread_mutex_lock(&bdev->mutex); 1691 1692 bdev->status = SPDK_BDEV_STATUS_REMOVING; 1693 1694 TAILQ_FOREACH_SAFE(desc, &bdev->open_descs, link, tmp) { 1695 if (desc->remove_cb) { 1696 pthread_mutex_unlock(&bdev->mutex); 1697 do_destruct = false; 1698 desc->remove_cb(desc->remove_ctx); 1699 pthread_mutex_lock(&bdev->mutex); 1700 } 1701 } 1702 1703 if (!do_destruct) { 1704 pthread_mutex_unlock(&bdev->mutex); 1705 return; 1706 } 1707 1708 TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link); 1709 pthread_mutex_unlock(&bdev->mutex); 1710 1711 pthread_mutex_destroy(&bdev->mutex); 1712 1713 spdk_io_device_unregister(bdev, NULL); 1714 1715 rc = bdev->fn_table->destruct(bdev->ctxt); 1716 if (rc < 0) { 1717 SPDK_ERRLOG("destruct failed\n"); 1718 } 1719 } 1720 1721 void 1722 spdk_vbdev_unregister(struct spdk_bdev *vbdev) 1723 { 1724 struct spdk_bdev *base_bdev; 1725 1726 assert(!TAILQ_EMPTY(&vbdev->base_bdevs)); 1727 TAILQ_FOREACH(base_bdev, &vbdev->base_bdevs, base_bdev_link) { 1728 TAILQ_REMOVE(&base_bdev->vbdevs, vbdev, vbdev_link); 1729 } 1730 
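	/* All back-references from the base bdevs are gone; finish with a normal unregister. */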
spdk_bdev_unregister(vbdev); 1731 } 1732 1733 int 1734 spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb, 1735 void *remove_ctx, struct spdk_bdev_desc **_desc) 1736 { 1737 struct spdk_bdev_desc *desc; 1738 1739 desc = calloc(1, sizeof(*desc)); 1740 if (desc == NULL) { 1741 return -ENOMEM; 1742 } 1743 1744 pthread_mutex_lock(&bdev->mutex); 1745 1746 if (write && bdev->claim_module) { 1747 SPDK_ERRLOG("failed, %s already claimed\n", bdev->name); 1748 free(desc); 1749 pthread_mutex_unlock(&bdev->mutex); 1750 return -EPERM; 1751 } 1752 1753 TAILQ_INSERT_TAIL(&bdev->open_descs, desc, link); 1754 1755 desc->bdev = bdev; 1756 desc->remove_cb = remove_cb; 1757 desc->remove_ctx = remove_ctx; 1758 desc->write = write; 1759 *_desc = desc; 1760 1761 pthread_mutex_unlock(&bdev->mutex); 1762 1763 return 0; 1764 } 1765 1766 void 1767 spdk_bdev_close(struct spdk_bdev_desc *desc) 1768 { 1769 struct spdk_bdev *bdev = desc->bdev; 1770 bool do_unregister = false; 1771 1772 pthread_mutex_lock(&bdev->mutex); 1773 1774 TAILQ_REMOVE(&bdev->open_descs, desc, link); 1775 free(desc); 1776 1777 if (bdev->status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->open_descs)) { 1778 do_unregister = true; 1779 } 1780 pthread_mutex_unlock(&bdev->mutex); 1781 1782 if (do_unregister == true) { 1783 spdk_bdev_unregister(bdev); 1784 } 1785 } 1786 1787 int 1788 spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc, 1789 struct spdk_bdev_module_if *module) 1790 { 1791 if (bdev->claim_module != NULL) { 1792 SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name, 1793 bdev->claim_module->name); 1794 return -EPERM; 1795 } 1796 1797 if (desc && !desc->write) { 1798 desc->write = true; 1799 } 1800 1801 bdev->claim_module = module; 1802 return 0; 1803 } 1804 1805 void 1806 spdk_bdev_module_release_bdev(struct spdk_bdev *bdev) 1807 { 1808 assert(bdev->claim_module != NULL); 1809 bdev->claim_module = NULL; 1810 } 1811 1812 struct spdk_bdev * 1813 spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc) 1814 { 1815 return desc->bdev; 1816 } 1817 1818 void 1819 spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp) 1820 { 1821 struct iovec *iovs; 1822 int iovcnt; 1823 1824 if (bdev_io == NULL) { 1825 return; 1826 } 1827 1828 switch (bdev_io->type) { 1829 case SPDK_BDEV_IO_TYPE_READ: 1830 iovs = bdev_io->u.bdev.iovs; 1831 iovcnt = bdev_io->u.bdev.iovcnt; 1832 break; 1833 case SPDK_BDEV_IO_TYPE_WRITE: 1834 iovs = bdev_io->u.bdev.iovs; 1835 iovcnt = bdev_io->u.bdev.iovcnt; 1836 break; 1837 default: 1838 iovs = NULL; 1839 iovcnt = 0; 1840 break; 1841 } 1842 1843 if (iovp) { 1844 *iovp = iovs; 1845 } 1846 if (iovcntp) { 1847 *iovcntp = iovcnt; 1848 } 1849 } 1850 1851 void 1852 spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module) 1853 { 1854 /* 1855 * Modules with examine callbacks must be initialized first, so they are 1856 * ready to handle examine callbacks from later modules that will 1857 * register physical bdevs. 
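	 * Inserting examine-capable modules at the head of the list and everything
	 * else at the tail yields that ordering without a separate priority field.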
1858 */ 1859 if (bdev_module->examine != NULL) { 1860 TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, tailq); 1861 } else { 1862 TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq); 1863 } 1864 } 1865 1866 void 1867 spdk_bdev_part_base_free(struct spdk_bdev_part_base *base) 1868 { 1869 if (base->desc) { 1870 spdk_bdev_close(base->desc); 1871 base->desc = NULL; 1872 } 1873 base->base_free_fn(base); 1874 } 1875 1876 void 1877 spdk_bdev_part_free(struct spdk_bdev_part *part) 1878 { 1879 struct spdk_bdev_part_base *base; 1880 1881 assert(part); 1882 assert(part->base); 1883 1884 base = part->base; 1885 spdk_io_device_unregister(&part->base, NULL); 1886 TAILQ_REMOVE(base->tailq, part, tailq); 1887 free(part->bdev.name); 1888 free(part); 1889 1890 if (__sync_sub_and_fetch(&base->ref, 1) == 0) { 1891 spdk_bdev_module_release_bdev(base->bdev); 1892 spdk_bdev_part_base_free(base); 1893 } 1894 } 1895 1896 void 1897 spdk_bdev_part_tailq_fini(struct bdev_part_tailq *tailq) 1898 { 1899 struct spdk_bdev_part *part, *tmp; 1900 1901 TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) { 1902 spdk_bdev_part_free(part); 1903 } 1904 } 1905 1906 void 1907 spdk_bdev_part_base_hotremove(struct spdk_bdev *base_bdev, struct bdev_part_tailq *tailq) 1908 { 1909 struct spdk_bdev_part *part, *tmp; 1910 1911 TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) { 1912 if (part->base->bdev == base_bdev) { 1913 spdk_vbdev_unregister(&part->bdev); 1914 } 1915 } 1916 } 1917 1918 static bool 1919 spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type) 1920 { 1921 struct spdk_bdev_part *part = _part; 1922 1923 return part->base->bdev->fn_table->io_type_supported(part->base->bdev, io_type); 1924 } 1925 1926 static struct spdk_io_channel * 1927 spdk_bdev_part_get_io_channel(void *_part) 1928 { 1929 struct spdk_bdev_part *part = _part; 1930 1931 return spdk_get_io_channel(&part->base); 1932 } 1933 1934 static void 1935 spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 1936 { 1937 struct spdk_bdev_io *part_io = cb_arg; 1938 int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED; 1939 1940 spdk_bdev_io_complete(part_io, status); 1941 spdk_bdev_free_io(bdev_io); 1942 } 1943 1944 void 1945 spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io) 1946 { 1947 struct spdk_bdev_part *part = ch->part; 1948 struct spdk_io_channel *base_ch = ch->base_ch; 1949 struct spdk_bdev_desc *base_desc = part->base->desc; 1950 uint64_t offset; 1951 int rc = 0; 1952 1953 /* Modify the I/O to adjust for the offset within the base bdev. 
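	 * For block I/O that simply means adding part->offset_blocks to the
	 * requested offset_blocks before resubmitting on the base descriptor.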
*/ 1954 switch (bdev_io->type) { 1955 case SPDK_BDEV_IO_TYPE_READ: 1956 offset = bdev_io->u.bdev.offset_blocks + part->offset_blocks; 1957 rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, 1958 bdev_io->u.bdev.iovcnt, offset, 1959 bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io, 1960 bdev_io); 1961 break; 1962 case SPDK_BDEV_IO_TYPE_WRITE: 1963 offset = bdev_io->u.bdev.offset_blocks + part->offset_blocks; 1964 rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.bdev.iovs, 1965 bdev_io->u.bdev.iovcnt, offset, 1966 bdev_io->u.bdev.num_blocks, spdk_bdev_part_complete_io, 1967 bdev_io); 1968 break; 1969 case SPDK_BDEV_IO_TYPE_WRITE_ZEROES: 1970 offset = bdev_io->u.bdev.offset_blocks + part->offset_blocks; 1971 rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks, 1972 spdk_bdev_part_complete_io, bdev_io); 1973 break; 1974 case SPDK_BDEV_IO_TYPE_UNMAP: 1975 offset = bdev_io->u.bdev.offset_blocks + part->offset_blocks; 1976 rc = spdk_bdev_unmap_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks, 1977 spdk_bdev_part_complete_io, bdev_io); 1978 break; 1979 case SPDK_BDEV_IO_TYPE_FLUSH: 1980 offset = bdev_io->u.bdev.offset_blocks + part->offset_blocks; 1981 rc = spdk_bdev_flush_blocks(base_desc, base_ch, offset, bdev_io->u.bdev.num_blocks, 1982 spdk_bdev_part_complete_io, bdev_io); 1983 break; 1984 case SPDK_BDEV_IO_TYPE_RESET: 1985 rc = spdk_bdev_reset(base_desc, base_ch, 1986 spdk_bdev_part_complete_io, bdev_io); 1987 break; 1988 default: 1989 SPDK_ERRLOG("split: unknown I/O type %d\n", bdev_io->type); 1990 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1991 return; 1992 } 1993 1994 if (rc != 0) { 1995 spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED); 1996 } 1997 } 1998 static int 1999 spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf) 2000 { 2001 struct spdk_bdev_part *part = SPDK_CONTAINEROF(io_device, struct spdk_bdev_part, base); 2002 struct spdk_bdev_part_channel *ch = ctx_buf; 2003 2004 ch->part = part; 2005 ch->base_ch = spdk_bdev_get_io_channel(part->base->desc); 2006 if (ch->base_ch == NULL) { 2007 return -1; 2008 } 2009 2010 if (part->base->ch_create_cb) { 2011 return part->base->ch_create_cb(io_device, ctx_buf); 2012 } else { 2013 return 0; 2014 } 2015 } 2016 2017 static void 2018 spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf) 2019 { 2020 struct spdk_bdev_part *part = SPDK_CONTAINEROF(io_device, struct spdk_bdev_part, base); 2021 struct spdk_bdev_part_channel *ch = ctx_buf; 2022 2023 if (part->base->ch_destroy_cb) { 2024 part->base->ch_destroy_cb(io_device, ctx_buf); 2025 } 2026 spdk_put_io_channel(ch->base_ch); 2027 } 2028 2029 int 2030 spdk_bdev_part_base_construct(struct spdk_bdev_part_base *base, struct spdk_bdev *bdev, 2031 spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module_if *module, 2032 struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq, 2033 spdk_bdev_part_base_free_fn free_fn, 2034 uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb, 2035 spdk_io_channel_destroy_cb ch_destroy_cb) 2036 { 2037 int rc; 2038 2039 fn_table->get_io_channel = spdk_bdev_part_get_io_channel; 2040 fn_table->io_type_supported = spdk_bdev_part_io_type_supported; 2041 2042 base->bdev = bdev; 2043 base->desc = NULL; 2044 base->ref = 0; 2045 base->module = module; 2046 base->fn_table = fn_table; 2047 base->tailq = tailq; 2048 base->claimed = false; 2049 base->channel_size = channel_size; 2050 base->ch_create_cb = ch_create_cb; 2051 
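	/* Both channel hooks may be NULL; the part channel callbacks check before calling them. */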
base->ch_destroy_cb = ch_destroy_cb; 2052 base->base_free_fn = free_fn; 2053 2054 rc = spdk_bdev_open(bdev, false, remove_cb, bdev, &base->desc); 2055 if (rc) { 2056 spdk_bdev_part_base_free(base); 2057 SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev)); 2058 return -1; 2059 } 2060 2061 return 0; 2062 } 2063 2064 int 2065 spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base, 2066 char *name, uint64_t offset_blocks, uint64_t num_blocks, 2067 char *product_name) 2068 { 2069 part->bdev.name = name; 2070 part->bdev.blocklen = base->bdev->blocklen; 2071 part->bdev.blockcnt = num_blocks; 2072 part->offset_blocks = offset_blocks; 2073 2074 part->bdev.write_cache = base->bdev->write_cache; 2075 part->bdev.need_aligned_buffer = base->bdev->need_aligned_buffer; 2076 part->bdev.product_name = product_name; 2077 part->bdev.ctxt = part; 2078 part->bdev.module = base->module; 2079 part->bdev.fn_table = base->fn_table; 2080 2081 __sync_fetch_and_add(&base->ref, 1); 2082 part->base = base; 2083 2084 if (!base->claimed) { 2085 int rc; 2086 2087 rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module); 2088 if (rc) { 2089 SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev)); 2090 free(part->bdev.name); 2091 return -1; 2092 } 2093 base->claimed = true; 2094 } 2095 2096 spdk_io_device_register(&part->base, spdk_bdev_part_channel_create_cb, 2097 spdk_bdev_part_channel_destroy_cb, 2098 base->channel_size); 2099 spdk_vbdev_register(&part->bdev, &base->bdev, 1); 2100 TAILQ_INSERT_TAIL(base->tailq, part, tailq); 2101 2102 return 0; 2103 } 2104 2105 SPDK_LOG_REGISTER_TRACE_FLAG("bdev", SPDK_TRACE_BDEV) 2106
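
/*
 * Illustrative consumer-side flow for the API implemented above. This is a
 * minimal sketch, not part of this file's build: the bdev name "Malloc0",
 * the 4096-byte transfer size and the my_* identifiers are assumptions made
 * purely for the example. The read buffer must satisfy
 * spdk_bdev_get_buf_align() for the target bdev.
 *
 *	static void
 *	my_read_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 *	{
 *		spdk_bdev_free_io(bdev_io);
 *	}
 *
 *	static int
 *	my_read_example(void *buf)
 *	{
 *		struct spdk_bdev *bdev = spdk_bdev_get_by_name("Malloc0");
 *		struct spdk_bdev_desc *desc;
 *		struct spdk_io_channel *ch;
 *		int rc;
 *
 *		if (bdev == NULL || spdk_bdev_open(bdev, false, NULL, NULL, &desc) != 0) {
 *			return -1;
 *		}
 *
 *		ch = spdk_bdev_get_io_channel(desc);
 *		rc = spdk_bdev_read(desc, ch, buf, 0, 4096, my_read_complete, NULL);
 *
 *		// ... once my_read_complete has run, release the channel and descriptor:
 *		spdk_put_io_channel(ch);
 *		spdk_bdev_close(desc);
 *		return rc;
 *	}
 */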