/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"

#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/nvme_spec.h"
#include "spdk/scsi_spec.h"
#include "spdk/util.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/log.h"
#include "spdk/string.h"

#ifdef SPDK_CONFIG_VTUNE
#include "ittnotify.h"
#include "ittnotify_types.h"
int __itt_init_ittlib(const char *, __itt_group_id);
#endif

#define SPDK_BDEV_IO_POOL_SIZE	(64 * 1024)
#define BUF_SMALL_POOL_SIZE	8192
#define BUF_LARGE_POOL_SIZE	1024

typedef TAILQ_HEAD(, spdk_bdev_io) need_buf_tailq_t;

struct spdk_bdev_mgr {
	struct spdk_mempool *bdev_io_pool;

	struct spdk_mempool *buf_small_pool;
	struct spdk_mempool *buf_large_pool;

	TAILQ_HEAD(, spdk_bdev_module_if) bdev_modules;

	TAILQ_HEAD(, spdk_bdev) bdevs;

	spdk_bdev_poller_start_cb start_poller_fn;
	spdk_bdev_poller_stop_cb stop_poller_fn;

	bool init_complete;
	bool module_init_complete;

#ifdef SPDK_CONFIG_VTUNE
	__itt_domain *domain;
#endif
};

static struct spdk_bdev_mgr g_bdev_mgr = {
	.bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
	.bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
	.start_poller_fn = NULL,
	.stop_poller_fn = NULL,
	.init_complete = false,
	.module_init_complete = false,
};

static spdk_bdev_init_cb g_cb_fn = NULL;
static void *g_cb_arg = NULL;

struct spdk_bdev_mgmt_channel {
	need_buf_tailq_t need_buf_small;
	need_buf_tailq_t need_buf_large;
};

struct spdk_bdev_desc {
	struct spdk_bdev	*bdev;
	spdk_bdev_remove_cb_t	remove_cb;
	void			*remove_ctx;
	bool			write;
	TAILQ_ENTRY(spdk_bdev_desc) link;
};

struct spdk_bdev_channel {
	struct spdk_bdev	*bdev;

	/* The channel for the underlying device */
	struct spdk_io_channel	*channel;

	/* Channel for the bdev manager */
	struct spdk_io_channel	*mgmt_channel;

	struct spdk_bdev_io_stat stat;

	/*
	 * Count of I/O submitted to bdev module and waiting for completion.
	 * Incremented before submit_request() is called on an spdk_bdev_io.
	 */
	uint64_t		io_outstanding;

	TAILQ_HEAD(, spdk_bdev_io) queued_resets;

#ifdef SPDK_CONFIG_VTUNE
	uint64_t		start_tsc;
	uint64_t		interval_tsc;
	__itt_string_handle	*handle;
#endif

};

struct spdk_bdev *
spdk_bdev_first(void)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
	if (bdev) {
		SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_NEXT(prev, link);
	if (bdev) {
		SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

static struct spdk_bdev *
_bdev_next_leaf(struct spdk_bdev *bdev)
{
	while (bdev != NULL) {
		if (TAILQ_EMPTY(&bdev->vbdevs)) {
			return bdev;
		} else {
			bdev = TAILQ_NEXT(bdev, link);
		}
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_first_leaf(void)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));

	if (bdev) {
		SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next_leaf(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_NEXT(prev, link));

	if (bdev) {
		SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_get_by_name(const char *bdev_name)
{
	struct spdk_bdev *bdev = spdk_bdev_first();

	while (bdev != NULL) {
		if (strcmp(bdev_name, bdev->name) == 0) {
			return bdev;
		}
		bdev = spdk_bdev_next(bdev);
	}

	return NULL;
}

static void
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf)
{
	assert(bdev_io->get_buf_cb != NULL);
	assert(buf != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	bdev_io->buf = buf;
	bdev_io->u.read.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.read.iovs[0].iov_len = bdev_io->u.read.num_blocks * bdev_io->bdev->blocklen;
	bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io);
}
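
/*
 * Note on the alignment math above (worked example with hypothetical
 * addresses): ((buf + 512) & ~511) yields the first 512-byte aligned address
 * strictly above buf, so buf = 0x20001 maps to 0x20200 and an already-aligned
 * buf = 0x20000 also maps to 0x20200. The buffer pools allocate an extra 512
 * bytes per element (see spdk_bdev_initialize) so this shift never overruns
 * the allocation.
 */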

static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_mempool *pool;
	struct spdk_bdev_io *tmp;
	void *buf;
	need_buf_tailq_t *tailq;
	uint64_t length;
	struct spdk_bdev_mgmt_channel *ch;

	assert(bdev_io->u.read.iovcnt == 1);

	length = bdev_io->u.read.num_blocks * bdev_io->bdev->blocklen;
	buf = bdev_io->buf;

	ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel);

	if (length <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &ch->need_buf_large;
	}

	if (TAILQ_EMPTY(tailq)) {
		spdk_mempool_put(pool, buf);
	} else {
		tmp = TAILQ_FIRST(tailq);
		TAILQ_REMOVE(tailq, tmp, buf_link);
		spdk_bdev_io_set_buf(tmp, buf);
	}
}

void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb)
{
	uint64_t len = bdev_io->u.read.num_blocks * bdev_io->bdev->blocklen;
	struct spdk_mempool *pool;
	need_buf_tailq_t *tailq;
	void *buf = NULL;
	struct spdk_bdev_mgmt_channel *ch;

	assert(cb != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	if (spdk_unlikely(bdev_io->u.read.iovs[0].iov_base != NULL)) {
		/* Buffer already present */
		cb(bdev_io->ch->channel, bdev_io);
		return;
	}

	ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel);

	bdev_io->get_buf_cb = cb;
	if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &ch->need_buf_large;
	}

	buf = spdk_mempool_get(pool);

	if (!buf) {
		TAILQ_INSERT_TAIL(tailq, bdev_io, buf_link);
	} else {
		spdk_bdev_io_set_buf(bdev_io, buf);
	}
}

static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int max_bdev_module_size = 0;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	return max_bdev_module_size;
}

void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
}

static int
spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->need_buf_small);
	TAILQ_INIT(&ch->need_buf_large);

	return 0;
}

static void
spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->need_buf_small) || !TAILQ_EMPTY(&ch->need_buf_large)) {
		SPDK_ERRLOG("Pending I/O list wasn't empty on channel destruction\n");
	}
}

static void
spdk_bdev_init_complete(int rc)
{
	spdk_bdev_init_cb cb_fn = g_cb_fn;
	void *cb_arg = g_cb_arg;

	g_bdev_mgr.init_complete = true;
	g_cb_fn = NULL;
	g_cb_arg = NULL;

	cb_fn(cb_arg, rc);
}

static void
spdk_bdev_module_action_complete(void)
{
	struct spdk_bdev_module_if *m;

	/*
	 * Don't finish bdev subsystem initialization if
	 * module pre-initialization is still in progress, or
	 * the subsystem has already been initialized.
	 */
	if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) {
		return;
	}

	/*
	 * Check all bdev modules for inits/examinations in progress. If any
	 * exist, return immediately since we cannot finish bdev subsystem
	 * initialization until all are completed.
	 */
	TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
		if (m->action_in_progress > 0) {
			return;
		}
	}

	/*
	 * Modules already finished initialization - now that all
	 * the bdev modules have finished their asynchronous I/O
	 * processing, the entire bdev layer can be marked as complete.
	 */
	spdk_bdev_init_complete(0);
}

static void
spdk_bdev_module_action_done(struct spdk_bdev_module_if *module)
{
	assert(module->action_in_progress > 0);
	module->action_in_progress--;
	spdk_bdev_module_action_complete();
}

void
spdk_bdev_module_init_done(struct spdk_bdev_module_if *module)
{
	spdk_bdev_module_action_done(module);
}

void
spdk_bdev_module_examine_done(struct spdk_bdev_module_if *module)
{
	spdk_bdev_module_action_done(module);
}

static int
spdk_bdev_modules_init(void)
{
	struct spdk_bdev_module_if *module;
	int rc = 0;

	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
		rc = module->module_init();
		if (rc != 0) {
			break;
		}
	}

	g_bdev_mgr.module_init_complete = true;
	return rc;
}
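
/*
 * Example (illustrative sketch, not part of this file): a bdev module whose
 * module_init() kicks off asynchronous work and reports completion through
 * spdk_bdev_module_init_done() above. All "example_" names are hypothetical.
 * Note the module must account for the pending action itself, e.g. by
 * incrementing its own action_in_progress before returning, since
 * spdk_bdev_module_action_done() asserts the count is positive.
 *
 *	static int
 *	example_module_init(void)
 *	{
 *		g_example_module.action_in_progress++;
 *		example_start_async_probe(example_probe_done_cb);
 *		return 0;
 *	}
 *
 *	static void
 *	example_probe_done_cb(void *ctx)
 *	{
 *		spdk_bdev_module_init_done(&g_example_module);
 *	}
 */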

void
spdk_bdev_poller_start(struct spdk_bdev_poller **ppoller,
		       spdk_bdev_poller_fn fn,
		       void *arg,
		       uint32_t lcore,
		       uint64_t period_microseconds)
{
	g_bdev_mgr.start_poller_fn(ppoller, fn, arg, lcore, period_microseconds);
}

void
spdk_bdev_poller_stop(struct spdk_bdev_poller **ppoller)
{
	g_bdev_mgr.stop_poller_fn(ppoller);
}

void
spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg,
		     spdk_bdev_poller_start_cb start_poller_fn,
		     spdk_bdev_poller_stop_cb stop_poller_fn)
{
	int cache_size;
	int rc = 0;
	char mempool_name[32];

	assert(cb_fn != NULL);

	g_cb_fn = cb_fn;
	g_cb_arg = cb_arg;

	g_bdev_mgr.start_poller_fn = start_poller_fn;
	g_bdev_mgr.stop_poller_fn = stop_poller_fn;

	snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());

	g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
				  SPDK_BDEV_IO_POOL_SIZE,
				  sizeof(struct spdk_bdev_io) +
				  spdk_bdev_module_get_max_ctx_size(),
				  64,
				  SPDK_ENV_SOCKET_ID_ANY);

	if (g_bdev_mgr.bdev_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		spdk_bdev_init_complete(-1);
		return;
	}

	/**
	 * Ensure no more than half of the total buffers end up in local caches, by
	 * using spdk_env_get_core_count() to determine how many local caches we need
	 * to account for. For example, with BUF_SMALL_POOL_SIZE of 8192 and 4 cores,
	 * each per-core cache holds at most 1024 buffers, so at most 4096 buffers
	 * (half the pool) can be parked in caches at any time.
	 */
	cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
	snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid());

	g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name,
				    BUF_SMALL_POOL_SIZE,
				    SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_small_pool) {
		SPDK_ERRLOG("could not allocate small buffer pool\n");
		spdk_bdev_init_complete(-1);
		return;
	}

	cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
	snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid());

	g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name,
				    BUF_LARGE_POOL_SIZE,
				    SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_large_pool) {
		SPDK_ERRLOG("could not allocate large buffer pool\n");
		spdk_bdev_init_complete(-1);
		return;
	}

#ifdef SPDK_CONFIG_VTUNE
	g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
#endif

	spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create,
				spdk_bdev_mgmt_channel_destroy,
				sizeof(struct spdk_bdev_mgmt_channel));

	rc = spdk_bdev_modules_init();
	if (rc != 0) {
		SPDK_ERRLOG("bdev modules init failed\n");
		spdk_bdev_init_complete(-1);
		return;
	}

	spdk_bdev_module_action_complete();
}

int
spdk_bdev_finish(void)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}

	if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) {
		SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
			    SPDK_BDEV_IO_POOL_SIZE);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
		SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_small_pool),
			    BUF_SMALL_POOL_SIZE);
		assert(false);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
		SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_large_pool),
			    BUF_LARGE_POOL_SIZE);
		assert(false);
	}

	spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
	spdk_mempool_free(g_bdev_mgr.buf_small_pool);
	spdk_mempool_free(g_bdev_mgr.buf_large_pool);

	spdk_io_device_unregister(&g_bdev_mgr, NULL);

	return 0;
}

struct spdk_bdev_io *
spdk_bdev_get_io(void)
{
	struct spdk_bdev_io *bdev_io;

	bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
	if (!bdev_io) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		abort();
	}

	memset(bdev_io, 0, offsetof(struct spdk_bdev_io, u));

	return bdev_io;
}

static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		return;
	}

	if (bdev_io->buf != NULL) {
		spdk_bdev_io_put_buf(bdev_io);
	}

	spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
}

static void
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;
	struct spdk_io_channel *ch = bdev_io->ch->channel;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);

	bdev_io->ch->io_outstanding++;
	bdev_io->in_submit_request = true;
	bdev->fn_table->submit_request(ch, bdev_io);
	bdev_io->in_submit_request = false;
}

static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->in_submit_request = false;
}

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
{
	return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
}

int
spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	if (bdev->fn_table->dump_config_json) {
		return bdev->fn_table->dump_config_json(bdev->ctxt, w);
	}

	return 0;
}

static int
spdk_bdev_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev		*bdev = io_device;
	struct spdk_bdev_channel	*ch = ctx_buf;

	ch->bdev = io_device;
	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
	ch->mgmt_channel = spdk_get_io_channel(&g_bdev_mgr);
	memset(&ch->stat, 0, sizeof(ch->stat));
	ch->io_outstanding = 0;
	TAILQ_INIT(&ch->queued_resets);

#ifdef SPDK_CONFIG_VTUNE
	{
		char *name;

		__itt_init_ittlib(NULL, 0);
		name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
		if (!name) {
			return -1;
		}
		ch->handle = __itt_string_handle_create(name);
		free(name);
		ch->start_tsc = spdk_get_ticks();
		ch->interval_tsc = spdk_get_ticks_hz() / 100;
	}
#endif

	return 0;
}

static void
_spdk_bdev_abort_io(need_buf_tailq_t *queue, struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_io *bdev_io, *tmp;

	TAILQ_FOREACH_SAFE(bdev_io, queue, buf_link, tmp) {
		if (bdev_io->ch == ch) {
			TAILQ_REMOVE(queue, bdev_io, buf_link);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_channel	*ch = ctx_buf;
	struct spdk_bdev_mgmt_channel	*mgmt_channel;

	mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel);

	_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, ch);
	_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, ch);

	spdk_put_io_channel(ch->channel);
	spdk_put_io_channel(ch->mgmt_channel);
	assert(ch->io_outstanding == 0);
}

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
{
	return spdk_get_io_channel(desc->bdev);
}

const char *
spdk_bdev_get_name(const struct spdk_bdev *bdev)
{
	return bdev->name;
}

const char *
spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
{
	return bdev->product_name;
}

uint32_t
spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
{
	return bdev->blocklen;
}

uint64_t
spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
{
	return bdev->blockcnt;
}

size_t
spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
{
	/* TODO: push this logic down to the bdev modules */
	if (bdev->need_aligned_buffer) {
		return bdev->blocklen;
	}

	return 1;
}

uint32_t
spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
{
	return bdev->optimal_io_boundary;
}
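
/*
 * Example (illustrative sketch, not part of this file): allocating an I/O
 * buffer that satisfies a bdev's alignment requirement using the accessors
 * above. The variable names are hypothetical.
 *
 *	size_t align = spdk_bdev_get_buf_align(bdev);
 *	uint64_t nbytes = 8 * spdk_bdev_get_block_size(bdev);
 *	void *buf = spdk_dma_zmalloc(nbytes, align, NULL);
 */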

bool
spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
{
	return bdev->write_cache;
}

/*
 * Convert I/O offset and length from bytes to blocks.
 *
 * Returns zero on success or non-zero if the byte parameters aren't divisible by the block size.
 */
static uint64_t
spdk_bdev_bytes_to_blocks(struct spdk_bdev *bdev, uint64_t offset_bytes, uint64_t *offset_blocks,
			  uint64_t num_bytes, uint64_t *num_blocks)
{
	uint32_t block_size = bdev->blocklen;

	*offset_blocks = offset_bytes / block_size;
	*num_blocks = num_bytes / block_size;

	return (offset_bytes % block_size) | (num_bytes % block_size);
}

static bool
spdk_bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks)
{
	/* Return failure if offset_blocks + num_blocks is less than offset_blocks; this
	 * indicates an overflow, i.e. the offset has wrapped around. */
	if (offset_blocks + num_blocks < offset_blocks) {
		return false;
	}

	/* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */
	if (offset_blocks + num_blocks > bdev->blockcnt) {
		return false;
	}

	return true;
}

int
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		      void *buf, uint64_t offset_blocks, uint64_t num_blocks,
		      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iov.iov_base = buf;
	bdev_io->u.read.iov.iov_len = num_blocks * bdev->blocklen;
	bdev_io->u.read.iovs = &bdev_io->u.read.iov;
	bdev_io->u.read.iovcnt = 1;
	bdev_io->u.read.num_blocks = num_blocks;
	bdev_io->u.read.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
}
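
/*
 * Example (illustrative sketch, not part of this file): issuing a read
 * through the API above from a thread that already holds a descriptor and
 * channel. The "my_" names are hypothetical.
 *
 *	static void
 *	my_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 *	{
 *		SPDK_NOTICELOG("read %s\n", success ? "succeeded" : "failed");
 *		spdk_bdev_free_io(bdev_io);
 *	}
 *
 *	...
 *	rc = spdk_bdev_read(desc, ch, buf, 0, 4096, my_read_done, NULL);
 *	if (rc != 0) {
 *		// -EINVAL (offset/length not block-aligned or out of range)
 *		// or -ENOMEM (no spdk_bdev_io available)
 *	}
 */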

int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   struct iovec *iov, int iovcnt,
			   uint64_t offset_blocks, uint64_t num_blocks,
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during readv\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iovs = iov;
	bdev_io->u.read.iovcnt = iovcnt;
	bdev_io->u.read.num_blocks = num_blocks;
	bdev_io->u.read.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       void *buf, uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iov.iov_base = buf;
	bdev_io->u.write.iov.iov_len = num_blocks * bdev->blocklen;
	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
	bdev_io->u.write.iovcnt = 1;
	bdev_io->u.write.num_blocks = num_blocks;
	bdev_io->u.write.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
}
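
/*
 * Example (illustrative sketch, not part of this file): a two-segment
 * vectored write via the API above. The buffer pointers, sizes, and the
 * completion callback are hypothetical; each segment must respect the bdev's
 * buffer alignment requirement.
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr_buf,  .iov_len = 512 },
 *		{ .iov_base = data_buf, .iov_len = 3584 },
 *	};
 *
 *	rc = spdk_bdev_writev(desc, ch, iov, 2, 0, 4096, my_write_done, NULL);
 */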

int
spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			struct iovec *iov, int iovcnt,
			uint64_t offset_blocks, uint64_t num_blocks,
			spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iovs = iov;
	bdev_io->u.write.iovcnt = iovcnt;
	bdev_io->u.write.num_blocks = num_blocks;
	bdev_io->u.write.offset_blocks = offset_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset, uint64_t len,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, len, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			      uint64_t offset_blocks, uint64_t num_blocks,
			      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write_zeroes\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->u.write.num_blocks = num_blocks;
	bdev_io->u.write.offset_blocks = offset_blocks;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	if (num_blocks == 0) {
		SPDK_ERRLOG("Can't unmap 0 blocks\n");
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.unmap.offset_blocks = offset_blocks;
	bdev_io->u.unmap.num_blocks = num_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	uint64_t offset_blocks, num_blocks;

	if (spdk_bdev_bytes_to_blocks(desc->bdev, offset, &offset_blocks, length, &num_blocks) != 0) {
		return -EINVAL;
	}

	return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
}

int
spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset_blocks, uint64_t num_blocks,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.flush.offset_blocks = offset_blocks;
	bdev_io->u.flush.num_blocks = num_blocks;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

static void
_spdk_bdev_reset_dev(void *io_device, void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;

	spdk_bdev_io_submit(bdev_io);
}

static void
_spdk_bdev_reset_abort_channel(void *io_device, struct spdk_io_channel *ch,
			       void *ctx)
{
	struct spdk_bdev_channel	*channel;
	struct spdk_bdev_mgmt_channel	*mgmt_channel;

	channel = spdk_io_channel_get_ctx(ch);
	mgmt_channel = spdk_io_channel_get_ctx(channel->mgmt_channel);

	_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, channel);
	_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, channel);
}

static void
_spdk_bdev_start_reset(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;

	spdk_for_each_channel(bdev_io->bdev, _spdk_bdev_reset_abort_channel,
			      bdev_io, _spdk_bdev_reset_dev);
}

static void
_spdk_bdev_channel_start_reset(struct spdk_bdev_channel *ch)
{
	struct spdk_bdev *bdev = ch->bdev;
	struct spdk_bdev_io *reset_io;

	assert(!TAILQ_EMPTY(&ch->queued_resets));

	pthread_mutex_lock(&bdev->mutex);
	if (!bdev->reset_in_progress) {
		bdev->reset_in_progress = true;
		reset_io = TAILQ_FIRST(&ch->queued_resets);
		TAILQ_REMOVE(&ch->queued_resets, reset_io, link);
		_spdk_bdev_start_reset(reset_io);
	}
	pthread_mutex_unlock(&bdev->mutex);
}

static void
_spdk_bdev_complete_reset_channel(void *io_device, struct spdk_io_channel *_ch, void *ctx)
{
	struct spdk_bdev_channel *ch = spdk_io_channel_get_ctx(_ch);

	if (!TAILQ_EMPTY(&ch->queued_resets)) {
		_spdk_bdev_channel_start_reset(ch);
	}
}

int
spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during reset\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	pthread_mutex_lock(&bdev->mutex);
	TAILQ_INSERT_TAIL(&channel->queued_resets, bdev_io, link);
	pthread_mutex_unlock(&bdev->mutex);

	_spdk_bdev_channel_start_reset(channel);

	return 0;
}
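
/*
 * Note on reset ordering (summary of the logic above): resets are queued on
 * the submitting channel, but only one reset per bdev runs at a time,
 * guarded by bdev->reset_in_progress. Before the reset reaches the module,
 * spdk_for_each_channel() fails any I/O still waiting on a data buffer on
 * every channel, and when the reset completes (see spdk_bdev_io_complete),
 * each channel with queued resets gets a chance to start the next one.
 */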

void
spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		      struct spdk_bdev_io_stat *stat)
{
#ifdef SPDK_CONFIG_VTUNE
	SPDK_ERRLOG("Calling spdk_bdev_get_io_stat is not allowed when VTune integration is enabled.\n");
	memset(stat, 0, sizeof(*stat));
	return;
#endif

	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	*stat = channel->stat;
	memset(&channel->stat, 0, sizeof(channel->stat));
}

int
spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			      const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
			      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
	bdev_io->u.nvme_passthru.cmd = *cmd;
	bdev_io->u.nvme_passthru.buf = buf;
	bdev_io->u.nvme_passthru.nbytes = nbytes;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!desc->write) {
		/*
		 * Do not try to parse the NVMe command - we could maybe use bits in the opcode
		 * to easily determine if the command is a read or write, but for now just
		 * do not allow io_passthru with a read-only descriptor.
		 */
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during nvme_io_passthru\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
	bdev_io->u.nvme_passthru.cmd = *cmd;
	bdev_io->u.nvme_passthru.buf = buf;
	bdev_io->u.nvme_passthru.nbytes = nbytes;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	spdk_bdev_io_submit(bdev_io);
	return 0;
}

int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io is NULL\n");
		return -1;
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		SPDK_ERRLOG("bdev_io is in pending state\n");
		assert(false);
		return -1;
	}

	spdk_bdev_put_io(bdev_io);

	return 0;
}

static void
_spdk_bdev_io_complete(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;

	assert(bdev_io->cb != NULL);
	bdev_io->cb(bdev_io, bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS, bdev_io->caller_ctx);
}

void
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	bdev_io->status = status;

	assert(bdev_io->ch->io_outstanding > 0);
	bdev_io->ch->io_outstanding--;
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		pthread_mutex_lock(&bdev_io->bdev->mutex);
		bdev_io->bdev->reset_in_progress = false;
		pthread_mutex_unlock(&bdev_io->bdev->mutex);
		spdk_for_each_channel(bdev_io->bdev, _spdk_bdev_complete_reset_channel, NULL, NULL);
	}

	if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_READ:
			bdev_io->ch->stat.bytes_read += bdev_io->u.read.num_blocks * bdev_io->bdev->blocklen;
			bdev_io->ch->stat.num_read_ops++;
			break;
		case SPDK_BDEV_IO_TYPE_WRITE:
			bdev_io->ch->stat.bytes_written += bdev_io->u.write.num_blocks * bdev_io->bdev->blocklen;
			bdev_io->ch->stat.num_write_ops++;
			break;
		default:
			break;
		}
	}

#ifdef SPDK_CONFIG_VTUNE
	uint64_t now_tsc = spdk_get_ticks();
	if (now_tsc > (bdev_io->ch->start_tsc + bdev_io->ch->interval_tsc)) {
		uint64_t data[5];

		data[0] = bdev_io->ch->stat.num_read_ops;
		data[1] = bdev_io->ch->stat.bytes_read;
		data[2] = bdev_io->ch->stat.num_write_ops;
		data[3] = bdev_io->ch->stat.bytes_written;
		data[4] = bdev_io->bdev->fn_table->get_spin_time ?
			  bdev_io->bdev->fn_table->get_spin_time(bdev_io->ch->channel) : 0;

		__itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->ch->handle,
				   __itt_metadata_u64, 5, data);

		memset(&bdev_io->ch->stat, 0, sizeof(bdev_io->ch->stat));
		bdev_io->ch->start_tsc = now_tsc;
	}
#endif

	if (bdev_io->in_submit_request || bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/*
		 * Defer completion to avoid potential infinite recursion if the
		 * user's completion callback issues a new I/O.
		 */
		spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->ch->channel),
				     _spdk_bdev_io_complete, bdev_io);
	} else {
		_spdk_bdev_io_complete(bdev_io);
	}
}

void
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
				  enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
{
	if (sc == SPDK_SCSI_STATUS_GOOD) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
		bdev_io->error.scsi.sc = sc;
		bdev_io->error.scsi.sk = sk;
		bdev_io->error.scsi.asc = asc;
		bdev_io->error.scsi.ascq = ascq;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
			     int *sc, int *sk, int *asc, int *ascq)
{
	assert(sc != NULL);
	assert(sk != NULL);
	assert(asc != NULL);
	assert(ascq != NULL);

	switch (bdev_io->status) {
	case SPDK_BDEV_IO_STATUS_SUCCESS:
		*sc = SPDK_SCSI_STATUS_GOOD;
		*sk = SPDK_SCSI_SENSE_NO_SENSE;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	case SPDK_BDEV_IO_STATUS_NVME_ERROR:
		spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
		break;
	case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
		*sc = bdev_io->error.scsi.sc;
		*sk = bdev_io->error.scsi.sk;
		*asc = bdev_io->error.scsi.asc;
		*ascq = bdev_io->error.scsi.ascq;
		break;
	default:
		*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
		*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	}
}

void
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
{
	if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->error.nvme.sct = sct;
		bdev_io->error.nvme.sc = sc;
		bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
{
	assert(sct != NULL);
	assert(sc != NULL);

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
		*sct = bdev_io->error.nvme.sct;
		*sc = bdev_io->error.nvme.sc;
	} else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_SUCCESS;
	} else {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	}
}
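
/*
 * Example (illustrative sketch, not part of this file): a SCSI target's
 * completion callback mapping a bdev I/O status to SCSI sense data via the
 * accessor above. The "my_" names are hypothetical.
 *
 *	static void
 *	my_scsi_task_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 *	{
 *		int sc, sk, asc, ascq;
 *
 *		spdk_bdev_io_get_scsi_status(bdev_io, &sc, &sk, &asc, &ascq);
 *		// build the SCSI response for the initiator from sc/sk/asc/ascq
 *		spdk_bdev_free_io(bdev_io);
 *	}
 */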

static void
_spdk_bdev_register(struct spdk_bdev *bdev)
{
	struct spdk_bdev_module_if *module;

	assert(bdev->module != NULL);

	bdev->status = SPDK_BDEV_STATUS_READY;

	TAILQ_INIT(&bdev->open_descs);
	bdev->bdev_opened = false;
	bdev->bdev_opened_for_write = false;

	TAILQ_INIT(&bdev->vbdevs);
	TAILQ_INIT(&bdev->base_bdevs);

	bdev->reset_in_progress = false;

	spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
				sizeof(struct spdk_bdev_channel));

	pthread_mutex_init(&bdev->mutex, NULL);
	SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Inserting bdev %s into list\n", bdev->name);
	TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);

	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
		if (module->examine) {
			module->action_in_progress++;
			module->examine(bdev);
		}
	}
}

void
spdk_bdev_register(struct spdk_bdev *bdev)
{
	_spdk_bdev_register(bdev);
}

void
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
	int i;

	_spdk_bdev_register(vbdev);
	for (i = 0; i < base_bdev_count; i++) {
		assert(base_bdevs[i] != NULL);
		TAILQ_INSERT_TAIL(&vbdev->base_bdevs, base_bdevs[i], base_bdev_link);
		TAILQ_INSERT_TAIL(&base_bdevs[i]->vbdevs, vbdev, vbdev_link);
	}
}

void
spdk_bdev_unregister(struct spdk_bdev *bdev)
{
	struct spdk_bdev_desc	*desc, *tmp;
	int			rc;
	bool			do_destruct = true;

	SPDK_DEBUGLOG(SPDK_TRACE_BDEV, "Removing bdev %s from list\n", bdev->name);

	pthread_mutex_lock(&bdev->mutex);

	bdev->status = SPDK_BDEV_STATUS_REMOVING;

	TAILQ_FOREACH_SAFE(desc, &bdev->open_descs, link, tmp) {
		if (desc->remove_cb) {
			pthread_mutex_unlock(&bdev->mutex);
			do_destruct = false;
			desc->remove_cb(desc->remove_ctx);
			pthread_mutex_lock(&bdev->mutex);
		}
	}

	if (!do_destruct) {
		pthread_mutex_unlock(&bdev->mutex);
		return;
	}

	TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
	pthread_mutex_unlock(&bdev->mutex);

	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(bdev, NULL);

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
		SPDK_ERRLOG("destruct failed\n");
	}
}

void
spdk_vbdev_unregister(struct spdk_bdev *vbdev)
{
	struct spdk_bdev *base_bdev;

	assert(!TAILQ_EMPTY(&vbdev->base_bdevs));
	TAILQ_FOREACH(base_bdev, &vbdev->base_bdevs, base_bdev_link) {
		TAILQ_REMOVE(&base_bdev->vbdevs, vbdev, vbdev_link);
	}
	spdk_bdev_unregister(vbdev);
}

bool
spdk_is_bdev_opened(struct spdk_bdev *bdev)
{
	struct spdk_bdev *base;

	if (bdev->bdev_opened) {
		return true;
	}

	TAILQ_FOREACH(base, &bdev->base_bdevs, base_bdev_link) {
		if (spdk_is_bdev_opened(base)) {
			return true;
		}
	}

	return false;
}

int
spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
	       void *remove_ctx, struct spdk_bdev_desc **_desc)
{
	struct spdk_bdev_desc *desc;

	desc = calloc(1, sizeof(*desc));
	if (desc == NULL) {
		return -ENOMEM;
	}

	pthread_mutex_lock(&bdev->mutex);

	if (write && (bdev->bdev_opened_for_write || bdev->claim_module)) {
		SPDK_ERRLOG("failed, %s already opened for write or claimed\n", bdev->name);
		free(desc);
		pthread_mutex_unlock(&bdev->mutex);
		return -EPERM;
	}

	TAILQ_INSERT_TAIL(&bdev->open_descs, desc, link);

	if (write) {
		bdev->bdev_opened_for_write = true;
	}

	bdev->bdev_opened = true;

	desc->bdev = bdev;
	desc->remove_cb = remove_cb;
	desc->remove_ctx = remove_ctx;
	desc->write = write;
	*_desc = desc;

	pthread_mutex_unlock(&bdev->mutex);

	return 0;
}
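
/*
 * Example (illustrative sketch, not part of this file): the typical
 * descriptor lifecycle built from the functions above. The bdev name
 * "Malloc0" and the hot-remove callback are hypothetical.
 *
 *	struct spdk_bdev *bdev = spdk_bdev_get_by_name("Malloc0");
 *	struct spdk_bdev_desc *desc;
 *	struct spdk_io_channel *io_ch;
 *
 *	if (bdev == NULL ||
 *	    spdk_bdev_open(bdev, true, my_hotremove_cb, NULL, &desc) != 0) {
 *		return;
 *	}
 *	io_ch = spdk_bdev_get_io_channel(desc);
 *	// ... submit I/O on io_ch ...
 *	spdk_put_io_channel(io_ch);
 *	spdk_bdev_close(desc);
 */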

void
spdk_bdev_close(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev = desc->bdev;
	bool do_unregister = false;

	pthread_mutex_lock(&bdev->mutex);

	if (desc->write) {
		assert(bdev->bdev_opened_for_write);
		bdev->bdev_opened_for_write = false;
	}

	bdev->bdev_opened = false;

	TAILQ_REMOVE(&bdev->open_descs, desc, link);
	free(desc);

	if (bdev->status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->open_descs)) {
		do_unregister = true;
	}
	pthread_mutex_unlock(&bdev->mutex);

	if (do_unregister == true) {
		spdk_bdev_unregister(bdev);
	}
}

int
spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_bdev_module_if *module)
{
	if (bdev->claim_module != NULL) {
		SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name,
			    bdev->claim_module->name);
		return -EPERM;
	}

	if ((!desc || !desc->write) && bdev->bdev_opened_for_write) {
		SPDK_ERRLOG("bdev %s already opened with write access\n", bdev->name);
		return -EPERM;
	}

	if (desc && !desc->write) {
		bdev->bdev_opened_for_write = true;
		desc->write = true;
	}

	bdev->claim_module = module;
	return 0;
}

void
spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
{
	assert(bdev->claim_module != NULL);
	bdev->claim_module = NULL;
}

struct spdk_bdev *
spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
{
	return desc->bdev;
}

void
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
{
	struct iovec *iovs;
	int iovcnt;

	if (bdev_io == NULL) {
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		iovs = bdev_io->u.read.iovs;
		iovcnt = bdev_io->u.read.iovcnt;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		iovs = bdev_io->u.write.iovs;
		iovcnt = bdev_io->u.write.iovcnt;
		break;
	default:
		iovs = NULL;
		iovcnt = 0;
		break;
	}

	if (iovp) {
		*iovp = iovs;
	}
	if (iovcntp) {
		*iovcntp = iovcnt;
	}
}
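
/*
 * Example (illustrative sketch, not part of this file): a virtual bdev
 * module claiming its base bdev so no other consumer can open it for write.
 * The "my_if" module interface and error handling are hypothetical.
 *
 *	rc = spdk_bdev_module_claim_bdev(base_bdev, base_desc, &my_if);
 *	if (rc != 0) {
 *		SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base_bdev));
 *		return rc;
 *	}
 *	// ... and on teardown:
 *	spdk_bdev_module_release_bdev(base_bdev);
 */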

void
spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module)
{
	/*
	 * Modules with examine callbacks must be initialized first, so they are
	 * ready to handle examine callbacks from later modules that will
	 * register physical bdevs.
	 */
	if (bdev_module->examine != NULL) {
		TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
	}
}

void
spdk_bdev_part_base_free(struct spdk_bdev_part_base *base)
{
	assert(base->bdev);
	assert(base->desc);
	spdk_bdev_close(base->desc);
	free(base);
}

void
spdk_bdev_part_free(struct spdk_bdev_part *part)
{
	struct spdk_bdev_part_base *base;

	assert(part);
	assert(part->base);

	base = part->base;
	spdk_io_device_unregister(&part->base, NULL);
	TAILQ_REMOVE(base->tailq, part, tailq);
	free(part->bdev.name);
	free(part);

	if (__sync_sub_and_fetch(&base->ref, 1) == 0) {
		spdk_bdev_module_release_bdev(base->bdev);
		spdk_bdev_part_base_free(base);
	}
}

void
spdk_bdev_part_tailq_fini(struct bdev_part_tailq *tailq)
{
	struct spdk_bdev_part *part, *tmp;

	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
		spdk_bdev_part_free(part);
	}
}

void
spdk_bdev_part_base_hotremove(struct spdk_bdev *base_bdev, struct bdev_part_tailq *tailq)
{
	struct spdk_bdev_part *part, *tmp;

	TAILQ_FOREACH_SAFE(part, tailq, tailq, tmp) {
		if (part->base->bdev == base_bdev) {
			spdk_bdev_unregister(&part->bdev);
		}
	}
}

static bool
spdk_bdev_part_io_type_supported(void *_part, enum spdk_bdev_io_type io_type)
{
	struct spdk_bdev_part *part = _part;

	return part->base->bdev->fn_table->io_type_supported(part->base->bdev, io_type);
}

static struct spdk_io_channel *
spdk_bdev_part_get_io_channel(void *_part)
{
	struct spdk_bdev_part *part = _part;

	return spdk_get_io_channel(&part->base);
}

static void
spdk_bdev_part_complete_io(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_bdev_io *part_io = cb_arg;
	int status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;

	spdk_bdev_io_complete(part_io, status);
	spdk_bdev_free_io(bdev_io);
}

void
spdk_bdev_part_submit_request(struct spdk_bdev_part_channel *ch, struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_part *part = ch->part;
	struct spdk_io_channel *base_ch = ch->base_ch;
	struct spdk_bdev_desc *base_desc = part->base->desc;
	uint64_t offset;
	int rc = 0;

	/* Modify the I/O to adjust for the offset within the base bdev. */
	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		offset = bdev_io->u.read.offset_blocks + part->offset_blocks;
		rc = spdk_bdev_readv_blocks(base_desc, base_ch, bdev_io->u.read.iovs,
					    bdev_io->u.read.iovcnt, offset,
					    bdev_io->u.read.num_blocks, spdk_bdev_part_complete_io,
					    bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		offset = bdev_io->u.write.offset_blocks + part->offset_blocks;
		rc = spdk_bdev_writev_blocks(base_desc, base_ch, bdev_io->u.write.iovs,
					     bdev_io->u.write.iovcnt, offset,
					     bdev_io->u.write.num_blocks, spdk_bdev_part_complete_io,
					     bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
		offset = bdev_io->u.write.offset_blocks + part->offset_blocks;
		rc = spdk_bdev_write_zeroes_blocks(base_desc, base_ch, offset, bdev_io->u.write.num_blocks,
						   spdk_bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_UNMAP:
		offset = bdev_io->u.unmap.offset_blocks + part->offset_blocks;
		rc = spdk_bdev_unmap_blocks(base_desc, base_ch, offset, bdev_io->u.unmap.num_blocks,
					    spdk_bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_FLUSH:
		offset = bdev_io->u.flush.offset_blocks + part->offset_blocks;
		rc = spdk_bdev_flush_blocks(base_desc, base_ch, offset, bdev_io->u.flush.num_blocks,
					    spdk_bdev_part_complete_io, bdev_io);
		break;
	case SPDK_BDEV_IO_TYPE_RESET:
		rc = spdk_bdev_reset(base_desc, base_ch,
				     spdk_bdev_part_complete_io, bdev_io);
		break;
	default:
		SPDK_ERRLOG("split: unknown I/O type %d\n", bdev_io->type);
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		return;
	}

	if (rc != 0) {
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static int
spdk_bdev_part_channel_create_cb(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_part *part = SPDK_CONTAINEROF(io_device, struct spdk_bdev_part, base);
	struct spdk_bdev_part_channel *ch = ctx_buf;

	ch->part = part;
	ch->base_ch = spdk_bdev_get_io_channel(part->base->desc);
	if (ch->base_ch == NULL) {
		return -1;
	}

	if (part->base->ch_create_cb) {
		return part->base->ch_create_cb(io_device, ctx_buf);
	} else {
		return 0;
	}
}

static void
spdk_bdev_part_channel_destroy_cb(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_part *part = SPDK_CONTAINEROF(io_device, struct spdk_bdev_part, base);
	struct spdk_bdev_part_channel *ch = ctx_buf;

	if (part->base->ch_destroy_cb) {
		part->base->ch_destroy_cb(io_device, ctx_buf);
	}
	spdk_put_io_channel(ch->base_ch);
}
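
/*
 * Worked example for the offset remapping in spdk_bdev_part_submit_request
 * (hypothetical numbers): a part constructed with offset_blocks = 2048 that
 * receives a read at block 10 forwards it to the base bdev at block 2058;
 * the block count is unchanged.
 */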

int
spdk_bdev_part_base_construct(struct spdk_bdev_part_base *base, struct spdk_bdev *bdev,
			      spdk_bdev_remove_cb_t remove_cb, struct spdk_bdev_module_if *module,
			      struct spdk_bdev_fn_table *fn_table, struct bdev_part_tailq *tailq,
			      uint32_t channel_size, spdk_io_channel_create_cb ch_create_cb,
			      spdk_io_channel_destroy_cb ch_destroy_cb)
{
	int rc;

	fn_table->get_io_channel = spdk_bdev_part_get_io_channel;
	fn_table->io_type_supported = spdk_bdev_part_io_type_supported;

	base->bdev = bdev;
	base->ref = 0;
	base->module = module;
	base->fn_table = fn_table;
	base->tailq = tailq;
	base->claimed = false;
	base->channel_size = channel_size;
	base->ch_create_cb = ch_create_cb;
	base->ch_destroy_cb = ch_destroy_cb;

	rc = spdk_bdev_open(bdev, false, remove_cb, bdev, &base->desc);
	if (rc) {
		SPDK_ERRLOG("could not open bdev %s\n", spdk_bdev_get_name(bdev));
		return -1;
	}

	return 0;
}

int
spdk_bdev_part_construct(struct spdk_bdev_part *part, struct spdk_bdev_part_base *base,
			 char *name, uint64_t offset_blocks, uint64_t num_blocks,
			 char *product_name)
{
	part->bdev.name = name;
	part->bdev.blocklen = base->bdev->blocklen;
	part->bdev.blockcnt = num_blocks;
	part->offset_blocks = offset_blocks;

	part->bdev.write_cache = base->bdev->write_cache;
	part->bdev.need_aligned_buffer = base->bdev->need_aligned_buffer;
	part->bdev.product_name = product_name;
	part->bdev.ctxt = part;
	part->bdev.module = base->module;
	part->bdev.fn_table = base->fn_table;

	__sync_fetch_and_add(&base->ref, 1);
	part->base = base;

	if (!base->claimed) {
		int rc;

		rc = spdk_bdev_module_claim_bdev(base->bdev, base->desc, base->module);
		if (rc) {
			SPDK_ERRLOG("could not claim bdev %s\n", spdk_bdev_get_name(base->bdev));
			free(part->bdev.name);
			return -1;
		}
		base->claimed = true;
	}

	spdk_io_device_register(&part->base, spdk_bdev_part_channel_create_cb,
				spdk_bdev_part_channel_destroy_cb,
				base->channel_size);
	spdk_vbdev_register(&part->bdev, &base->bdev, 1);
	TAILQ_INSERT_TAIL(base->tailq, part, tailq);

	return 0;
}

SPDK_LOG_REGISTER_TRACE_FLAG("bdev", SPDK_TRACE_BDEV)