/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"

#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/nvme_spec.h"
#include "spdk/scsi_spec.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/log.h"
#include "spdk/string.h"

#ifdef SPDK_CONFIG_VTUNE
#include "ittnotify.h"
#include "ittnotify_types.h"
int __itt_init_ittlib(const char *, __itt_group_id);
#endif

#define SPDK_BDEV_IO_POOL_SIZE	(64 * 1024)
#define BUF_SMALL_POOL_SIZE	8192
#define BUF_LARGE_POOL_SIZE	1024

typedef TAILQ_HEAD(, spdk_bdev_io) need_buf_tailq_t;

struct spdk_bdev_mgr {
	struct spdk_mempool *bdev_io_pool;

	struct spdk_mempool *buf_small_pool;
	struct spdk_mempool *buf_large_pool;

	TAILQ_HEAD(, spdk_bdev_module_if) bdev_modules;

	TAILQ_HEAD(, spdk_bdev) bdevs;

	spdk_bdev_poller_start_cb start_poller_fn;
	spdk_bdev_poller_stop_cb stop_poller_fn;

	bool init_complete;
	bool module_init_complete;

#ifdef SPDK_CONFIG_VTUNE
	__itt_domain *domain;
#endif
};

static struct spdk_bdev_mgr g_bdev_mgr = {
	.bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
	.bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
	.start_poller_fn = NULL,
	.stop_poller_fn = NULL,
	.init_complete = false,
	.module_init_complete = false,
};

static spdk_bdev_init_cb	g_cb_fn = NULL;
static void			*g_cb_arg = NULL;

struct spdk_bdev_mgmt_channel {
	need_buf_tailq_t need_buf_small;
	need_buf_tailq_t need_buf_large;
};

struct spdk_bdev_desc {
	struct spdk_bdev		*bdev;
	spdk_bdev_remove_cb_t		remove_cb;
	void				*remove_ctx;
	bool				write;
	TAILQ_ENTRY(spdk_bdev_desc)	link;
};

struct spdk_bdev_channel {
	struct spdk_bdev	*bdev;

	/* The channel for the underlying device */
	struct spdk_io_channel	*channel;

	/* Channel for the bdev manager */
	struct spdk_io_channel	*mgmt_channel;

	struct spdk_bdev_io_stat stat;

	/*
	 * Count of I/O submitted to bdev module and waiting for completion.
	 * Incremented before submit_request() is called on an spdk_bdev_io.
	 */
	uint64_t		io_outstanding;

#ifdef SPDK_CONFIG_VTUNE
	uint64_t		start_tsc;
	uint64_t		interval_tsc;
	__itt_string_handle	*handle;
#endif
};

struct spdk_bdev *
spdk_bdev_first(void)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_NEXT(prev, link);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

static struct spdk_bdev *
_bdev_next_leaf(struct spdk_bdev *bdev)
{
	while (bdev != NULL) {
		if (TAILQ_EMPTY(&bdev->vbdevs)) {
			return bdev;
		} else {
			bdev = TAILQ_NEXT(bdev, link);
		}
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_first_leaf(void)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));

	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next_leaf(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_NEXT(prev, link));

	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_get_by_name(const char *bdev_name)
{
	struct spdk_bdev *bdev = spdk_bdev_first();

	while (bdev != NULL) {
		if (strcmp(bdev_name, bdev->name) == 0) {
			return bdev;
		}
		bdev = spdk_bdev_next(bdev);
	}

	return NULL;
}

static void
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf)
{
	assert(bdev_io->get_buf_cb != NULL);
	assert(buf != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	bdev_io->buf = buf;
	bdev_io->u.read.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.read.iovs[0].iov_len = bdev_io->u.read.len;
	bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io);
}

static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_mempool *pool;
	struct spdk_bdev_io *tmp;
	void *buf;
	need_buf_tailq_t *tailq;
	uint64_t length;
	struct spdk_bdev_mgmt_channel *ch;

	assert(bdev_io->u.read.iovcnt == 1);

	length = bdev_io->u.read.len;
	buf = bdev_io->buf;

	ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel);

	if (length <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &ch->need_buf_large;
	}

	if (TAILQ_EMPTY(tailq)) {
		spdk_mempool_put(pool, buf);
	} else {
		tmp = TAILQ_FIRST(tailq);
		TAILQ_REMOVE(tailq, tmp, buf_link);
		spdk_bdev_io_set_buf(tmp, buf);
	}
}
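/*
 * Deferred buffer allocation for read I/O: if spdk_bdev_io_get_buf() below
 * cannot get a buffer from the mempool, the I/O is queued on the per-thread
 * need_buf_small/need_buf_large tailqs. When a completing I/O returns its
 * buffer in spdk_bdev_io_put_buf() above, the buffer is handed directly to
 * the first waiter instead of going back to the pool. The pools allocate
 * each element 512 bytes oversized so spdk_bdev_io_set_buf() can round the
 * data pointer up to a 512-byte boundary.
 */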
void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb)
{
	uint64_t len = bdev_io->u.read.len;
	struct spdk_mempool *pool;
	need_buf_tailq_t *tailq;
	void *buf = NULL;
	struct spdk_bdev_mgmt_channel *ch;

	assert(cb != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	if (spdk_unlikely(bdev_io->u.read.iovs[0].iov_base != NULL)) {
		/* Buffer already present */
		cb(bdev_io->ch->channel, bdev_io);
		return;
	}

	ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel);

	bdev_io->get_buf_cb = cb;
	if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &ch->need_buf_large;
	}

	buf = spdk_mempool_get(pool);

	if (!buf) {
		TAILQ_INSERT_TAIL(tailq, bdev_io, buf_link);
	} else {
		spdk_bdev_io_set_buf(bdev_io, buf);
	}
}

static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int max_bdev_module_size = 0;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	return max_bdev_module_size;
}

void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
}

static int
spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->need_buf_small);
	TAILQ_INIT(&ch->need_buf_large);

	return 0;
}

static void
spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->need_buf_small) || !TAILQ_EMPTY(&ch->need_buf_large)) {
		SPDK_ERRLOG("Pending I/O list wasn't empty on channel destruction\n");
	}
}

static void
spdk_bdev_init_complete(int rc)
{
	spdk_bdev_init_cb cb_fn = g_cb_fn;
	void *cb_arg = g_cb_arg;

	g_bdev_mgr.init_complete = true;
	g_cb_fn = NULL;
	g_cb_arg = NULL;

	cb_fn(cb_arg, rc);
}

static void
spdk_bdev_module_init_complete(int rc)
{
	struct spdk_bdev_module_if *m;

	g_bdev_mgr.module_init_complete = true;

	if (rc != 0) {
		spdk_bdev_init_complete(rc);
		/* The init callback has already fired and been cleared; do not
		 * fall through and complete a second time below. */
		return;
	}

	/*
	 * Check all bdev modules for any examinations in progress. If any
	 * exist, return immediately since we cannot finish bdev subsystem
	 * initialization until all are completed.
	 */
	TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
		if (m->examine_in_progress > 0) {
			return;
		}
	}

	spdk_bdev_init_complete(0);
}

static int
spdk_bdev_modules_init(void)
{
	struct spdk_bdev_module_if *module;
	int rc;

	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
		rc = module->module_init();
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

void
spdk_bdev_poller_start(struct spdk_bdev_poller **ppoller,
		       spdk_bdev_poller_fn fn,
		       void *arg,
		       uint32_t lcore,
		       uint64_t period_microseconds)
{
	g_bdev_mgr.start_poller_fn(ppoller, fn, arg, lcore, period_microseconds);
}

void
spdk_bdev_poller_stop(struct spdk_bdev_poller **ppoller)
{
	g_bdev_mgr.stop_poller_fn(ppoller);
}

void
spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg,
		     spdk_bdev_poller_start_cb start_poller_fn,
		     spdk_bdev_poller_stop_cb stop_poller_fn)
{
	int cache_size;
	int rc = 0;

	assert(cb_fn != NULL);

	g_cb_fn = cb_fn;
	g_cb_arg = cb_arg;

	g_bdev_mgr.start_poller_fn = start_poller_fn;
	g_bdev_mgr.stop_poller_fn = stop_poller_fn;

	g_bdev_mgr.bdev_io_pool = spdk_mempool_create("bdev_io",
				  SPDK_BDEV_IO_POOL_SIZE,
				  sizeof(struct spdk_bdev_io) +
				  spdk_bdev_module_get_max_ctx_size(),
				  64,
				  SPDK_ENV_SOCKET_ID_ANY);

	if (g_bdev_mgr.bdev_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		spdk_bdev_module_init_complete(-1);
		return;
	}

	/**
	 * Ensure no more than half of the total buffers end up in local caches, by
	 * using spdk_env_get_core_count() to determine how many local caches we need
	 * to account for.
	 */
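	/*
	 * For example, with BUF_SMALL_POOL_SIZE = 8192 and 16 cores, each
	 * per-core cache holds at most 8192 / (2 * 16) = 256 buffers, so the
	 * caches combined can pin at most 16 * 256 = 4096 buffers, half the
	 * pool. (16 cores is just an illustrative value.)
	 */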
	cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
	g_bdev_mgr.buf_small_pool = spdk_mempool_create("buf_small_pool",
				    BUF_SMALL_POOL_SIZE,
				    SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_small_pool) {
		SPDK_ERRLOG("create rbuf small pool failed\n");
		spdk_bdev_module_init_complete(-1);
		return;
	}

	cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
	g_bdev_mgr.buf_large_pool = spdk_mempool_create("buf_large_pool",
				    BUF_LARGE_POOL_SIZE,
				    SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_large_pool) {
		SPDK_ERRLOG("create rbuf large pool failed\n");
		spdk_bdev_module_init_complete(-1);
		return;
	}

#ifdef SPDK_CONFIG_VTUNE
	g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
#endif

	spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create,
				spdk_bdev_mgmt_channel_destroy,
				sizeof(struct spdk_bdev_mgmt_channel));

	rc = spdk_bdev_modules_init();
	spdk_bdev_module_init_complete(rc);
}

int
spdk_bdev_finish(void)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}

	if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) {
		SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
			    SPDK_BDEV_IO_POOL_SIZE);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
		SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_small_pool),
			    BUF_SMALL_POOL_SIZE);
		assert(false);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
		SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_large_pool),
			    BUF_LARGE_POOL_SIZE);
		assert(false);
	}

	spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
	spdk_mempool_free(g_bdev_mgr.buf_small_pool);
	spdk_mempool_free(g_bdev_mgr.buf_large_pool);

	spdk_io_device_unregister(&g_bdev_mgr, NULL);

	return 0;
}

struct spdk_bdev_io *
spdk_bdev_get_io(void)
{
	struct spdk_bdev_io *bdev_io;

	bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
	if (!bdev_io) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		abort();
	}

	memset(bdev_io, 0, sizeof(*bdev_io));

	return bdev_io;
}

static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		return;
	}

	if (bdev_io->buf != NULL) {
		spdk_bdev_io_put_buf(bdev_io);
	}

	spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
}

static void
__submit_request(struct spdk_bdev *bdev, struct spdk_bdev_io *bdev_io)
{
	struct spdk_io_channel *ch;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);

	ch = bdev_io->ch->channel;

	bdev_io->ch->io_outstanding++;
	bdev_io->in_submit_request = true;
	bdev->fn_table->submit_request(ch, bdev_io);
	bdev_io->in_submit_request = false;
}

static int
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;

	__submit_request(bdev, bdev_io);
	return 0;
}
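/*
 * spdk_bdev_io_resubmit() below redirects an already-submitted I/O to a
 * different bdev, e.g. when a vbdev module hands an I/O off to another
 * device. The io_outstanding decrement compensates for the increment that
 * __submit_request() will perform again on resubmission.
 */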
void
spdk_bdev_io_resubmit(struct spdk_bdev_io *bdev_io, struct spdk_bdev_desc *new_bdev_desc)
{
	struct spdk_bdev *new_bdev = new_bdev_desc->bdev;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
	bdev_io->bdev = new_bdev;

	/*
	 * These fields are normally set during spdk_bdev_io_init(), but since bdev is
	 * being switched, they need to be reinitialized.
	 */
	bdev_io->gencnt = new_bdev->gencnt;

	/*
	 * This bdev_io was already submitted so decrement io_outstanding to ensure it
	 * does not get double-counted.
	 */
	assert(bdev_io->ch->io_outstanding > 0);
	bdev_io->ch->io_outstanding--;
	__submit_request(new_bdev, bdev_io);
}

static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	bdev_io->gencnt = bdev->gencnt;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->in_submit_request = false;
}

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
{
	return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
}

int
spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	if (bdev->fn_table->dump_config_json) {
		return bdev->fn_table->dump_config_json(bdev->ctxt, w);
	}

	return 0;
}

static int
spdk_bdev_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev *bdev = io_device;
	struct spdk_bdev_channel *ch = ctx_buf;

	ch->bdev = io_device;
	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
	ch->mgmt_channel = spdk_get_io_channel(&g_bdev_mgr);
	memset(&ch->stat, 0, sizeof(ch->stat));
	ch->io_outstanding = 0;

#ifdef SPDK_CONFIG_VTUNE
	{
		char *name;

		__itt_init_ittlib(NULL, 0);
		name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
		if (!name) {
			return -1;
		}
		ch->handle = __itt_string_handle_create(name);
		free(name);
		ch->start_tsc = spdk_get_ticks();
		ch->interval_tsc = spdk_get_ticks_hz() / 100;
	}
#endif

	return 0;
}

static void
_spdk_bdev_abort_io(need_buf_tailq_t *queue, struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_io *bdev_io, *tmp;

	TAILQ_FOREACH_SAFE(bdev_io, queue, buf_link, tmp) {
		if (bdev_io->ch == ch) {
			TAILQ_REMOVE(queue, bdev_io, buf_link);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_channel *ch = ctx_buf;
	struct spdk_bdev_mgmt_channel *mgmt_channel;

	mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel);

	_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, ch);
	_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, ch);

	spdk_put_io_channel(ch->channel);
	spdk_put_io_channel(ch->mgmt_channel);
	assert(ch->io_outstanding == 0);
}

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
{
	return spdk_get_io_channel(desc->bdev);
}

const char *
spdk_bdev_get_name(const struct spdk_bdev *bdev)
{
	return bdev->name;
}

const char *
spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
{
	return bdev->product_name;
}
uint32_t
spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
{
	return bdev->blocklen;
}

uint64_t
spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
{
	return bdev->blockcnt;
}

size_t
spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
{
	/* TODO: push this logic down to the bdev modules */
	if (bdev->need_aligned_buffer) {
		return bdev->blocklen;
	}

	return 1;
}

bool
spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
{
	return bdev->write_cache;
}

static int
spdk_bdev_io_valid(struct spdk_bdev *bdev, uint64_t offset, uint64_t nbytes)
{
	/* Return failure if nbytes is not a multiple of bdev->blocklen */
	if (nbytes % bdev->blocklen) {
		return -1;
	}

	/* Return failure if offset + nbytes is less than offset; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if (offset + nbytes < offset) {
		return -1;
	}

	/* Return failure if offset + nbytes exceeds the size of the bdev */
	if (offset + nbytes > bdev->blockcnt * bdev->blocklen) {
		return -1;
	}

	return 0;
}

int
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iov.iov_base = buf;
	bdev_io->u.read.iov.iov_len = nbytes;
	bdev_io->u.read.iovs = &bdev_io->u.read.iov;
	bdev_io->u.read.iovcnt = 1;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during readv\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iovs = iov;
	bdev_io->u.read.iovcnt = iovcnt;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}
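/*
 * A typical call into spdk_bdev_read() above, as an illustrative sketch only
 * ("desc", "io_ch", "buf", and "read_done" are placeholder names, not symbols
 * defined in this file; spdk_bdev_write() below follows the same pattern).
 * The completion callback matches spdk_bdev_io_completion_cb and typically
 * releases the I/O with spdk_bdev_free_io():
 *
 *	static void
 *	read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 *	{
 *		spdk_bdev_free_io(bdev_io);
 *	}
 *
 *	rc = spdk_bdev_read(desc, io_ch, buf, 0, 4096, read_done, NULL);
 *
 * A non-zero return means the I/O was never submitted: -EINVAL if the
 * offset/length fail spdk_bdev_io_valid() (nbytes must be a multiple of the
 * block size), -ENOMEM if no spdk_bdev_io was available.
 */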
int
spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iov.iov_base = buf;
	bdev_io->u.write.iov.iov_len = nbytes;
	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
	bdev_io->u.write.iovcnt = 1;
	bdev_io->u.write.len = nbytes;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	if (spdk_bdev_io_valid(bdev, offset, len) != 0) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iovs = iov;
	bdev_io->u.write.iovcnt = iovcnt;
	bdev_io->u.write.len = len;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return -EINVAL;
	}

	if (nbytes == 0) {
		SPDK_ERRLOG("Can't unmap 0 bytes\n");
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.unmap.offset = offset;
	bdev_io->u.unmap.len = nbytes;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.flush.offset = offset;
	bdev_io->u.flush.length = length;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

static void
_spdk_bdev_reset_dev(void *io_device, void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	int rc;

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		SPDK_ERRLOG("reset failed\n");
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
_spdk_bdev_reset_abort_channel(void *io_device, struct spdk_io_channel *ch,
			       void *ctx)
{
	struct spdk_bdev_channel *channel;
	struct spdk_bdev_mgmt_channel *mgmt_channel;

	channel = spdk_io_channel_get_ctx(ch);
	mgmt_channel = spdk_io_channel_get_ctx(channel->mgmt_channel);

	_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, channel);
	_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, channel);
}

static void
_spdk_bdev_start_reset(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;

	spdk_for_each_channel(bdev_io->bdev, _spdk_bdev_reset_abort_channel,
			      bdev_io, _spdk_bdev_reset_dev);
}

static void
_spdk_bdev_start_next_reset(struct spdk_bdev *bdev)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_thread *thread;

	pthread_mutex_lock(&bdev->mutex);

	if (bdev->reset_in_progress || TAILQ_EMPTY(&bdev->queued_resets)) {
		pthread_mutex_unlock(&bdev->mutex);
		return;
	} else {
		bdev_io = TAILQ_FIRST(&bdev->queued_resets);
		TAILQ_REMOVE(&bdev->queued_resets, bdev_io, link);
		bdev->reset_in_progress = true;
		thread = spdk_io_channel_get_thread(bdev_io->ch->channel);
		spdk_thread_send_msg(thread, _spdk_bdev_start_reset, bdev_io);
	}

	pthread_mutex_unlock(&bdev->mutex);
}

int
spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during reset\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	pthread_mutex_lock(&bdev->mutex);
	TAILQ_INSERT_TAIL(&bdev->queued_resets, bdev_io, link);
	pthread_mutex_unlock(&bdev->mutex);

	_spdk_bdev_start_next_reset(bdev);

	return 0;
}

void
spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		      struct spdk_bdev_io_stat *stat)
{
#ifdef SPDK_CONFIG_VTUNE
	SPDK_ERRLOG("Calling spdk_bdev_get_io_stat is not allowed when VTune integration is enabled.\n");
	memset(stat, 0, sizeof(*stat));
	return;
#endif

	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	*stat = channel->stat;
	memset(&channel->stat, 0, sizeof(channel->stat));
}
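/*
 * NVMe passthru: the two entry points below forward a caller-constructed
 * NVMe command (admin or I/O) to the underlying bdev module unmodified.
 * Both require a descriptor opened for write, since the command is not
 * parsed to determine whether it reads or writes.
 */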
int
spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			      const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
			      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
	bdev_io->u.nvme_passthru.cmd = *cmd;
	bdev_io->u.nvme_passthru.buf = buf;
	bdev_io->u.nvme_passthru.nbytes = nbytes;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		/*
		 * Do not try to parse the NVMe command - we could maybe use bits in the opcode
		 * to easily determine if the command is a read or write, but for now just
		 * do not allow io_passthru with a read-only descriptor.
		 */
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during nvme_io_passthru\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
	bdev_io->u.nvme_passthru.cmd = *cmd;
	bdev_io->u.nvme_passthru.buf = buf;
	bdev_io->u.nvme_passthru.nbytes = nbytes;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io is NULL\n");
		return -1;
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		SPDK_ERRLOG("bdev_io is in pending state\n");
		assert(false);
		return -1;
	}

	spdk_bdev_put_io(bdev_io);

	return 0;
}

static void
_spdk_bdev_io_complete(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;

	assert(bdev_io->cb != NULL);
	bdev_io->cb(bdev_io, bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS, bdev_io->caller_ctx);
}

void
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	bdev_io->status = status;

	assert(bdev_io->ch->io_outstanding > 0);
	bdev_io->ch->io_outstanding--;
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/* Successful reset */
		if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			/* Increase the bdev generation */
			bdev_io->bdev->gencnt++;
		}
		bdev_io->bdev->reset_in_progress = false;
		_spdk_bdev_start_next_reset(bdev_io->bdev);
	} else {
		/*
		 * Check the gencnt, to see if this I/O was issued before the most
		 * recent reset. If the gencnt is not equal, then just free the I/O
		 * without calling the callback, since the caller will have already
		 * freed its context for this I/O.
		 */
		if (bdev_io->bdev->gencnt != bdev_io->gencnt) {
			spdk_bdev_put_io(bdev_io);
			return;
		}
	}

	if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_READ:
			bdev_io->ch->stat.bytes_read += bdev_io->u.read.len;
			bdev_io->ch->stat.num_read_ops++;
			break;
		case SPDK_BDEV_IO_TYPE_WRITE:
			bdev_io->ch->stat.bytes_written += bdev_io->u.write.len;
			bdev_io->ch->stat.num_write_ops++;
			break;
		default:
			break;
		}
	}

#ifdef SPDK_CONFIG_VTUNE
	uint64_t now_tsc = spdk_get_ticks();

	if (now_tsc > (bdev_io->ch->start_tsc + bdev_io->ch->interval_tsc)) {
		uint64_t data[5];

		data[0] = bdev_io->ch->stat.num_read_ops;
		data[1] = bdev_io->ch->stat.bytes_read;
		data[2] = bdev_io->ch->stat.num_write_ops;
		data[3] = bdev_io->ch->stat.bytes_written;
		data[4] = bdev_io->bdev->fn_table->get_spin_time ?
			  bdev_io->bdev->fn_table->get_spin_time(bdev_io->ch->channel) : 0;

		__itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->ch->handle,
				   __itt_metadata_u64, 5, data);

		memset(&bdev_io->ch->stat, 0, sizeof(bdev_io->ch->stat));
		bdev_io->ch->start_tsc = now_tsc;
	}
#endif

	if (bdev_io->in_submit_request || bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/*
		 * Defer completion to avoid potential infinite recursion if the
		 * user's completion callback issues a new I/O.
		 */
		spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->ch->channel),
				     _spdk_bdev_io_complete, bdev_io);
	} else {
		_spdk_bdev_io_complete(bdev_io);
	}
}

void
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
				  enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
{
	if (sc == SPDK_SCSI_STATUS_GOOD) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
		bdev_io->error.scsi.sc = sc;
		bdev_io->error.scsi.sk = sk;
		bdev_io->error.scsi.asc = asc;
		bdev_io->error.scsi.ascq = ascq;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
			     int *sc, int *sk, int *asc, int *ascq)
{
	assert(sc != NULL);
	assert(sk != NULL);
	assert(asc != NULL);
	assert(ascq != NULL);

	switch (bdev_io->status) {
	case SPDK_BDEV_IO_STATUS_SUCCESS:
		*sc = SPDK_SCSI_STATUS_GOOD;
		*sk = SPDK_SCSI_SENSE_NO_SENSE;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	case SPDK_BDEV_IO_STATUS_NVME_ERROR:
		spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
		break;
	case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
		*sc = bdev_io->error.scsi.sc;
		*sk = bdev_io->error.scsi.sk;
		*asc = bdev_io->error.scsi.asc;
		*ascq = bdev_io->error.scsi.ascq;
		break;
	default:
		*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
		*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	}
}
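/*
 * Illustrative sketch (not part of this file) of how an NVMe-backed bdev
 * module might report a completion through the helper below, assuming a
 * driver callback that receives a struct spdk_nvme_cpl:
 *
 *	static void
 *	nvme_io_done(void *ctx, const struct spdk_nvme_cpl *cpl)
 *	{
 *		struct spdk_bdev_io *bdev_io = ctx;
 *
 *		spdk_bdev_io_complete_nvme_status(bdev_io, cpl->status.sct,
 *						  cpl->status.sc);
 *	}
 */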
void
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
{
	if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->error.nvme.sct = sct;
		bdev_io->error.nvme.sc = sc;
		bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
{
	assert(sct != NULL);
	assert(sc != NULL);

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
		*sct = bdev_io->error.nvme.sct;
		*sc = bdev_io->error.nvme.sc;
	} else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_SUCCESS;
	} else {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	}
}

static void
_spdk_bdev_register(struct spdk_bdev *bdev)
{
	struct spdk_bdev_module_if *module;

	assert(bdev->module != NULL);

	bdev->status = SPDK_BDEV_STATUS_READY;

	/* initialize the reset generation value to zero */
	bdev->gencnt = 0;

	TAILQ_INIT(&bdev->open_descs);
	bdev->bdev_opened_for_write = false;

	TAILQ_INIT(&bdev->vbdevs);
	TAILQ_INIT(&bdev->base_bdevs);

	bdev->reset_in_progress = false;
	TAILQ_INIT(&bdev->queued_resets);

	spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
				sizeof(struct spdk_bdev_channel));

	pthread_mutex_init(&bdev->mutex, NULL);
	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Inserting bdev %s into list\n", bdev->name);
	TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);

	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
		if (module->examine) {
			module->examine_in_progress++;
			module->examine(bdev);
		}
	}
}

void
spdk_bdev_register(struct spdk_bdev *bdev)
{
	_spdk_bdev_register(bdev);
}

void
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
	int i;

	_spdk_bdev_register(vbdev);
	for (i = 0; i < base_bdev_count; i++) {
		assert(base_bdevs[i] != NULL);
		TAILQ_INSERT_TAIL(&vbdev->base_bdevs, base_bdevs[i], base_bdev_link);
		TAILQ_INSERT_TAIL(&base_bdevs[i]->vbdevs, vbdev, vbdev_link);
	}
}

void
spdk_bdev_unregister(struct spdk_bdev *bdev)
{
	struct spdk_bdev_desc *desc, *tmp;
	int rc;
	bool do_destruct = true;

	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Removing bdev %s from list\n", bdev->name);

	pthread_mutex_lock(&bdev->mutex);

	bdev->status = SPDK_BDEV_STATUS_REMOVING;

	TAILQ_FOREACH_SAFE(desc, &bdev->open_descs, link, tmp) {
		if (desc->remove_cb) {
			pthread_mutex_unlock(&bdev->mutex);
			do_destruct = false;
			desc->remove_cb(desc->remove_ctx);
			pthread_mutex_lock(&bdev->mutex);
		}
	}

	if (!do_destruct) {
		pthread_mutex_unlock(&bdev->mutex);
		return;
	}

	TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
	pthread_mutex_unlock(&bdev->mutex);

	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(bdev, NULL);

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
		SPDK_ERRLOG("destruct failed\n");
	}
}
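/*
 * spdk_vbdev_register() above links a virtual bdev and its base bdevs in both
 * directions. spdk_vbdev_unregister() below severs the base_bdev->vbdevs side
 * first; otherwise the base bdevs would still look like non-leaves to
 * _bdev_next_leaf() after the vbdev is gone.
 */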
void
spdk_vbdev_unregister(struct spdk_bdev *vbdev)
{
	struct spdk_bdev *base_bdev;

	assert(!TAILQ_EMPTY(&vbdev->base_bdevs));
	TAILQ_FOREACH(base_bdev, &vbdev->base_bdevs, base_bdev_link) {
		TAILQ_REMOVE(&base_bdev->vbdevs, vbdev, vbdev_link);
	}
	spdk_bdev_unregister(vbdev);
}

void
spdk_bdev_module_examine_done(struct spdk_bdev_module_if *module)
{
	struct spdk_bdev_module_if *m;

	assert(module->examine_in_progress > 0);
	module->examine_in_progress--;

	/*
	 * Check all bdev modules for any examinations in progress. If any
	 * exist, return immediately since we cannot finish bdev subsystem
	 * initialization until all are completed.
	 */
	TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
		if (m->examine_in_progress > 0) {
			return;
		}
	}

	if (g_bdev_mgr.module_init_complete && !g_bdev_mgr.init_complete) {
		/*
		 * Modules already finished initialization - now that all
		 * the bdev modules have finished their asynchronous I/O
		 * processing, the entire bdev layer can be marked as complete.
		 */
		spdk_bdev_init_complete(0);
	}
}

int
spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
	       void *remove_ctx, struct spdk_bdev_desc **_desc)
{
	struct spdk_bdev_desc *desc;

	desc = calloc(1, sizeof(*desc));
	if (desc == NULL) {
		return -ENOMEM;
	}

	pthread_mutex_lock(&bdev->mutex);

	if (write && (bdev->bdev_opened_for_write || bdev->claim_module)) {
		SPDK_ERRLOG("failed, %s already opened for write or claimed\n", bdev->name);
		free(desc);
		pthread_mutex_unlock(&bdev->mutex);
		return -EPERM;
	}

	TAILQ_INSERT_TAIL(&bdev->open_descs, desc, link);

	if (write) {
		bdev->bdev_opened_for_write = true;
	}

	desc->bdev = bdev;
	desc->remove_cb = remove_cb;
	desc->remove_ctx = remove_ctx;
	desc->write = write;
	*_desc = desc;

	pthread_mutex_unlock(&bdev->mutex);

	return 0;
}

void
spdk_bdev_close(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev = desc->bdev;
	bool do_unregister = false;

	pthread_mutex_lock(&bdev->mutex);

	if (desc->write) {
		assert(bdev->bdev_opened_for_write);
		bdev->bdev_opened_for_write = false;
	}

	TAILQ_REMOVE(&bdev->open_descs, desc, link);
	free(desc);

	if (bdev->status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->open_descs)) {
		do_unregister = true;
	}
	pthread_mutex_unlock(&bdev->mutex);

	if (do_unregister == true) {
		spdk_bdev_unregister(bdev);
	}
}

int
spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_bdev_module_if *module)
{
	if (bdev->claim_module != NULL) {
		SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name,
			    bdev->claim_module->name);
		return -EPERM;
	}

	if ((!desc || !desc->write) && bdev->bdev_opened_for_write) {
		SPDK_ERRLOG("bdev %s already opened with write access\n", bdev->name);
		return -EPERM;
	}

	if (desc && !desc->write) {
		bdev->bdev_opened_for_write = true;
		desc->write = true;
	}

	bdev->claim_module = module;
	return 0;
}

void
spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
{
	assert(bdev->claim_module != NULL);
	bdev->claim_module = NULL;
}
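/*
 * Open/claim lifecycle for the API above, as an illustrative sketch ("bdev",
 * "hotremove_cb", and "ctx" are placeholder names): a consumer opens a
 * descriptor, submits I/O on a channel obtained through it, and closes it
 * when done. Only one writer is allowed at a time; a second write open, or a
 * write open of a claimed bdev, fails with -EPERM.
 *
 *	struct spdk_bdev_desc *desc;
 *
 *	if (spdk_bdev_open(bdev, true, hotremove_cb, ctx, &desc) == 0) {
 *		struct spdk_io_channel *io_ch = spdk_bdev_get_io_channel(desc);
 *		... submit I/O, then spdk_put_io_channel(io_ch) ...
 *		spdk_bdev_close(desc);
 *	}
 */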
struct spdk_bdev *
spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
{
	return desc->bdev;
}

void
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
{
	struct iovec *iovs;
	int iovcnt;

	if (bdev_io == NULL) {
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		iovs = bdev_io->u.read.iovs;
		iovcnt = bdev_io->u.read.iovcnt;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		iovs = bdev_io->u.write.iovs;
		iovcnt = bdev_io->u.write.iovcnt;
		break;
	default:
		iovs = NULL;
		iovcnt = 0;
		break;
	}

	if (iovp) {
		*iovp = iovs;
	}
	if (iovcntp) {
		*iovcntp = iovcnt;
	}
}

void
spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module)
{
	/*
	 * Modules with examine callbacks must be initialized first, so they are
	 * ready to handle examine callbacks from later modules that will
	 * register physical bdevs.
	 */
	if (bdev_module->examine != NULL) {
		TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
	}
}
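/*
 * Note: bdev modules normally do not call spdk_bdev_module_list_add()
 * directly; they are expected to register through the
 * SPDK_BDEV_MODULE_REGISTER() macro (see spdk_internal/bdev.h), which invokes
 * this function from a constructor at load time.
 */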