/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"

#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/nvme_spec.h"
#include "spdk/scsi_spec.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/log.h"
#include "spdk/string.h"

#ifdef SPDK_CONFIG_VTUNE
#include "ittnotify.h"
#include "ittnotify_types.h"
int __itt_init_ittlib(const char *, __itt_group_id);
#endif

#define SPDK_BDEV_IO_POOL_SIZE	(64 * 1024)
#define BUF_SMALL_POOL_SIZE	8192
#define BUF_LARGE_POOL_SIZE	1024

typedef TAILQ_HEAD(, spdk_bdev_io) need_buf_tailq_t;

struct spdk_bdev_mgr {
	struct spdk_mempool *bdev_io_pool;

	struct spdk_mempool *buf_small_pool;
	struct spdk_mempool *buf_large_pool;

	TAILQ_HEAD(, spdk_bdev_module_if) bdev_modules;

	TAILQ_HEAD(, spdk_bdev) bdevs;

	spdk_bdev_poller_start_cb start_poller_fn;
	spdk_bdev_poller_stop_cb stop_poller_fn;

	bool init_complete;
	bool module_init_complete;

#ifdef SPDK_CONFIG_VTUNE
	__itt_domain	*domain;
#endif
};

static struct spdk_bdev_mgr g_bdev_mgr = {
	.bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
	.bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
	.start_poller_fn = NULL,
	.stop_poller_fn = NULL,
	.init_complete = false,
	.module_init_complete = false,
};

static spdk_bdev_init_cb	g_cb_fn = NULL;
static void			*g_cb_arg = NULL;

struct spdk_bdev_mgmt_channel {
	need_buf_tailq_t need_buf_small;
	need_buf_tailq_t need_buf_large;
};

struct spdk_bdev_desc {
	struct spdk_bdev	*bdev;
	spdk_bdev_remove_cb_t	remove_cb;
	void			*remove_ctx;
	bool			write;
	TAILQ_ENTRY(spdk_bdev_desc) link;
};

struct spdk_bdev_channel {
	struct spdk_bdev	*bdev;

	/* The channel for the underlying device */
	struct spdk_io_channel	*channel;

	/* Channel for the bdev manager */
	struct spdk_io_channel	*mgmt_channel;

	struct spdk_bdev_io_stat stat;

	/*
	 * Count of I/O submitted to bdev module and waiting for completion.
	 * Incremented before submit_request() is called on an spdk_bdev_io.
	 */
	uint64_t		io_outstanding;

#ifdef SPDK_CONFIG_VTUNE
	uint64_t		start_tsc;
	uint64_t		interval_tsc;
	__itt_string_handle	*handle;
#endif
};

struct spdk_bdev *
spdk_bdev_first(void)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_NEXT(prev, link);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

static struct spdk_bdev *
_bdev_next_leaf(struct spdk_bdev *bdev)
{
	while (bdev != NULL) {
		if (TAILQ_EMPTY(&bdev->vbdevs)) {
			return bdev;
		} else {
			bdev = TAILQ_NEXT(bdev, link);
		}
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_first_leaf(void)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));

	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next_leaf(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = _bdev_next_leaf(TAILQ_NEXT(prev, link));

	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_get_by_name(const char *bdev_name)
{
	struct spdk_bdev *bdev = spdk_bdev_first();

	while (bdev != NULL) {
		if (strcmp(bdev_name, bdev->name) == 0) {
			return bdev;
		}
		bdev = spdk_bdev_next(bdev);
	}

	return NULL;
}

static void
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf)
{
	assert(bdev_io->get_buf_cb != NULL);
	assert(buf != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	bdev_io->buf = buf;
	/* Round the buffer address up to the next 512-byte boundary. */
	bdev_io->u.read.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.read.iovs[0].iov_len = bdev_io->u.read.len;
	bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io);
}

static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_mempool *pool;
	struct spdk_bdev_io *tmp;
	void *buf;
	need_buf_tailq_t *tailq;
	uint64_t length;
	struct spdk_bdev_mgmt_channel *ch;

	assert(bdev_io->u.read.iovcnt == 1);

	length = bdev_io->u.read.len;
	buf = bdev_io->buf;

	ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel);

	if (length <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &ch->need_buf_large;
	}

	if (TAILQ_EMPTY(tailq)) {
		spdk_mempool_put(pool, buf);
	} else {
		tmp = TAILQ_FIRST(tailq);
		TAILQ_REMOVE(tailq, tmp, buf_link);
		spdk_bdev_io_set_buf(tmp, buf);
	}
}

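/*
 * Attach a data buffer to bdev_io, drawing from the small or large pool
 * based on the I/O length.  If the pool is empty, the request is queued on
 * the per-thread management channel and serviced later, when another I/O
 * returns its buffer through spdk_bdev_io_put_buf() above.
 */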
void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb)
{
	uint64_t len = bdev_io->u.read.len;
	struct spdk_mempool *pool;
	need_buf_tailq_t *tailq;
	void *buf = NULL;
	struct spdk_bdev_mgmt_channel *ch;

	assert(cb != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	if (spdk_unlikely(bdev_io->u.read.iovs[0].iov_base != NULL)) {
		/* Buffer already present */
		cb(bdev_io->ch->channel, bdev_io);
		return;
	}

	ch = spdk_io_channel_get_ctx(bdev_io->ch->mgmt_channel);

	bdev_io->get_buf_cb = cb;
	if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &ch->need_buf_small;
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &ch->need_buf_large;
	}

	buf = spdk_mempool_get(pool);

	if (!buf) {
		TAILQ_INSERT_TAIL(tailq, bdev_io, buf_link);
	} else {
		spdk_bdev_io_set_buf(bdev_io, buf);
	}
}

static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int max_bdev_module_size = 0;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	return max_bdev_module_size;
}

void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
}

static int
spdk_bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;

	TAILQ_INIT(&ch->need_buf_small);
	TAILQ_INIT(&ch->need_buf_large);

	return 0;
}

static void
spdk_bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_mgmt_channel *ch = ctx_buf;

	if (!TAILQ_EMPTY(&ch->need_buf_small) || !TAILQ_EMPTY(&ch->need_buf_large)) {
		SPDK_ERRLOG("Pending I/O list wasn't empty on channel destruction\n");
	}
}

static void
spdk_bdev_init_complete(int rc)
{
	spdk_bdev_init_cb cb_fn = g_cb_fn;
	void *cb_arg = g_cb_arg;

	g_bdev_mgr.init_complete = true;
	g_cb_fn = NULL;
	g_cb_arg = NULL;

	cb_fn(cb_arg, rc);
}

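/*
 * Initialization finishes in two stages: module_init_complete records that
 * every module's module_init() has run, while the user's init callback only
 * fires once no module examine() calls remain outstanding (see
 * spdk_bdev_module_examine_done() below).
 */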
static void
spdk_bdev_module_init_complete(int rc)
{
	struct spdk_bdev_module_if *m;

	g_bdev_mgr.module_init_complete = true;

	if (rc != 0) {
		spdk_bdev_init_complete(rc);
		return;
	}

	/*
	 * Check all bdev modules for examinations in progress.  If any
	 * exist, return immediately since we cannot finish bdev subsystem
	 * initialization until all are completed.
	 */
	TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
		if (m->examine_in_progress > 0) {
			return;
		}
	}

	spdk_bdev_init_complete(0);
}

static int
spdk_bdev_modules_init(void)
{
	struct spdk_bdev_module_if *module;
	int rc;

	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
		rc = module->module_init();
		if (rc != 0) {
			return rc;
		}
	}

	return 0;
}

void
spdk_bdev_poller_start(struct spdk_bdev_poller **ppoller,
		       spdk_bdev_poller_fn fn,
		       void *arg,
		       uint32_t lcore,
		       uint64_t period_microseconds)
{
	g_bdev_mgr.start_poller_fn(ppoller, fn, arg, lcore, period_microseconds);
}

void
spdk_bdev_poller_stop(struct spdk_bdev_poller **ppoller)
{
	g_bdev_mgr.stop_poller_fn(ppoller);
}

void
spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg,
		     spdk_bdev_poller_start_cb start_poller_fn,
		     spdk_bdev_poller_stop_cb stop_poller_fn)
{
	int cache_size;
	int rc = 0;
	char mempool_name[32];

	assert(cb_fn != NULL);

	g_cb_fn = cb_fn;
	g_cb_arg = cb_arg;

	g_bdev_mgr.start_poller_fn = start_poller_fn;
	g_bdev_mgr.stop_poller_fn = stop_poller_fn;

	snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());

	g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
				  SPDK_BDEV_IO_POOL_SIZE,
				  sizeof(struct spdk_bdev_io) +
				  spdk_bdev_module_get_max_ctx_size(),
				  64,
				  SPDK_ENV_SOCKET_ID_ANY);

	if (g_bdev_mgr.bdev_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		spdk_bdev_module_init_complete(-1);
		return;
	}

	/**
	 * Ensure no more than half of the total buffers end up in local caches, by
	 * using spdk_env_get_core_count() to determine how many local caches we need
	 * to account for.
	 */
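	/*
	 * Illustrative arithmetic (assuming, say, a 16-core run): each per-core
	 * cache holds 8192 / (2 * 16) = 256 small buffers, so the caches combined
	 * hold at most 16 * 256 = 4096 buffers, i.e. half of the 8192-buffer pool.
	 */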
	cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
	snprintf(mempool_name, sizeof(mempool_name), "buf_small_pool_%d", getpid());

	g_bdev_mgr.buf_small_pool = spdk_mempool_create(mempool_name,
				    BUF_SMALL_POOL_SIZE,
				    SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_small_pool) {
		SPDK_ERRLOG("create buf small pool failed\n");
		spdk_bdev_module_init_complete(-1);
		return;
	}

	cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
	snprintf(mempool_name, sizeof(mempool_name), "buf_large_pool_%d", getpid());

	g_bdev_mgr.buf_large_pool = spdk_mempool_create(mempool_name,
				    BUF_LARGE_POOL_SIZE,
				    SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_large_pool) {
		SPDK_ERRLOG("create buf large pool failed\n");
		spdk_bdev_module_init_complete(-1);
		return;
	}

#ifdef SPDK_CONFIG_VTUNE
	g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
#endif

	spdk_io_device_register(&g_bdev_mgr, spdk_bdev_mgmt_channel_create,
				spdk_bdev_mgmt_channel_destroy,
				sizeof(struct spdk_bdev_mgmt_channel));

	rc = spdk_bdev_modules_init();
	spdk_bdev_module_init_complete(rc);
}

int
spdk_bdev_finish(void)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}

	if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) {
		SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
			    SPDK_BDEV_IO_POOL_SIZE);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
		SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_small_pool),
			    BUF_SMALL_POOL_SIZE);
		assert(false);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
		SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_large_pool),
			    BUF_LARGE_POOL_SIZE);
		assert(false);
	}

	spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
	spdk_mempool_free(g_bdev_mgr.buf_small_pool);
	spdk_mempool_free(g_bdev_mgr.buf_large_pool);

	spdk_io_device_unregister(&g_bdev_mgr, NULL);

	return 0;
}

struct spdk_bdev_io *
spdk_bdev_get_io(void)
{
	struct spdk_bdev_io *bdev_io;

	bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
	if (!bdev_io) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		abort();
	}

	memset(bdev_io, 0, sizeof(*bdev_io));

	return bdev_io;
}

static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		return;
	}

	if (bdev_io->buf != NULL) {
		spdk_bdev_io_put_buf(bdev_io);
	}

	spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
}

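/*
 * Submission path: io_outstanding is incremented before calling into the
 * module, and in_submit_request is set for the duration of submit_request()
 * so that spdk_bdev_io_complete() can recognize an inline completion and
 * defer the user callback, avoiding unbounded recursion if that callback
 * immediately submits more I/O.
 */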
static void
__submit_request(struct spdk_bdev *bdev, struct spdk_bdev_io *bdev_io)
{
	struct spdk_io_channel *ch;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);

	ch = bdev_io->ch->channel;

	bdev_io->ch->io_outstanding++;
	bdev_io->in_submit_request = true;
	bdev->fn_table->submit_request(ch, bdev_io);
	bdev_io->in_submit_request = false;
}

static int
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;

	__submit_request(bdev, bdev_io);
	return 0;
}

void
spdk_bdev_io_resubmit(struct spdk_bdev_io *bdev_io, struct spdk_bdev_desc *new_bdev_desc)
{
	struct spdk_bdev *new_bdev = new_bdev_desc->bdev;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
	bdev_io->bdev = new_bdev;

	/*
	 * These fields are normally set during spdk_bdev_io_init(), but since bdev is
	 * being switched, they need to be reinitialized.
	 */
	bdev_io->gencnt = new_bdev->gencnt;

	/*
	 * This bdev_io was already submitted so decrement io_outstanding to ensure it
	 * does not get double-counted.
	 */
	assert(bdev_io->ch->io_outstanding > 0);
	bdev_io->ch->io_outstanding--;
	__submit_request(new_bdev, bdev_io);
}

static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	bdev_io->gencnt = bdev->gencnt;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->in_submit_request = false;
}

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
{
	return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
}

int
spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	if (bdev->fn_table->dump_config_json) {
		return bdev->fn_table->dump_config_json(bdev->ctxt, w);
	}

	return 0;
}

static int
spdk_bdev_channel_create(void *io_device, void *ctx_buf)
{
	struct spdk_bdev *bdev = io_device;
	struct spdk_bdev_channel *ch = ctx_buf;

	ch->bdev = io_device;
	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
	ch->mgmt_channel = spdk_get_io_channel(&g_bdev_mgr);
	memset(&ch->stat, 0, sizeof(ch->stat));
	ch->io_outstanding = 0;

#ifdef SPDK_CONFIG_VTUNE
	{
		char *name;

		__itt_init_ittlib(NULL, 0);
		name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
		if (!name) {
			return -1;
		}
		ch->handle = __itt_string_handle_create(name);
		free(name);
		ch->start_tsc = spdk_get_ticks();
		ch->interval_tsc = spdk_get_ticks_hz() / 100;
	}
#endif

	return 0;
}

static void
_spdk_bdev_abort_io(need_buf_tailq_t *queue, struct spdk_bdev_channel *ch)
{
	struct spdk_bdev_io *bdev_io, *tmp;

	TAILQ_FOREACH_SAFE(bdev_io, queue, buf_link, tmp) {
		if (bdev_io->ch == ch) {
			TAILQ_REMOVE(queue, bdev_io, buf_link);
			spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
		}
	}
}

static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_channel *ch = ctx_buf;
	struct spdk_bdev_mgmt_channel *mgmt_channel;

	mgmt_channel = spdk_io_channel_get_ctx(ch->mgmt_channel);

	_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, ch);
	_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, ch);

	spdk_put_io_channel(ch->channel);
	spdk_put_io_channel(ch->mgmt_channel);
	assert(ch->io_outstanding == 0);
}

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
{
	return spdk_get_io_channel(desc->bdev);
}

const char *
spdk_bdev_get_name(const struct spdk_bdev *bdev)
{
	return bdev->name;
}

const char *
spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
{
	return bdev->product_name;
}

uint32_t
spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
{
	return bdev->blocklen;
}

uint64_t
spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
{
	return bdev->blockcnt;
}

size_t
spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
{
	/* TODO: push this logic down to the bdev modules */
	if (bdev->need_aligned_buffer) {
		return bdev->blocklen;
	}

	return 1;
}

uint32_t
spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
{
	return bdev->optimal_io_boundary;
}

bool
spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
{
	return bdev->write_cache;
}

static bool
spdk_bdev_io_valid(struct spdk_bdev *bdev, uint64_t offset, uint64_t nbytes)
{
	/* Return failure if offset is not a multiple of bdev->blocklen */
	if (offset % bdev->blocklen) {
		return false;
	}

	/* Return failure if nbytes is not a multiple of bdev->blocklen */
	if (nbytes % bdev->blocklen) {
		return false;
	}

	/* Return failure if offset + nbytes is less than offset; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if (offset + nbytes < offset) {
		return false;
	}

	/* Return failure if offset + nbytes exceeds the size of the bdev */
	if (offset + nbytes > bdev->blockcnt * bdev->blocklen) {
		return false;
	}

	return true;
}

int
spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!spdk_bdev_io_valid(bdev, offset, nbytes)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iov.iov_base = buf;
	bdev_io->u.read.iov.iov_len = nbytes;
	bdev_io->u.read.iovs = &bdev_io->u.read.iov;
	bdev_io->u.read.iovcnt = 1;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

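/*
 * Typical call sequence for spdk_bdev_read() (an illustrative sketch only;
 * read_done, buf, and the 4096-byte transfer size are hypothetical caller
 * choices, and error checks are omitted for brevity):
 *
 *	static void
 *	read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 *	{
 *		SPDK_NOTICELOG("read %s\n", success ? "succeeded" : "failed");
 *		spdk_bdev_free_io(bdev_io);
 *	}
 *
 *	struct spdk_bdev_desc *desc;
 *	struct spdk_io_channel *ch;
 *
 *	spdk_bdev_open(bdev, false, NULL, NULL, &desc);
 *	ch = spdk_bdev_get_io_channel(desc);
 *	spdk_bdev_read(desc, ch, buf, 0, 4096, read_done, NULL);
 */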
int
spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!spdk_bdev_io_valid(bdev, offset, nbytes)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during readv\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iovs = iov;
	bdev_io->u.read.iovcnt = iovcnt;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid(bdev, offset, nbytes)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iov.iov_base = buf;
	bdev_io->u.write.iov.iov_len = nbytes;
	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
	bdev_io->u.write.iovcnt = 1;
	bdev_io->u.write.len = nbytes;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid(bdev, offset, len)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iovs = iov;
	bdev_io->u.write.iovcnt = iovcnt;
	bdev_io->u.write.len = len;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		       uint64_t offset, uint64_t len,
		       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	int rc;
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	if (!spdk_bdev_io_valid(bdev, offset, len)) {
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during write_zeroes\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->u.write.len = len;
	bdev_io->u.write.offset = offset;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	if (!spdk_bdev_io_valid(bdev, offset, nbytes)) {
		return -EINVAL;
	}

	if (nbytes == 0) {
		SPDK_ERRLOG("Can't unmap 0 bytes\n");
		return -EINVAL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.unmap.offset = offset;
	bdev_io->u.unmap.len = nbytes;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.flush.offset = offset;
	bdev_io->u.flush.length = length;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

static void
_spdk_bdev_reset_dev(void *io_device, void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;
	int rc;

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		SPDK_ERRLOG("reset failed\n");
		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
	}
}

static void
_spdk_bdev_reset_abort_channel(void *io_device, struct spdk_io_channel *ch,
			       void *ctx)
{
	struct spdk_bdev_channel *channel;
	struct spdk_bdev_mgmt_channel *mgmt_channel;

	channel = spdk_io_channel_get_ctx(ch);
	mgmt_channel = spdk_io_channel_get_ctx(channel->mgmt_channel);

	_spdk_bdev_abort_io(&mgmt_channel->need_buf_small, channel);
	_spdk_bdev_abort_io(&mgmt_channel->need_buf_large, channel);
}

static void
_spdk_bdev_start_reset(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;

	spdk_for_each_channel(bdev_io->bdev, _spdk_bdev_reset_abort_channel,
			      bdev_io, _spdk_bdev_reset_dev);
}

static void
_spdk_bdev_start_next_reset(struct spdk_bdev *bdev)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_thread *thread;

	pthread_mutex_lock(&bdev->mutex);

	if (bdev->reset_in_progress || TAILQ_EMPTY(&bdev->queued_resets)) {
		pthread_mutex_unlock(&bdev->mutex);
		return;
	} else {
		bdev_io = TAILQ_FIRST(&bdev->queued_resets);
		TAILQ_REMOVE(&bdev->queued_resets, bdev_io, link);
		bdev->reset_in_progress = true;
		thread = spdk_io_channel_get_thread(bdev_io->ch->channel);
		spdk_thread_send_msg(thread, _spdk_bdev_start_reset, bdev_io);
	}

	pthread_mutex_unlock(&bdev->mutex);
}

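/*
 * Resets are serialized per bdev: spdk_bdev_reset() only queues the request,
 * and _spdk_bdev_start_next_reset() dispatches at most one at a time.  Before
 * a reset reaches the module, each channel's queued buffer waits are aborted
 * via spdk_for_each_channel().
 */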
int
spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during reset\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	pthread_mutex_lock(&bdev->mutex);
	TAILQ_INSERT_TAIL(&bdev->queued_resets, bdev_io, link);
	pthread_mutex_unlock(&bdev->mutex);

	_spdk_bdev_start_next_reset(bdev);

	return 0;
}

void
spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		      struct spdk_bdev_io_stat *stat)
{
#ifdef SPDK_CONFIG_VTUNE
	SPDK_ERRLOG("Calling spdk_bdev_get_io_stat is not allowed when VTune integration is enabled.\n");
	memset(stat, 0, sizeof(*stat));
	return;
#endif

	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);

	*stat = channel->stat;
	memset(&channel->stat, 0, sizeof(channel->stat));
}

int
spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			      const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
			      spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during nvme_admin_passthru\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
	bdev_io->u.nvme_passthru.cmd = *cmd;
	bdev_io->u.nvme_passthru.buf = buf;
	bdev_io->u.nvme_passthru.nbytes = nbytes;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
			   const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
			   spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev *bdev = desc->bdev;
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	if (!desc->write) {
		/*
		 * Do not try to parse the NVMe command - we could maybe use bits in the opcode
		 * to easily determine if the command is a read or write, but for now just
		 * do not allow io_passthru with a read-only descriptor.
		 */
		return -EBADF;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during nvme_io_passthru\n");
		return -ENOMEM;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
	bdev_io->u.nvme_passthru.cmd = *cmd;
	bdev_io->u.nvme_passthru.buf = buf;
	bdev_io->u.nvme_passthru.nbytes = nbytes;

	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return rc;
	}

	return 0;
}

int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io is NULL\n");
		return -1;
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		SPDK_ERRLOG("bdev_io is in pending state\n");
		assert(false);
		return -1;
	}

	spdk_bdev_put_io(bdev_io);

	return 0;
}

static void
_spdk_bdev_io_complete(void *ctx)
{
	struct spdk_bdev_io *bdev_io = ctx;

	assert(bdev_io->cb != NULL);
	bdev_io->cb(bdev_io, bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS, bdev_io->caller_ctx);
}

void
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	bdev_io->status = status;

	assert(bdev_io->ch->io_outstanding > 0);
	bdev_io->ch->io_outstanding--;
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/* Successful reset */
		if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			/* Increase the bdev generation */
			bdev_io->bdev->gencnt++;
		}
		bdev_io->bdev->reset_in_progress = false;
		_spdk_bdev_start_next_reset(bdev_io->bdev);
	} else {
		/*
		 * Check the gencnt, to see if this I/O was issued before the most
		 * recent reset. If the gencnt is not equal, then just free the I/O
		 * without calling the callback, since the caller will have already
		 * freed its context for this I/O.
		 */
		if (bdev_io->bdev->gencnt != bdev_io->gencnt) {
			spdk_bdev_put_io(bdev_io);
			return;
		}
	}

	if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		switch (bdev_io->type) {
		case SPDK_BDEV_IO_TYPE_READ:
			bdev_io->ch->stat.bytes_read += bdev_io->u.read.len;
			bdev_io->ch->stat.num_read_ops++;
			break;
		case SPDK_BDEV_IO_TYPE_WRITE:
			bdev_io->ch->stat.bytes_written += bdev_io->u.write.len;
			bdev_io->ch->stat.num_write_ops++;
			break;
		default:
			break;
		}
	}

#ifdef SPDK_CONFIG_VTUNE
	uint64_t now_tsc = spdk_get_ticks();

	if (now_tsc > (bdev_io->ch->start_tsc + bdev_io->ch->interval_tsc)) {
		uint64_t data[5];

		data[0] = bdev_io->ch->stat.num_read_ops;
		data[1] = bdev_io->ch->stat.bytes_read;
		data[2] = bdev_io->ch->stat.num_write_ops;
		data[3] = bdev_io->ch->stat.bytes_written;
		data[4] = bdev_io->bdev->fn_table->get_spin_time ?
			  bdev_io->bdev->fn_table->get_spin_time(bdev_io->ch->channel) : 0;

		__itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->ch->handle,
				   __itt_metadata_u64, 5, data);

		memset(&bdev_io->ch->stat, 0, sizeof(bdev_io->ch->stat));
		bdev_io->ch->start_tsc = now_tsc;
	}
#endif

	if (bdev_io->in_submit_request || bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/*
		 * Defer completion to avoid potential infinite recursion if the
		 * user's completion callback issues a new I/O.
		 */
		spdk_thread_send_msg(spdk_io_channel_get_thread(bdev_io->ch->channel),
				     _spdk_bdev_io_complete, bdev_io);
	} else {
		_spdk_bdev_io_complete(bdev_io);
	}
}

void
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
				  enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
{
	if (sc == SPDK_SCSI_STATUS_GOOD) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
		bdev_io->error.scsi.sc = sc;
		bdev_io->error.scsi.sk = sk;
		bdev_io->error.scsi.asc = asc;
		bdev_io->error.scsi.ascq = ascq;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
			     int *sc, int *sk, int *asc, int *ascq)
{
	assert(sc != NULL);
	assert(sk != NULL);
	assert(asc != NULL);
	assert(ascq != NULL);

	switch (bdev_io->status) {
	case SPDK_BDEV_IO_STATUS_SUCCESS:
		*sc = SPDK_SCSI_STATUS_GOOD;
		*sk = SPDK_SCSI_SENSE_NO_SENSE;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	case SPDK_BDEV_IO_STATUS_NVME_ERROR:
		spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
		break;
	case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
		*sc = bdev_io->error.scsi.sc;
		*sk = bdev_io->error.scsi.sk;
		*asc = bdev_io->error.scsi.asc;
		*ascq = bdev_io->error.scsi.ascq;
		break;
	default:
		*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
		*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	}
}

void
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
{
	if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->error.nvme.sct = sct;
		bdev_io->error.nvme.sc = sc;
		bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
{
	assert(sct != NULL);
	assert(sc != NULL);

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
		*sct = bdev_io->error.nvme.sct;
		*sc = bdev_io->error.nvme.sc;
	} else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_SUCCESS;
	} else {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	}
}

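/*
 * Registration inserts the bdev into the global list and offers it to every
 * module with an examine() callback, so virtual bdevs can be built on top of
 * it.  Each examine() call must be balanced by the module eventually calling
 * spdk_bdev_module_examine_done(), which is what finally completes subsystem
 * initialization.
 */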
static void
_spdk_bdev_register(struct spdk_bdev *bdev)
{
	struct spdk_bdev_module_if *module;

	assert(bdev->module != NULL);

	bdev->status = SPDK_BDEV_STATUS_READY;

	/* initialize the reset generation value to zero */
	bdev->gencnt = 0;
	TAILQ_INIT(&bdev->open_descs);
	bdev->bdev_opened_for_write = false;

	TAILQ_INIT(&bdev->vbdevs);
	TAILQ_INIT(&bdev->base_bdevs);

	bdev->reset_in_progress = false;
	TAILQ_INIT(&bdev->queued_resets);

	spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
				sizeof(struct spdk_bdev_channel));

	pthread_mutex_init(&bdev->mutex, NULL);
	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Inserting bdev %s into list\n", bdev->name);
	TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);

	TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, tailq) {
		if (module->examine) {
			module->examine_in_progress++;
			module->examine(bdev);
		}
	}
}

void
spdk_bdev_register(struct spdk_bdev *bdev)
{
	_spdk_bdev_register(bdev);
}

void
spdk_vbdev_register(struct spdk_bdev *vbdev, struct spdk_bdev **base_bdevs, int base_bdev_count)
{
	int i;

	_spdk_bdev_register(vbdev);
	for (i = 0; i < base_bdev_count; i++) {
		assert(base_bdevs[i] != NULL);
		TAILQ_INSERT_TAIL(&vbdev->base_bdevs, base_bdevs[i], base_bdev_link);
		TAILQ_INSERT_TAIL(&base_bdevs[i]->vbdevs, vbdev, vbdev_link);
	}
}

void
spdk_bdev_unregister(struct spdk_bdev *bdev)
{
	struct spdk_bdev_desc *desc, *tmp;
	int rc;
	bool do_destruct = true;

	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Removing bdev %s from list\n", bdev->name);

	pthread_mutex_lock(&bdev->mutex);

	bdev->status = SPDK_BDEV_STATUS_REMOVING;

	TAILQ_FOREACH_SAFE(desc, &bdev->open_descs, link, tmp) {
		if (desc->remove_cb) {
			pthread_mutex_unlock(&bdev->mutex);
			do_destruct = false;
			desc->remove_cb(desc->remove_ctx);
			pthread_mutex_lock(&bdev->mutex);
		}
	}

	if (!do_destruct) {
		pthread_mutex_unlock(&bdev->mutex);
		return;
	}

	TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
	pthread_mutex_unlock(&bdev->mutex);

	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(bdev, NULL);

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
		SPDK_ERRLOG("destruct failed\n");
	}
}

void
spdk_vbdev_unregister(struct spdk_bdev *vbdev)
{
	struct spdk_bdev *base_bdev;

	assert(!TAILQ_EMPTY(&vbdev->base_bdevs));
	TAILQ_FOREACH(base_bdev, &vbdev->base_bdevs, base_bdev_link) {
		TAILQ_REMOVE(&base_bdev->vbdevs, vbdev, vbdev_link);
	}
	spdk_bdev_unregister(vbdev);
}

void
spdk_bdev_module_examine_done(struct spdk_bdev_module_if *module)
{
	struct spdk_bdev_module_if *m;

	assert(module->examine_in_progress > 0);
	module->examine_in_progress--;

	/*
	 * Check all bdev modules for examinations in progress.  If any
	 * exist, return immediately since we cannot finish bdev subsystem
	 * initialization until all are completed.
	 */
	TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, tailq) {
		if (m->examine_in_progress > 0) {
			return;
		}
	}

	if (g_bdev_mgr.module_init_complete && !g_bdev_mgr.init_complete) {
		/*
		 * Modules already finished initialization - now that all
		 * the bdev modules have finished their asynchronous I/O
		 * processing, the entire bdev layer can be marked as complete.
		 */
		spdk_bdev_init_complete(0);
	}
}

int
spdk_bdev_open(struct spdk_bdev *bdev, bool write, spdk_bdev_remove_cb_t remove_cb,
	       void *remove_ctx, struct spdk_bdev_desc **_desc)
{
	struct spdk_bdev_desc *desc;

	desc = calloc(1, sizeof(*desc));
	if (desc == NULL) {
		return -ENOMEM;
	}

	pthread_mutex_lock(&bdev->mutex);

	if (write && (bdev->bdev_opened_for_write || bdev->claim_module)) {
		SPDK_ERRLOG("failed, %s already opened for write or claimed\n", bdev->name);
		free(desc);
		pthread_mutex_unlock(&bdev->mutex);
		return -EPERM;
	}

	TAILQ_INSERT_TAIL(&bdev->open_descs, desc, link);

	if (write) {
		bdev->bdev_opened_for_write = true;
	}

	desc->bdev = bdev;
	desc->remove_cb = remove_cb;
	desc->remove_ctx = remove_ctx;
	desc->write = write;
	*_desc = desc;

	pthread_mutex_unlock(&bdev->mutex);

	return 0;
}

void
spdk_bdev_close(struct spdk_bdev_desc *desc)
{
	struct spdk_bdev *bdev = desc->bdev;
	bool do_unregister = false;

	pthread_mutex_lock(&bdev->mutex);

	if (desc->write) {
		assert(bdev->bdev_opened_for_write);
		bdev->bdev_opened_for_write = false;
	}

	TAILQ_REMOVE(&bdev->open_descs, desc, link);
	free(desc);

	if (bdev->status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->open_descs)) {
		do_unregister = true;
	}
	pthread_mutex_unlock(&bdev->mutex);

	if (do_unregister == true) {
		spdk_bdev_unregister(bdev);
	}
}

int
spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
			    struct spdk_bdev_module_if *module)
{
	if (bdev->claim_module != NULL) {
		SPDK_ERRLOG("bdev %s already claimed by module %s\n", bdev->name,
			    bdev->claim_module->name);
		return -EPERM;
	}

	if ((!desc || !desc->write) && bdev->bdev_opened_for_write) {
		SPDK_ERRLOG("bdev %s already opened with write access\n", bdev->name);
		return -EPERM;
	}

	if (desc && !desc->write) {
		bdev->bdev_opened_for_write = true;
		desc->write = true;
	}

	bdev->claim_module = module;
	return 0;
}

void
spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
{
	assert(bdev->claim_module != NULL);
	bdev->claim_module = NULL;
}

struct spdk_bdev *
spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
{
	return desc->bdev;
}

void
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
{
	struct iovec *iovs;
	int iovcnt;

	if (bdev_io == NULL) {
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		iovs = bdev_io->u.read.iovs;
		iovcnt = bdev_io->u.read.iovcnt;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		iovs = bdev_io->u.write.iovs;
		iovcnt = bdev_io->u.write.iovcnt;
		break;
	default:
		iovs = NULL;
		iovcnt = 0;
		break;
	}

	if (iovp) {
		*iovp = iovs;
	}
	if (iovcntp) {
		*iovcntp = iovcnt;
	}
}

void
spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module)
{
	/*
	 * Modules with examine callbacks must be initialized first, so they are
	 * ready to handle examine callbacks from later modules that will
	 * register physical bdevs.
	 */
	if (bdev_module->examine != NULL) {
		TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
	} else {
		TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
	}
}
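
/*
 * Minimal registration sketch (illustrative only; my_module_init and the
 * initializer layout are hypothetical - only fields actually referenced in
 * this file are shown):
 *
 *	static int my_module_init(void) { return 0; }
 *
 *	static struct spdk_bdev_module_if my_module = {
 *		.name = "my_module",
 *		.module_init = my_module_init,
 *	};
 *
 *	spdk_bdev_module_list_add(&my_module);
 */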