/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"

#include <rte_config.h>
#include <rte_lcore.h>
#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/likely.h"
#include "spdk/queue.h"
#include "spdk/nvme_spec.h"
#include "spdk/scsi_spec.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/event.h"
#include "spdk_internal/log.h"

#define SPDK_BDEV_IO_POOL_SIZE	(64 * 1024)
#define BUF_SMALL_POOL_SIZE	8192
#define BUF_LARGE_POOL_SIZE	1024

typedef TAILQ_HEAD(, spdk_bdev_io) need_buf_tailq_t;

struct spdk_bdev_mgr {
	struct spdk_mempool *bdev_io_pool;

	struct spdk_mempool *buf_small_pool;
	struct spdk_mempool *buf_large_pool;

	need_buf_tailq_t need_buf_small[RTE_MAX_LCORE];
	need_buf_tailq_t need_buf_large[RTE_MAX_LCORE];

	TAILQ_HEAD(, spdk_bdev_module_if) bdev_modules;
	TAILQ_HEAD(, spdk_bdev_module_if) vbdev_modules;

	TAILQ_HEAD(, spdk_bdev) bdevs;
};

static struct spdk_bdev_mgr g_bdev_mgr = {
	.bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
	.vbdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.vbdev_modules),
	.bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
};

struct spdk_bdev_channel {
	struct spdk_bdev *bdev;

	/* The channel for the underlying device */
	struct spdk_io_channel *channel;
};

struct spdk_bdev *
spdk_bdev_first(void)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_NEXT(prev, link);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_get_by_name(const char *bdev_name)
{
	struct spdk_bdev *bdev = spdk_bdev_first();

	while (bdev != NULL) {
		if (strncmp(bdev_name, bdev->name, sizeof(bdev->name)) == 0) {
			return bdev;
		}
		bdev = spdk_bdev_next(bdev);
	}

	return NULL;
}
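
/*
 * The helpers below manage the data buffers that the bdev layer loans to read
 * I/O: each buffer comes from the small or large pool (sized with 512 bytes of
 * slack so the data pointer can be rounded up to a 512-byte boundary), and when
 * a pool is empty the I/O is parked on a per-core wait queue until another I/O
 * returns its buffer.
 */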

static void
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf)
{
	assert(bdev_io->get_buf_cb != NULL);
	assert(buf != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	bdev_io->buf = buf;
	bdev_io->u.read.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.read.iovs[0].iov_len = bdev_io->u.read.len;
	bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io);
}

static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_mempool *pool;
	struct spdk_bdev_io *tmp;
	void *buf;
	need_buf_tailq_t *tailq;
	uint64_t length;

	assert(bdev_io->u.read.iovcnt == 1);

	length = bdev_io->u.read.len;
	buf = bdev_io->buf;

	if (length <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &g_bdev_mgr.need_buf_small[rte_lcore_id()];
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &g_bdev_mgr.need_buf_large[rte_lcore_id()];
	}

	if (TAILQ_EMPTY(tailq)) {
		spdk_mempool_put(pool, buf);
	} else {
		tmp = TAILQ_FIRST(tailq);
		TAILQ_REMOVE(tailq, tmp, buf_link);
		spdk_bdev_io_set_buf(tmp, buf);
	}
}

void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb)
{
	uint64_t len = bdev_io->u.read.len;
	struct spdk_mempool *pool;
	need_buf_tailq_t *tailq;
	void *buf = NULL;

	assert(cb != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	if (spdk_unlikely(bdev_io->u.read.iovs[0].iov_base != NULL)) {
		/* Buffer already present */
		cb(bdev_io->ch->channel, bdev_io);
		return;
	}

	bdev_io->get_buf_cb = cb;
	if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_bdev_mgr.buf_small_pool;
		tailq = &g_bdev_mgr.need_buf_small[rte_lcore_id()];
	} else {
		pool = g_bdev_mgr.buf_large_pool;
		tailq = &g_bdev_mgr.need_buf_large[rte_lcore_id()];
	}

	buf = spdk_mempool_get(pool);

	if (!buf) {
		TAILQ_INSERT_TAIL(tailq, bdev_io, buf_link);
	} else {
		spdk_bdev_io_set_buf(bdev_io, buf);
	}
}

static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int max_bdev_module_size = 0;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	return max_bdev_module_size;
}

static void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
}

static int
spdk_bdev_initialize(void)
{
	int i, cache_size;
	struct spdk_bdev_module_if *bdev_module;
	int rc = 0;

	g_bdev_mgr.bdev_io_pool = spdk_mempool_create("blockdev_io",
				  SPDK_BDEV_IO_POOL_SIZE,
				  sizeof(struct spdk_bdev_io) +
				  spdk_bdev_module_get_max_ctx_size(),
				  64,
				  SPDK_ENV_SOCKET_ID_ANY);

	if (g_bdev_mgr.bdev_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		return -1;
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		TAILQ_INIT(&g_bdev_mgr.need_buf_small[i]);
		TAILQ_INIT(&g_bdev_mgr.need_buf_large[i]);
	}

	/**
	 * Ensure no more than half of the total buffers end up in local caches, by
	 * using spdk_env_get_core_count() to determine how many local caches we need
	 * to account for.
	 */
	cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
	g_bdev_mgr.buf_small_pool = spdk_mempool_create("buf_small_pool",
				    BUF_SMALL_POOL_SIZE,
				    SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_small_pool) {
		SPDK_ERRLOG("create rbuf small pool failed\n");
		return -1;
	}

	cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
	g_bdev_mgr.buf_large_pool = spdk_mempool_create("buf_large_pool",
				    BUF_LARGE_POOL_SIZE,
				    SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
				    cache_size,
				    SPDK_ENV_SOCKET_ID_ANY);
	if (!g_bdev_mgr.buf_large_pool) {
		SPDK_ERRLOG("create rbuf large pool failed\n");
		return -1;
	}

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		rc = bdev_module->module_init();
		if (rc) {
			return rc;
		}
	}
	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) {
		rc = bdev_module->module_init();
		if (rc) {
			return rc;
		}
	}

	return 0;
}

static int
spdk_bdev_finish(void)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}

	TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}

	if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) {
		SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
			    SPDK_BDEV_IO_POOL_SIZE);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) {
		SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_small_pool),
			    BUF_SMALL_POOL_SIZE);
		assert(false);
	}

	if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) {
		SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n",
			    spdk_mempool_count(g_bdev_mgr.buf_large_pool),
			    BUF_LARGE_POOL_SIZE);
		assert(false);
	}

	spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
	spdk_mempool_free(g_bdev_mgr.buf_small_pool);
	spdk_mempool_free(g_bdev_mgr.buf_large_pool);

	return 0;
}

struct spdk_bdev_io *
spdk_bdev_get_io(void)
{
	struct spdk_bdev_io *bdev_io;

	bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
	if (!bdev_io) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		abort();
	}

	memset(bdev_io, 0, sizeof(*bdev_io));

	return bdev_io;
}
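
/*
 * Counterpart to spdk_bdev_get_io(): returns a bdev_io to the global pool.  If
 * the I/O still owns a data buffer, spdk_bdev_io_put_buf() either hands that
 * buffer to the next I/O waiting on this core or returns it to its buffer pool.
 */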

static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		return;
	}

	if (bdev_io->buf != NULL) {
		spdk_bdev_io_put_buf(bdev_io);
	}

	spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
}

static void
spdk_bdev_cleanup_pending_buf_io(struct spdk_bdev *bdev)
{
	struct spdk_bdev_io *bdev_io, *tmp;

	TAILQ_FOREACH_SAFE(bdev_io, &g_bdev_mgr.need_buf_small[rte_lcore_id()], buf_link, tmp) {
		if (bdev_io->bdev == bdev) {
			TAILQ_REMOVE(&g_bdev_mgr.need_buf_small[rte_lcore_id()], bdev_io, buf_link);
			bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}

	TAILQ_FOREACH_SAFE(bdev_io, &g_bdev_mgr.need_buf_large[rte_lcore_id()], buf_link, tmp) {
		if (bdev_io->bdev == bdev) {
			TAILQ_REMOVE(&g_bdev_mgr.need_buf_large[rte_lcore_id()], bdev_io, buf_link);
			bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}
}

static void
__submit_request(struct spdk_bdev *bdev, struct spdk_bdev_io *bdev_io)
{
	struct spdk_io_channel *ch;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		spdk_bdev_cleanup_pending_buf_io(bdev);
		ch = NULL;
	} else {
		ch = bdev_io->ch->channel;
	}

	bdev_io->in_submit_request = true;
	bdev->fn_table->submit_request(ch, bdev_io);
	bdev_io->in_submit_request = false;
}

static int
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;

	__submit_request(bdev, bdev_io);
	return 0;
}

void
spdk_bdev_io_resubmit(struct spdk_bdev_io *bdev_io, struct spdk_bdev *new_bdev)
{
	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
	bdev_io->bdev = new_bdev;

	/*
	 * These fields are normally set during spdk_bdev_io_init(), but since bdev is
	 * being switched, they need to be reinitialized.
	 */
	bdev_io->gencnt = new_bdev->gencnt;

	__submit_request(new_bdev, bdev_io);
}

static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	bdev_io->gencnt = bdev->gencnt;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->in_submit_request = false;
	TAILQ_INIT(&bdev_io->child_io);
}

struct spdk_bdev_io *
spdk_bdev_get_child_io(struct spdk_bdev_io *parent,
		       struct spdk_bdev *bdev,
		       spdk_bdev_io_completion_cb cb,
		       void *cb_arg)
{
	struct spdk_bdev_io *child;

	child = spdk_bdev_get_io();
	if (!child) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		return NULL;
	}

	if (cb_arg == NULL) {
		cb_arg = child;
	}

	spdk_bdev_io_init(child, bdev, cb_arg, cb);

	child->type = parent->type;
	memcpy(&child->u, &parent->u, sizeof(child->u));
	child->buf = NULL;
	child->get_buf_cb = NULL;
	child->parent = parent;

	TAILQ_INSERT_TAIL(&parent->child_io, child, link);

	return child;
}

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
{
	return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
}

int
spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	if (bdev->fn_table->dump_config_json) {
		return bdev->fn_table->dump_config_json(bdev->ctxt, w);
	}

	return 0;
}

static int
spdk_bdev_channel_create(void *io_device, uint32_t priority, void *ctx_buf,
			 void *unique_ctx)
{
	struct spdk_bdev *bdev = io_device;
	struct spdk_bdev_channel *ch = ctx_buf;

	ch->bdev = io_device;
	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt, priority);

	return 0;
}

static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_channel *ch = ctx_buf;

	spdk_put_io_channel(ch->channel);
}

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev *bdev, uint32_t priority)
{
	return spdk_get_io_channel(bdev, priority, false, NULL);
}

const char *
spdk_bdev_get_name(const struct spdk_bdev *bdev)
{
	return bdev->name;
}

const char *
spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
{
	return bdev->product_name;
}

uint32_t
spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
{
	return bdev->blocklen;
}

uint64_t
spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
{
	return bdev->blockcnt;
}

uint32_t
spdk_bdev_get_max_unmap_descriptors(const struct spdk_bdev *bdev)
{
	return bdev->max_unmap_bdesc_count;
}

size_t
spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
{
	/* TODO: push this logic down to the bdev modules */
	if (bdev->need_aligned_buffer) {
		return bdev->blocklen;
	}

	return 1;
}

static int
spdk_bdev_io_valid(struct spdk_bdev *bdev, uint64_t offset, uint64_t nbytes)
{
	/* Return failure if nbytes is not a multiple of bdev->blocklen */
	if (nbytes % bdev->blocklen) {
		return -1;
	}

	/* Return failure if offset + nbytes is less than offset; this indicates
	 * that the sum overflowed and wrapped around */
	if (offset + nbytes < offset) {
		return -1;
	}

	/* Return failure if offset + nbytes exceeds the size of the blockdev */
	if (offset + nbytes > bdev->blockcnt * bdev->blocklen) {
		return -1;
	}

	return 0;
}
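
/*
 * A minimal usage sketch for the read path below (read_done, buf, and cb_ctx
 * are caller-defined and shown only for illustration); the completion callback
 * releases the I/O with spdk_bdev_free_io():
 *
 *	static void
 *	read_done(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status, void *cb_ctx)
 *	{
 *		// consume the data on success, then release the I/O
 *		spdk_bdev_free_io(bdev_io);
 *	}
 *
 *	if (spdk_bdev_read(bdev, ch, buf, 0, 4096, read_done, cb_ctx) == NULL) {
 *		// allocation or offset/length validation failed
 *	}
 */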

struct spdk_bdev_io *
spdk_bdev_read(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iov.iov_base = buf;
	bdev_io->u.read.iov.iov_len = nbytes;
	bdev_io->u.read.iovs = &bdev_io->u.read.iov;
	bdev_io->u.read.iovcnt = 1;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_readv(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iovs = iov;
	bdev_io->u.read.iovcnt = iovcnt;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_write(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("blockdev_io memory allocation failed during write\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iov.iov_base = buf;
	bdev_io->u.write.iov.iov_len = nbytes;
	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
	bdev_io->u.write.iovcnt = 1;
	bdev_io->u.write.len = nbytes;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_writev(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, len) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iovs = iov;
	bdev_io->u.write.iovcnt = iovcnt;
	bdev_io->u.write.len = len;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_unmap(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		struct spdk_scsi_unmap_bdesc *unmap_d,
		uint16_t bdesc_count,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (bdesc_count == 0) {
		SPDK_ERRLOG("Invalid bdesc_count 0\n");
		return NULL;
	}

	if (bdesc_count > bdev->max_unmap_bdesc_count) {
		SPDK_ERRLOG("Invalid bdesc_count %u > max_unmap_bdesc_count %u\n",
			    bdesc_count, bdev->max_unmap_bdesc_count);
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.unmap.unmap_bdesc = unmap_d;
	bdev_io->u.unmap.bdesc_count = bdesc_count;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_flush(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.flush.offset = offset;
	bdev_io->u.flush.length = length;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

int
spdk_bdev_reset(struct spdk_bdev *bdev, enum spdk_bdev_reset_type reset_type,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during reset\n");
		return -1;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
	bdev_io->u.reset.type = reset_type;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		SPDK_ERRLOG("reset failed\n");
	}

	return rc;
}
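
/*
 * Completion-side release: recursively frees any child I/O still attached to a
 * parent before returning the parent itself to the bdev_io pool.  Calling this
 * on an I/O that is still pending is a bug and is caught by the assert below.
 */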

int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_io *child_io, *tmp;

	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io is NULL\n");
		return -1;
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		SPDK_ERRLOG("bdev_io is in pending state\n");
		assert(false);
		return -1;
	}

	TAILQ_FOREACH_SAFE(child_io, &bdev_io->child_io, link, tmp) {
		/*
		 * Make sure no references to the parent I/O remain, since it is being
		 * returned to the free pool.
		 */
		child_io->parent = NULL;
		TAILQ_REMOVE(&bdev_io->child_io, child_io, link);

		/*
		 * Child I/O may have a buf that needs to be returned to a pool
		 * on a different core, so free it through the request submission
		 * process rather than calling put_io directly here.
		 */
		spdk_bdev_free_io(child_io);
	}

	spdk_bdev_put_io(bdev_io);

	return 0;
}

static void
bdev_io_deferred_completion(void *arg1, void *arg2)
{
	struct spdk_bdev_io *bdev_io = arg1;
	enum spdk_bdev_io_status status = (enum spdk_bdev_io_status)arg2;

	assert(bdev_io->in_submit_request == false);

	spdk_bdev_io_complete(bdev_io, status);
}

void
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	if (bdev_io->in_submit_request) {
		/*
		 * Defer completion via an event to avoid potential infinite recursion if the
		 * user's completion callback issues a new I/O.
		 */
		spdk_event_call(spdk_event_allocate(spdk_env_get_current_core(),
						    bdev_io_deferred_completion,
						    bdev_io,
						    (void *)status));
		return;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/* Successful reset */
		if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			/* Increase the blockdev generation if it is a hard reset */
			if (bdev_io->u.reset.type == SPDK_BDEV_RESET_HARD) {
				bdev_io->bdev->gencnt++;
			}
		}
	} else {
		/*
		 * Check the gencnt, to see if this I/O was issued before the most
		 * recent reset. If the gencnt is not equal, then just free the I/O
		 * without calling the callback, since the caller will have already
		 * freed its context for this I/O.
		 */
		if (bdev_io->bdev->gencnt != bdev_io->gencnt) {
			spdk_bdev_put_io(bdev_io);
			return;
		}
	}

	bdev_io->status = status;

	assert(bdev_io->cb != NULL);
	bdev_io->cb(bdev_io, status, bdev_io->caller_ctx);
}

void
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
				  enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
{
	if (sc == SPDK_SCSI_STATUS_GOOD) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
		bdev_io->error.scsi.sc = sc;
		bdev_io->error.scsi.sk = sk;
		bdev_io->error.scsi.asc = asc;
		bdev_io->error.scsi.ascq = ascq;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
			     int *sc, int *sk, int *asc, int *ascq)
{
	assert(sc != NULL);
	assert(sk != NULL);
	assert(asc != NULL);
	assert(ascq != NULL);

	switch (bdev_io->status) {
	case SPDK_BDEV_IO_STATUS_SUCCESS:
		*sc = SPDK_SCSI_STATUS_GOOD;
		*sk = SPDK_SCSI_SENSE_NO_SENSE;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	case SPDK_BDEV_IO_STATUS_NVME_ERROR:
		spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
		break;
	case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
		*sc = bdev_io->error.scsi.sc;
		*sk = bdev_io->error.scsi.sk;
		*asc = bdev_io->error.scsi.asc;
		*ascq = bdev_io->error.scsi.ascq;
		break;
	default:
		*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
		*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	}
}

void
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
{
	if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->error.nvme.sct = sct;
		bdev_io->error.nvme.sc = sc;
		bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
{
	assert(sct != NULL);
	assert(sc != NULL);

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
		*sct = bdev_io->error.nvme.sct;
		*sc = bdev_io->error.nvme.sc;
	} else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_SUCCESS;
	} else {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	}
}
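
/*
 * Registration makes a bdev visible to the rest of the stack: it is added to the
 * global bdev list and registered as an I/O device so per-thread channels can be
 * created through spdk_bdev_get_io_channel().
 */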

void
spdk_bdev_register(struct spdk_bdev *bdev)
{
	/* initialize the reset generation value to zero */
	bdev->gencnt = 0;

	spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
				sizeof(struct spdk_bdev_channel));

	pthread_mutex_init(&bdev->mutex, NULL);
	bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Inserting bdev %s into list\n", bdev->name);
	TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link);
}

void
spdk_bdev_unregister(struct spdk_bdev *bdev)
{
	int rc;

	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Removing bdev %s from list\n", bdev->name);

	pthread_mutex_lock(&bdev->mutex);
	assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_UNCLAIMED);
	if (bdev->status == SPDK_BDEV_STATUS_CLAIMED) {
		if (bdev->remove_cb) {
			bdev->status = SPDK_BDEV_STATUS_REMOVING;
			pthread_mutex_unlock(&bdev->mutex);
			bdev->remove_cb(bdev->remove_ctx);
			return;
		} else {
			bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
		}
	}

	TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link);
	pthread_mutex_unlock(&bdev->mutex);

	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(bdev);

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
		SPDK_ERRLOG("destruct failed\n");
	}
}

bool
spdk_bdev_claim(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb,
		void *remove_ctx)
{
	bool success;

	pthread_mutex_lock(&bdev->mutex);

	if (bdev->status != SPDK_BDEV_STATUS_CLAIMED) {
		/* Take ownership of bdev. */
		bdev->remove_cb = remove_cb;
		bdev->remove_ctx = remove_ctx;
		bdev->status = SPDK_BDEV_STATUS_CLAIMED;
		success = true;
	} else {
		/* bdev is already claimed. */
		success = false;
	}

	pthread_mutex_unlock(&bdev->mutex);

	return success;
}

void
spdk_bdev_unclaim(struct spdk_bdev *bdev)
{
	bool do_unregister = false;

	pthread_mutex_lock(&bdev->mutex);
	assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_REMOVING);
	if (bdev->status == SPDK_BDEV_STATUS_REMOVING) {
		do_unregister = true;
	}
	bdev->remove_cb = NULL;
	bdev->remove_ctx = NULL;
	bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
	pthread_mutex_unlock(&bdev->mutex);

	if (do_unregister == true) {
		spdk_bdev_unregister(bdev);
	}
}

void
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
{
	struct iovec *iovs;
	int iovcnt;

	if (bdev_io == NULL) {
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		iovs = bdev_io->u.read.iovs;
		iovcnt = bdev_io->u.read.iovcnt;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		iovs = bdev_io->u.write.iovs;
		iovcnt = bdev_io->u.write.iovcnt;
		break;
	default:
		iovs = NULL;
		iovcnt = 0;
		break;
	}

	if (iovp) {
		*iovp = iovs;
	}
	if (iovcntp) {
		*iovcntp = iovcnt;
	}
}

void
spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module)
{
	TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq);
}

void
spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module)
{
	TAILQ_INSERT_TAIL(&g_bdev_mgr.vbdev_modules, vbdev_module, tailq);
}

SPDK_SUBSYSTEM_REGISTER(bdev, spdk_bdev_initialize, spdk_bdev_finish, spdk_bdev_config_text)
SPDK_SUBSYSTEM_DEPEND(bdev, copy)