1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>. 5 * Copyright (c) Intel Corporation. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * * Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * * Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * * Neither the name of Intel Corporation nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 26 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #include "spdk/stdinc.h" 36 37 #include "spdk/bdev.h" 38 39 #include <rte_config.h> 40 #include <rte_lcore.h> 41 #include "spdk/env.h" 42 #include "spdk/io_channel.h" 43 #include "spdk/likely.h" 44 #include "spdk/queue.h" 45 #include "spdk/nvme_spec.h" 46 #include "spdk/scsi_spec.h" 47 48 #include "spdk_internal/bdev.h" 49 #include "spdk_internal/event.h" 50 #include "spdk_internal/log.h" 51 52 #define SPDK_BDEV_IO_POOL_SIZE (64 * 1024) 53 #define BUF_SMALL_POOL_SIZE 8192 54 #define BUF_LARGE_POOL_SIZE 1024 55 56 typedef TAILQ_HEAD(, spdk_bdev_io) need_buf_tailq_t; 57 58 struct spdk_bdev_mgr { 59 struct spdk_mempool *bdev_io_pool; 60 61 struct spdk_mempool *buf_small_pool; 62 struct spdk_mempool *buf_large_pool; 63 64 need_buf_tailq_t need_buf_small[RTE_MAX_LCORE]; 65 need_buf_tailq_t need_buf_large[RTE_MAX_LCORE]; 66 67 TAILQ_HEAD(, spdk_bdev_module_if) bdev_modules; 68 TAILQ_HEAD(, spdk_bdev_module_if) vbdev_modules; 69 70 TAILQ_HEAD(, spdk_bdev) bdevs; 71 }; 72 73 static struct spdk_bdev_mgr g_bdev_mgr = { 74 .bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules), 75 .vbdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.vbdev_modules), 76 .bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs), 77 }; 78 79 struct spdk_bdev_channel { 80 struct spdk_bdev *bdev; 81 82 /* The channel for the underlying device */ 83 struct spdk_io_channel *channel; 84 }; 85 86 struct spdk_bdev * 87 spdk_bdev_first(void) 88 { 89 struct spdk_bdev *bdev; 90 91 bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs); 92 if (bdev) { 93 SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name); 94 } 95 96 return bdev; 97 } 98 99 struct spdk_bdev * 100 spdk_bdev_next(struct spdk_bdev *prev) 101 { 102 struct spdk_bdev *bdev; 103 104 bdev = TAILQ_NEXT(prev, link); 105 if (bdev) { 106 SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n", bdev->name); 107 } 108 109 return bdev; 110 } 111 112 struct spdk_bdev * 113 spdk_bdev_get_by_name(const char *bdev_name) 114 { 115 struct spdk_bdev *bdev = spdk_bdev_first(); 116 117 while (bdev != NULL) { 118 if (strncmp(bdev_name, bdev->name, sizeof(bdev->name)) == 0) { 119 return bdev; 120 } 121 bdev = spdk_bdev_next(bdev); 122 } 123 124 return NULL; 125 } 126 127 static void 128 spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf) 129 { 130 assert(bdev_io->get_buf_cb != NULL); 131 assert(buf != NULL); 132 assert(bdev_io->u.read.iovs != NULL); 133 134 bdev_io->buf = buf; 135 bdev_io->u.read.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL); 136 bdev_io->u.read.iovs[0].iov_len = bdev_io->u.read.len; 137 bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io); 138 } 139 140 static void 141 spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io) 142 { 143 struct spdk_mempool *pool; 144 struct spdk_bdev_io *tmp; 145 void *buf; 146 need_buf_tailq_t *tailq; 147 uint64_t length; 148 149 assert(bdev_io->u.read.iovcnt == 1); 150 151 length = bdev_io->u.read.len; 152 buf = bdev_io->buf; 153 154 if (length <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) { 155 pool = g_bdev_mgr.buf_small_pool; 156 tailq = &g_bdev_mgr.need_buf_small[rte_lcore_id()]; 157 } else { 158 pool = g_bdev_mgr.buf_large_pool; 159 tailq = &g_bdev_mgr.need_buf_large[rte_lcore_id()]; 160 } 161 162 if (TAILQ_EMPTY(tailq)) { 163 spdk_mempool_put(pool, buf); 164 } else { 165 tmp = TAILQ_FIRST(tailq); 166 TAILQ_REMOVE(tailq, tmp, buf_link); 167 spdk_bdev_io_set_buf(tmp, buf); 168 } 169 } 170 171 void 172 spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb) 173 { 174 uint64_t len = bdev_io->u.read.len; 175 struct spdk_mempool *pool; 176 need_buf_tailq_t *tailq; 177 void *buf = NULL; 178 179 assert(cb != NULL); 180 assert(bdev_io->u.read.iovs != NULL); 181 182 if (spdk_unlikely(bdev_io->u.read.iovs[0].iov_base != NULL)) { 183 /* Buffer already present */ 184 cb(bdev_io->ch->channel, bdev_io); 185 return; 186 } 187 188 bdev_io->get_buf_cb = cb; 189 if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) { 190 pool = g_bdev_mgr.buf_small_pool; 191 tailq = &g_bdev_mgr.need_buf_small[rte_lcore_id()]; 192 } else { 193 pool = g_bdev_mgr.buf_large_pool; 194 tailq = &g_bdev_mgr.need_buf_large[rte_lcore_id()]; 195 } 196 197 buf = spdk_mempool_get(pool); 198 199 if (!buf) { 200 TAILQ_INSERT_TAIL(tailq, bdev_io, buf_link); 201 } else { 202 spdk_bdev_io_set_buf(bdev_io, buf); 203 } 204 } 205 206 static int 207 spdk_bdev_module_get_max_ctx_size(void) 208 { 209 struct spdk_bdev_module_if *bdev_module; 210 int max_bdev_module_size = 0; 211 212 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 213 if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) { 214 max_bdev_module_size = bdev_module->get_ctx_size(); 215 } 216 } 217 218 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) { 219 if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) { 220 max_bdev_module_size = bdev_module->get_ctx_size(); 221 } 222 } 223 224 return max_bdev_module_size; 225 } 226 227 static void 228 spdk_bdev_config_text(FILE *fp) 229 { 230 struct spdk_bdev_module_if *bdev_module; 231 232 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 233 if (bdev_module->config_text) { 234 bdev_module->config_text(fp); 235 } 236 } 237 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) { 238 if (bdev_module->config_text) { 239 bdev_module->config_text(fp); 240 } 241 } 242 } 243 244 static int 245 spdk_bdev_initialize(void) 246 { 247 int i, cache_size; 248 struct spdk_bdev_module_if *bdev_module; 249 int rc = 0; 250 251 g_bdev_mgr.bdev_io_pool = spdk_mempool_create("blockdev_io", 252 SPDK_BDEV_IO_POOL_SIZE, 253 sizeof(struct spdk_bdev_io) + 254 spdk_bdev_module_get_max_ctx_size(), 255 64, 256 SPDK_ENV_SOCKET_ID_ANY); 257 258 if (g_bdev_mgr.bdev_io_pool == NULL) { 259 SPDK_ERRLOG("could not allocate spdk_bdev_io pool"); 260 return -1; 261 } 262 263 for (i = 0; i < RTE_MAX_LCORE; i++) { 264 TAILQ_INIT(&g_bdev_mgr.need_buf_small[i]); 265 TAILQ_INIT(&g_bdev_mgr.need_buf_large[i]); 266 } 267 268 /** 269 * Ensure no more than half of the total buffers end up local caches, by 270 * using spdk_env_get_core_count() to determine how many local caches we need 271 * to account for. 272 */ 273 cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count()); 274 g_bdev_mgr.buf_small_pool = spdk_mempool_create("buf_small_pool", 275 BUF_SMALL_POOL_SIZE, 276 SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512, 277 cache_size, 278 SPDK_ENV_SOCKET_ID_ANY); 279 if (!g_bdev_mgr.buf_small_pool) { 280 SPDK_ERRLOG("create rbuf small pool failed\n"); 281 return -1; 282 } 283 284 cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count()); 285 g_bdev_mgr.buf_large_pool = spdk_mempool_create("buf_large_pool", 286 BUF_LARGE_POOL_SIZE, 287 SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512, 288 cache_size, 289 SPDK_ENV_SOCKET_ID_ANY); 290 if (!g_bdev_mgr.buf_large_pool) { 291 SPDK_ERRLOG("create rbuf large pool failed\n"); 292 return -1; 293 } 294 295 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 296 rc = bdev_module->module_init(); 297 if (rc) { 298 return rc; 299 } 300 } 301 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) { 302 rc = bdev_module->module_init(); 303 if (rc) { 304 return rc; 305 } 306 } 307 308 return 0; 309 } 310 311 static int 312 spdk_bdev_finish(void) 313 { 314 struct spdk_bdev_module_if *bdev_module; 315 316 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.vbdev_modules, tailq) { 317 if (bdev_module->module_fini) { 318 bdev_module->module_fini(); 319 } 320 } 321 322 TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, tailq) { 323 if (bdev_module->module_fini) { 324 bdev_module->module_fini(); 325 } 326 } 327 328 if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != SPDK_BDEV_IO_POOL_SIZE) { 329 SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n", 330 spdk_mempool_count(g_bdev_mgr.bdev_io_pool), 331 SPDK_BDEV_IO_POOL_SIZE); 332 } 333 334 if (spdk_mempool_count(g_bdev_mgr.buf_small_pool) != BUF_SMALL_POOL_SIZE) { 335 SPDK_ERRLOG("Small buffer pool count is %zu but should be %u\n", 336 spdk_mempool_count(g_bdev_mgr.buf_small_pool), 337 BUF_SMALL_POOL_SIZE); 338 assert(false); 339 } 340 341 if (spdk_mempool_count(g_bdev_mgr.buf_large_pool) != BUF_LARGE_POOL_SIZE) { 342 SPDK_ERRLOG("Large buffer pool count is %zu but should be %u\n", 343 spdk_mempool_count(g_bdev_mgr.buf_large_pool), 344 BUF_LARGE_POOL_SIZE); 345 assert(false); 346 } 347 348 spdk_mempool_free(g_bdev_mgr.bdev_io_pool); 349 spdk_mempool_free(g_bdev_mgr.buf_small_pool); 350 spdk_mempool_free(g_bdev_mgr.buf_large_pool); 351 352 return 0; 353 } 354 355 struct spdk_bdev_io * 356 spdk_bdev_get_io(void) 357 { 358 struct spdk_bdev_io *bdev_io; 359 360 bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool); 361 if (!bdev_io) { 362 SPDK_ERRLOG("Unable to get spdk_bdev_io\n"); 363 abort(); 364 } 365 366 memset(bdev_io, 0, sizeof(*bdev_io)); 367 368 return bdev_io; 369 } 370 371 static void 372 spdk_bdev_put_io(struct spdk_bdev_io *bdev_io) 373 { 374 if (!bdev_io) { 375 return; 376 } 377 378 if (bdev_io->buf != NULL) { 379 spdk_bdev_io_put_buf(bdev_io); 380 } 381 382 spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io); 383 } 384 385 static void 386 spdk_bdev_cleanup_pending_buf_io(struct spdk_bdev *bdev) 387 { 388 struct spdk_bdev_io *bdev_io, *tmp; 389 390 TAILQ_FOREACH_SAFE(bdev_io, &g_bdev_mgr.need_buf_small[rte_lcore_id()], buf_link, tmp) { 391 if (bdev_io->bdev == bdev) { 392 TAILQ_REMOVE(&g_bdev_mgr.need_buf_small[rte_lcore_id()], bdev_io, buf_link); 393 bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED; 394 } 395 } 396 397 TAILQ_FOREACH_SAFE(bdev_io, &g_bdev_mgr.need_buf_large[rte_lcore_id()], buf_link, tmp) { 398 if (bdev_io->bdev == bdev) { 399 TAILQ_REMOVE(&g_bdev_mgr.need_buf_large[rte_lcore_id()], bdev_io, buf_link); 400 bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED; 401 } 402 } 403 } 404 405 static void 406 __submit_request(struct spdk_bdev *bdev, struct spdk_bdev_io *bdev_io) 407 { 408 struct spdk_io_channel *ch; 409 410 assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING); 411 412 if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) { 413 spdk_bdev_cleanup_pending_buf_io(bdev); 414 ch = NULL; 415 } else { 416 ch = bdev_io->ch->channel; 417 } 418 419 bdev_io->in_submit_request = true; 420 bdev->fn_table->submit_request(ch, bdev_io); 421 bdev_io->in_submit_request = false; 422 } 423 424 static int 425 spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io) 426 { 427 struct spdk_bdev *bdev = bdev_io->bdev; 428 429 __submit_request(bdev, bdev_io); 430 return 0; 431 } 432 433 void 434 spdk_bdev_io_resubmit(struct spdk_bdev_io *bdev_io, struct spdk_bdev *new_bdev) 435 { 436 assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING); 437 bdev_io->bdev = new_bdev; 438 439 /* 440 * These fields are normally set during spdk_bdev_io_init(), but since bdev is 441 * being switched, they need to be reinitialized. 442 */ 443 bdev_io->gencnt = new_bdev->gencnt; 444 445 __submit_request(new_bdev, bdev_io); 446 } 447 448 static void 449 spdk_bdev_io_init(struct spdk_bdev_io *bdev_io, 450 struct spdk_bdev *bdev, void *cb_arg, 451 spdk_bdev_io_completion_cb cb) 452 { 453 bdev_io->bdev = bdev; 454 bdev_io->caller_ctx = cb_arg; 455 bdev_io->cb = cb; 456 bdev_io->gencnt = bdev->gencnt; 457 bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING; 458 bdev_io->in_submit_request = false; 459 TAILQ_INIT(&bdev_io->child_io); 460 } 461 462 struct spdk_bdev_io * 463 spdk_bdev_get_child_io(struct spdk_bdev_io *parent, 464 struct spdk_bdev *bdev, 465 spdk_bdev_io_completion_cb cb, 466 void *cb_arg) 467 { 468 struct spdk_bdev_io *child; 469 470 child = spdk_bdev_get_io(); 471 if (!child) { 472 SPDK_ERRLOG("Unable to get spdk_bdev_io\n"); 473 return NULL; 474 } 475 476 if (cb_arg == NULL) { 477 cb_arg = child; 478 } 479 480 spdk_bdev_io_init(child, bdev, cb_arg, cb); 481 482 child->type = parent->type; 483 memcpy(&child->u, &parent->u, sizeof(child->u)); 484 child->buf = NULL; 485 child->get_buf_cb = NULL; 486 child->parent = parent; 487 488 TAILQ_INSERT_TAIL(&parent->child_io, child, link); 489 490 return child; 491 } 492 493 bool 494 spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type) 495 { 496 return bdev->fn_table->io_type_supported(bdev->ctxt, io_type); 497 } 498 499 int 500 spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w) 501 { 502 if (bdev->fn_table->dump_config_json) { 503 return bdev->fn_table->dump_config_json(bdev->ctxt, w); 504 } 505 506 return 0; 507 } 508 509 static int 510 spdk_bdev_channel_create(void *io_device, uint32_t priority, void *ctx_buf, 511 void *unique_ctx) 512 { 513 struct spdk_bdev *bdev = io_device; 514 struct spdk_bdev_channel *ch = ctx_buf; 515 516 ch->bdev = io_device; 517 ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt, priority); 518 519 return 0; 520 } 521 522 static void 523 spdk_bdev_channel_destroy(void *io_device, void *ctx_buf) 524 { 525 struct spdk_bdev_channel *ch = ctx_buf; 526 527 spdk_put_io_channel(ch->channel); 528 } 529 530 struct spdk_io_channel * 531 spdk_bdev_get_io_channel(struct spdk_bdev *bdev, uint32_t priority) 532 { 533 return spdk_get_io_channel(bdev, priority, false, NULL); 534 } 535 536 const char * 537 spdk_bdev_get_name(const struct spdk_bdev *bdev) 538 { 539 return bdev->name; 540 } 541 542 const char * 543 spdk_bdev_get_product_name(const struct spdk_bdev *bdev) 544 { 545 return bdev->product_name; 546 } 547 548 uint32_t 549 spdk_bdev_get_block_size(const struct spdk_bdev *bdev) 550 { 551 return bdev->blocklen; 552 } 553 554 uint64_t 555 spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev) 556 { 557 return bdev->blockcnt; 558 } 559 560 uint32_t 561 spdk_bdev_get_max_unmap_descriptors(const struct spdk_bdev *bdev) 562 { 563 return bdev->max_unmap_bdesc_count; 564 } 565 566 size_t 567 spdk_bdev_get_buf_align(const struct spdk_bdev *bdev) 568 { 569 /* TODO: push this logic down to the bdev modules */ 570 if (bdev->need_aligned_buffer) { 571 return bdev->blocklen; 572 } 573 574 return 1; 575 } 576 577 bool 578 spdk_bdev_has_write_cache(const struct spdk_bdev *bdev) 579 { 580 return bdev->write_cache; 581 } 582 583 static int 584 spdk_bdev_io_valid(struct spdk_bdev *bdev, uint64_t offset, uint64_t nbytes) 585 { 586 /* Return failure if nbytes is not a multiple of bdev->blocklen */ 587 if (nbytes % bdev->blocklen) { 588 return -1; 589 } 590 591 /* Return failure if offset + nbytes is less than offset; indicates there 592 * has been an overflow and hence the offset has been wrapped around */ 593 if (offset + nbytes < offset) { 594 return -1; 595 } 596 597 /* Return failure if offset + nbytes exceeds the size of the blockdev */ 598 if (offset + nbytes > bdev->blockcnt * bdev->blocklen) { 599 return -1; 600 } 601 602 return 0; 603 } 604 605 struct spdk_bdev_io * 606 spdk_bdev_read(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 607 void *buf, uint64_t offset, uint64_t nbytes, 608 spdk_bdev_io_completion_cb cb, void *cb_arg) 609 { 610 struct spdk_bdev_io *bdev_io; 611 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 612 int rc; 613 614 assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED); 615 if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) { 616 return NULL; 617 } 618 619 bdev_io = spdk_bdev_get_io(); 620 if (!bdev_io) { 621 SPDK_ERRLOG("spdk_bdev_io memory allocation failed duing read\n"); 622 return NULL; 623 } 624 625 bdev_io->ch = channel; 626 bdev_io->type = SPDK_BDEV_IO_TYPE_READ; 627 bdev_io->u.read.iov.iov_base = buf; 628 bdev_io->u.read.iov.iov_len = nbytes; 629 bdev_io->u.read.iovs = &bdev_io->u.read.iov; 630 bdev_io->u.read.iovcnt = 1; 631 bdev_io->u.read.len = nbytes; 632 bdev_io->u.read.offset = offset; 633 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 634 635 rc = spdk_bdev_io_submit(bdev_io); 636 if (rc < 0) { 637 spdk_bdev_put_io(bdev_io); 638 return NULL; 639 } 640 641 return bdev_io; 642 } 643 644 struct spdk_bdev_io * 645 spdk_bdev_readv(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 646 struct iovec *iov, int iovcnt, 647 uint64_t offset, uint64_t nbytes, 648 spdk_bdev_io_completion_cb cb, void *cb_arg) 649 { 650 struct spdk_bdev_io *bdev_io; 651 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 652 int rc; 653 654 assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED); 655 if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) { 656 return NULL; 657 } 658 659 bdev_io = spdk_bdev_get_io(); 660 if (!bdev_io) { 661 SPDK_ERRLOG("spdk_bdev_io memory allocation failed duing read\n"); 662 return NULL; 663 } 664 665 bdev_io->ch = channel; 666 bdev_io->type = SPDK_BDEV_IO_TYPE_READ; 667 bdev_io->u.read.iovs = iov; 668 bdev_io->u.read.iovcnt = iovcnt; 669 bdev_io->u.read.len = nbytes; 670 bdev_io->u.read.offset = offset; 671 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 672 673 rc = spdk_bdev_io_submit(bdev_io); 674 if (rc < 0) { 675 spdk_bdev_put_io(bdev_io); 676 return NULL; 677 } 678 679 return bdev_io; 680 } 681 682 struct spdk_bdev_io * 683 spdk_bdev_write(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 684 void *buf, uint64_t offset, uint64_t nbytes, 685 spdk_bdev_io_completion_cb cb, void *cb_arg) 686 { 687 struct spdk_bdev_io *bdev_io; 688 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 689 int rc; 690 691 assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED); 692 if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) { 693 return NULL; 694 } 695 696 bdev_io = spdk_bdev_get_io(); 697 if (!bdev_io) { 698 SPDK_ERRLOG("blockdev_io memory allocation failed duing write\n"); 699 return NULL; 700 } 701 702 bdev_io->ch = channel; 703 bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; 704 bdev_io->u.write.iov.iov_base = buf; 705 bdev_io->u.write.iov.iov_len = nbytes; 706 bdev_io->u.write.iovs = &bdev_io->u.write.iov; 707 bdev_io->u.write.iovcnt = 1; 708 bdev_io->u.write.len = nbytes; 709 bdev_io->u.write.offset = offset; 710 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 711 712 rc = spdk_bdev_io_submit(bdev_io); 713 if (rc < 0) { 714 spdk_bdev_put_io(bdev_io); 715 return NULL; 716 } 717 718 return bdev_io; 719 } 720 721 struct spdk_bdev_io * 722 spdk_bdev_writev(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 723 struct iovec *iov, int iovcnt, 724 uint64_t offset, uint64_t len, 725 spdk_bdev_io_completion_cb cb, void *cb_arg) 726 { 727 struct spdk_bdev_io *bdev_io; 728 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 729 int rc; 730 731 assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED); 732 if (spdk_bdev_io_valid(bdev, offset, len) != 0) { 733 return NULL; 734 } 735 736 bdev_io = spdk_bdev_get_io(); 737 if (!bdev_io) { 738 SPDK_ERRLOG("bdev_io memory allocation failed duing writev\n"); 739 return NULL; 740 } 741 742 bdev_io->ch = channel; 743 bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; 744 bdev_io->u.write.iovs = iov; 745 bdev_io->u.write.iovcnt = iovcnt; 746 bdev_io->u.write.len = len; 747 bdev_io->u.write.offset = offset; 748 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 749 750 rc = spdk_bdev_io_submit(bdev_io); 751 if (rc < 0) { 752 spdk_bdev_put_io(bdev_io); 753 return NULL; 754 } 755 756 return bdev_io; 757 } 758 759 struct spdk_bdev_io * 760 spdk_bdev_unmap(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 761 struct spdk_scsi_unmap_bdesc *unmap_d, 762 uint16_t bdesc_count, 763 spdk_bdev_io_completion_cb cb, void *cb_arg) 764 { 765 struct spdk_bdev_io *bdev_io; 766 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 767 int rc; 768 769 assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED); 770 if (bdesc_count == 0) { 771 SPDK_ERRLOG("Invalid bdesc_count 0\n"); 772 return NULL; 773 } 774 775 if (bdesc_count > bdev->max_unmap_bdesc_count) { 776 SPDK_ERRLOG("Invalid bdesc_count %u > max_unmap_bdesc_count %u\n", 777 bdesc_count, bdev->max_unmap_bdesc_count); 778 return NULL; 779 } 780 781 bdev_io = spdk_bdev_get_io(); 782 if (!bdev_io) { 783 SPDK_ERRLOG("bdev_io memory allocation failed duing unmap\n"); 784 return NULL; 785 } 786 787 bdev_io->ch = channel; 788 bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP; 789 bdev_io->u.unmap.unmap_bdesc = unmap_d; 790 bdev_io->u.unmap.bdesc_count = bdesc_count; 791 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 792 793 rc = spdk_bdev_io_submit(bdev_io); 794 if (rc < 0) { 795 spdk_bdev_put_io(bdev_io); 796 return NULL; 797 } 798 799 return bdev_io; 800 } 801 802 struct spdk_bdev_io * 803 spdk_bdev_flush(struct spdk_bdev *bdev, struct spdk_io_channel *ch, 804 uint64_t offset, uint64_t length, 805 spdk_bdev_io_completion_cb cb, void *cb_arg) 806 { 807 struct spdk_bdev_io *bdev_io; 808 struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); 809 int rc; 810 811 assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED); 812 bdev_io = spdk_bdev_get_io(); 813 if (!bdev_io) { 814 SPDK_ERRLOG("bdev_io memory allocation failed duing flush\n"); 815 return NULL; 816 } 817 818 bdev_io->ch = channel; 819 bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH; 820 bdev_io->u.flush.offset = offset; 821 bdev_io->u.flush.length = length; 822 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 823 824 rc = spdk_bdev_io_submit(bdev_io); 825 if (rc < 0) { 826 spdk_bdev_put_io(bdev_io); 827 return NULL; 828 } 829 830 return bdev_io; 831 } 832 833 int 834 spdk_bdev_reset(struct spdk_bdev *bdev, enum spdk_bdev_reset_type reset_type, 835 spdk_bdev_io_completion_cb cb, void *cb_arg) 836 { 837 struct spdk_bdev_io *bdev_io; 838 int rc; 839 840 assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED); 841 bdev_io = spdk_bdev_get_io(); 842 if (!bdev_io) { 843 SPDK_ERRLOG("bdev_io memory allocation failed duing reset\n"); 844 return -1; 845 } 846 847 bdev_io->type = SPDK_BDEV_IO_TYPE_RESET; 848 bdev_io->u.reset.type = reset_type; 849 spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb); 850 851 rc = spdk_bdev_io_submit(bdev_io); 852 if (rc < 0) { 853 spdk_bdev_put_io(bdev_io); 854 SPDK_ERRLOG("reset failed\n"); 855 } 856 857 return rc; 858 } 859 860 int 861 spdk_bdev_free_io(struct spdk_bdev_io *bdev_io) 862 { 863 struct spdk_bdev_io *child_io, *tmp; 864 865 if (!bdev_io) { 866 SPDK_ERRLOG("bdev_io is NULL\n"); 867 return -1; 868 } 869 870 if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) { 871 SPDK_ERRLOG("bdev_io is in pending state\n"); 872 assert(false); 873 return -1; 874 } 875 876 TAILQ_FOREACH_SAFE(child_io, &bdev_io->child_io, link, tmp) { 877 /* 878 * Make sure no references to the parent I/O remain, since it is being 879 * returned to the free pool. 880 */ 881 child_io->parent = NULL; 882 TAILQ_REMOVE(&bdev_io->child_io, child_io, link); 883 884 /* 885 * Child I/O may have a buf that needs to be returned to a pool 886 * on a different core, so free it through the request submission 887 * process rather than calling put_io directly here. 888 */ 889 spdk_bdev_free_io(child_io); 890 } 891 892 spdk_bdev_put_io(bdev_io); 893 894 return 0; 895 } 896 897 static void 898 bdev_io_deferred_completion(void *arg1, void *arg2) 899 { 900 struct spdk_bdev_io *bdev_io = arg1; 901 enum spdk_bdev_io_status status = (enum spdk_bdev_io_status)arg2; 902 903 assert(bdev_io->in_submit_request == false); 904 905 spdk_bdev_io_complete(bdev_io, status); 906 } 907 908 void 909 spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status) 910 { 911 if (bdev_io->in_submit_request) { 912 /* 913 * Defer completion via an event to avoid potential infinite recursion if the 914 * user's completion callback issues a new I/O. 915 */ 916 spdk_event_call(spdk_event_allocate(spdk_env_get_current_core(), 917 bdev_io_deferred_completion, 918 bdev_io, 919 (void *)status)); 920 return; 921 } 922 923 if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) { 924 /* Successful reset */ 925 if (status == SPDK_BDEV_IO_STATUS_SUCCESS) { 926 /* Increase the blockdev generation if it is a hard reset */ 927 if (bdev_io->u.reset.type == SPDK_BDEV_RESET_HARD) { 928 bdev_io->bdev->gencnt++; 929 } 930 } 931 } else { 932 /* 933 * Check the gencnt, to see if this I/O was issued before the most 934 * recent reset. If the gencnt is not equal, then just free the I/O 935 * without calling the callback, since the caller will have already 936 * freed its context for this I/O. 937 */ 938 if (bdev_io->bdev->gencnt != bdev_io->gencnt) { 939 spdk_bdev_put_io(bdev_io); 940 return; 941 } 942 } 943 944 bdev_io->status = status; 945 946 assert(bdev_io->cb != NULL); 947 bdev_io->cb(bdev_io, status, bdev_io->caller_ctx); 948 } 949 950 void 951 spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc, 952 enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq) 953 { 954 if (sc == SPDK_SCSI_STATUS_GOOD) { 955 bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS; 956 } else { 957 bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR; 958 bdev_io->error.scsi.sc = sc; 959 bdev_io->error.scsi.sk = sk; 960 bdev_io->error.scsi.asc = asc; 961 bdev_io->error.scsi.ascq = ascq; 962 } 963 964 spdk_bdev_io_complete(bdev_io, bdev_io->status); 965 } 966 967 void 968 spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io, 969 int *sc, int *sk, int *asc, int *ascq) 970 { 971 assert(sc != NULL); 972 assert(sk != NULL); 973 assert(asc != NULL); 974 assert(ascq != NULL); 975 976 switch (bdev_io->status) { 977 case SPDK_BDEV_IO_STATUS_SUCCESS: 978 *sc = SPDK_SCSI_STATUS_GOOD; 979 *sk = SPDK_SCSI_SENSE_NO_SENSE; 980 *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; 981 *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; 982 break; 983 case SPDK_BDEV_IO_STATUS_NVME_ERROR: 984 spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq); 985 break; 986 case SPDK_BDEV_IO_STATUS_SCSI_ERROR: 987 *sc = bdev_io->error.scsi.sc; 988 *sk = bdev_io->error.scsi.sk; 989 *asc = bdev_io->error.scsi.asc; 990 *ascq = bdev_io->error.scsi.ascq; 991 break; 992 default: 993 *sc = SPDK_SCSI_STATUS_CHECK_CONDITION; 994 *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND; 995 *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE; 996 *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE; 997 break; 998 } 999 } 1000 1001 void 1002 spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc) 1003 { 1004 if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) { 1005 bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS; 1006 } else { 1007 bdev_io->error.nvme.sct = sct; 1008 bdev_io->error.nvme.sc = sc; 1009 bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR; 1010 } 1011 1012 spdk_bdev_io_complete(bdev_io, bdev_io->status); 1013 } 1014 1015 void 1016 spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc) 1017 { 1018 assert(sct != NULL); 1019 assert(sc != NULL); 1020 1021 if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) { 1022 *sct = bdev_io->error.nvme.sct; 1023 *sc = bdev_io->error.nvme.sc; 1024 } else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) { 1025 *sct = SPDK_NVME_SCT_GENERIC; 1026 *sc = SPDK_NVME_SC_SUCCESS; 1027 } else { 1028 *sct = SPDK_NVME_SCT_GENERIC; 1029 *sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR; 1030 } 1031 } 1032 1033 void 1034 spdk_bdev_register(struct spdk_bdev *bdev) 1035 { 1036 /* initialize the reset generation value to zero */ 1037 bdev->gencnt = 0; 1038 1039 spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy, 1040 sizeof(struct spdk_bdev_channel)); 1041 1042 pthread_mutex_init(&bdev->mutex, NULL); 1043 bdev->status = SPDK_BDEV_STATUS_UNCLAIMED; 1044 SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Inserting bdev %s into list\n", bdev->name); 1045 TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, link); 1046 } 1047 1048 void 1049 spdk_bdev_unregister(struct spdk_bdev *bdev) 1050 { 1051 int rc; 1052 1053 SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Removing bdev %s from list\n", bdev->name); 1054 1055 pthread_mutex_lock(&bdev->mutex); 1056 assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_UNCLAIMED); 1057 if (bdev->status == SPDK_BDEV_STATUS_CLAIMED) { 1058 if (bdev->remove_cb) { 1059 bdev->status = SPDK_BDEV_STATUS_REMOVING; 1060 pthread_mutex_unlock(&bdev->mutex); 1061 bdev->remove_cb(bdev->remove_ctx); 1062 return; 1063 } else { 1064 bdev->status = SPDK_BDEV_STATUS_UNCLAIMED; 1065 } 1066 } 1067 1068 TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, link); 1069 pthread_mutex_unlock(&bdev->mutex); 1070 1071 pthread_mutex_destroy(&bdev->mutex); 1072 1073 spdk_io_device_unregister(bdev); 1074 1075 rc = bdev->fn_table->destruct(bdev->ctxt); 1076 if (rc < 0) { 1077 SPDK_ERRLOG("destruct failed\n"); 1078 } 1079 } 1080 1081 bool 1082 spdk_bdev_claim(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb, 1083 void *remove_ctx) 1084 { 1085 bool success; 1086 1087 pthread_mutex_lock(&bdev->mutex); 1088 1089 if (bdev->status != SPDK_BDEV_STATUS_CLAIMED) { 1090 /* Take ownership of bdev. */ 1091 bdev->remove_cb = remove_cb; 1092 bdev->remove_ctx = remove_ctx; 1093 bdev->status = SPDK_BDEV_STATUS_CLAIMED; 1094 success = true; 1095 } else { 1096 /* bdev is already claimed. */ 1097 success = false; 1098 } 1099 1100 pthread_mutex_unlock(&bdev->mutex); 1101 1102 return success; 1103 } 1104 1105 void 1106 spdk_bdev_unclaim(struct spdk_bdev *bdev) 1107 { 1108 bool do_unregister = false; 1109 1110 pthread_mutex_lock(&bdev->mutex); 1111 assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_REMOVING); 1112 if (bdev->status == SPDK_BDEV_STATUS_REMOVING) { 1113 do_unregister = true; 1114 } 1115 bdev->remove_cb = NULL; 1116 bdev->remove_ctx = NULL; 1117 bdev->status = SPDK_BDEV_STATUS_UNCLAIMED; 1118 pthread_mutex_unlock(&bdev->mutex); 1119 1120 if (do_unregister == true) { 1121 spdk_bdev_unregister(bdev); 1122 } 1123 } 1124 1125 void 1126 spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp) 1127 { 1128 struct iovec *iovs; 1129 int iovcnt; 1130 1131 if (bdev_io == NULL) { 1132 return; 1133 } 1134 1135 switch (bdev_io->type) { 1136 case SPDK_BDEV_IO_TYPE_READ: 1137 iovs = bdev_io->u.read.iovs; 1138 iovcnt = bdev_io->u.read.iovcnt; 1139 break; 1140 case SPDK_BDEV_IO_TYPE_WRITE: 1141 iovs = bdev_io->u.write.iovs; 1142 iovcnt = bdev_io->u.write.iovcnt; 1143 break; 1144 default: 1145 iovs = NULL; 1146 iovcnt = 0; 1147 break; 1148 } 1149 1150 if (iovp) { 1151 *iovp = iovs; 1152 } 1153 if (iovcntp) { 1154 *iovcntp = iovcnt; 1155 } 1156 } 1157 1158 void 1159 spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module) 1160 { 1161 TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, tailq); 1162 } 1163 1164 void 1165 spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module) 1166 { 1167 TAILQ_INSERT_TAIL(&g_bdev_mgr.vbdev_modules, vbdev_module, tailq); 1168 } 1169 SPDK_SUBSYSTEM_REGISTER(bdev, spdk_bdev_initialize, spdk_bdev_finish, spdk_bdev_config_text) 1170 SPDK_SUBSYSTEM_DEPEND(bdev, copy) 1171