/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"

#include <rte_config.h>
#include <rte_mempool.h>
#include <rte_version.h>

#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/queue.h"
#include "spdk/nvme_spec.h"
#include "spdk/scsi_spec.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/event.h"
#include "spdk_internal/log.h"

#define SPDK_BDEV_IO_POOL_SIZE	(64 * 1024)
#define RBUF_SMALL_POOL_SIZE	8192
#define RBUF_LARGE_POOL_SIZE	1024

static struct rte_mempool *spdk_bdev_g_io_pool = NULL;
static struct rte_mempool *g_rbuf_small_pool = NULL;
static struct rte_mempool *g_rbuf_large_pool = NULL;

typedef TAILQ_HEAD(, spdk_bdev_io) need_rbuf_tailq_t;
static need_rbuf_tailq_t g_need_rbuf_small[RTE_MAX_LCORE];
static need_rbuf_tailq_t g_need_rbuf_large[RTE_MAX_LCORE];

static TAILQ_HEAD(, spdk_bdev_module_if) spdk_bdev_module_list =
	TAILQ_HEAD_INITIALIZER(spdk_bdev_module_list);
static TAILQ_HEAD(, spdk_bdev_module_if) spdk_vbdev_module_list =
	TAILQ_HEAD_INITIALIZER(spdk_vbdev_module_list);

static TAILQ_HEAD(, spdk_bdev) spdk_bdev_list =
	TAILQ_HEAD_INITIALIZER(spdk_bdev_list);

struct spdk_bdev_channel {
	struct spdk_bdev	*bdev;

	/* The channel for the underlying device */
	struct spdk_io_channel	*channel;
};

struct spdk_bdev *spdk_bdev_first(void)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_FIRST(&spdk_bdev_list);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *spdk_bdev_next(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_NEXT(prev, link);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n",
			      bdev->name);
	}

	return bdev;
}
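/*
 * Usage sketch for the iterator pair above (illustrative only, not part of
 * this file): enumerating every registered bdev.
 *
 *	struct spdk_bdev *bdev;
 *
 *	for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) {
 *		printf("found bdev %s\n", bdev->name);
 *	}
 */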
struct spdk_bdev *spdk_bdev_get_by_name(const char *bdev_name)
{
	struct spdk_bdev *bdev = spdk_bdev_first();

	while (bdev != NULL) {
		if (strncmp(bdev_name, bdev->name, sizeof(bdev->name)) == 0) {
			return bdev;
		}
		bdev = spdk_bdev_next(bdev);
	}

	return NULL;
}

static void
spdk_bdev_io_set_rbuf(struct spdk_bdev_io *bdev_io, void *buf)
{
	assert(bdev_io->get_rbuf_cb != NULL);
	assert(buf != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	bdev_io->u.read.buf_unaligned = buf;
	bdev_io->u.read.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.read.iovs[0].iov_len = bdev_io->u.read.len;
	bdev_io->u.read.put_rbuf = true;
	bdev_io->get_rbuf_cb(bdev_io);
}

static void
spdk_bdev_io_put_rbuf(struct spdk_bdev_io *bdev_io)
{
	struct rte_mempool *pool;
	struct spdk_bdev_io *tmp;
	void *buf;
	need_rbuf_tailq_t *tailq;
	uint64_t length;

	assert(bdev_io->u.read.iovcnt == 1);

	length = bdev_io->u.read.len;
	buf = bdev_io->u.read.buf_unaligned;

	if (length <= SPDK_BDEV_SMALL_RBUF_MAX_SIZE) {
		pool = g_rbuf_small_pool;
		tailq = &g_need_rbuf_small[rte_lcore_id()];
	} else {
		pool = g_rbuf_large_pool;
		tailq = &g_need_rbuf_large[rte_lcore_id()];
	}

	if (TAILQ_EMPTY(tailq)) {
		rte_mempool_put(pool, buf);
	} else {
		tmp = TAILQ_FIRST(tailq);
		TAILQ_REMOVE(tailq, tmp, rbuf_link);
		spdk_bdev_io_set_rbuf(tmp, buf);
	}
}
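/*
 * Note on the alignment in spdk_bdev_io_set_rbuf() above: each rbuf pool
 * element is allocated SPDK_BDEV_*_RBUF_MAX_SIZE + 512 bytes, so a 512-byte
 * aligned region of the full length always fits inside it. The expression
 *
 *	(void *)((unsigned long)((char *)buf + 512) & ~511UL)
 *
 * yields the first 512-byte aligned address in (buf, buf + 512]; for example,
 * an element at address 0x1010 gives an aligned iov_base of 0x1200.
 */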
static int spdk_initialize_rbuf_pool(void)
{
	int cache_size;

	/**
	 * Ensure no more than half of the total buffers end up in local caches, by
	 * using spdk_env_get_core_count() to determine how many local caches we need
	 * to account for.
	 */
	cache_size = RBUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
	if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE)
		cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
	g_rbuf_small_pool = rte_mempool_create("rbuf_small_pool",
					       RBUF_SMALL_POOL_SIZE,
					       SPDK_BDEV_SMALL_RBUF_MAX_SIZE + 512,
					       cache_size, 0, NULL, NULL, NULL, NULL,
					       SOCKET_ID_ANY, 0);
	if (!g_rbuf_small_pool) {
		SPDK_ERRLOG("create rbuf small pool failed\n");
		return -1;
	}

	cache_size = RBUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
	if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE)
		cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE;
	g_rbuf_large_pool = rte_mempool_create("rbuf_large_pool",
					       RBUF_LARGE_POOL_SIZE,
					       SPDK_BDEV_LARGE_RBUF_MAX_SIZE + 512,
					       cache_size, 0, NULL, NULL, NULL, NULL,
					       SOCKET_ID_ANY, 0);
	if (!g_rbuf_large_pool) {
		SPDK_ERRLOG("create rbuf large pool failed\n");
		return -1;
	}

	return 0;
}

static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int max_bdev_module_size = 0;

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	return max_bdev_module_size;
}

static int
spdk_bdev_module_initialize(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int rc = 0;

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		rc = bdev_module->module_init();
		if (rc)
			return rc;
	}
	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		rc = bdev_module->module_init();
		if (rc)
			return rc;
	}
	return rc;
}

static void
spdk_bdev_module_finish(void)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}
}

static void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
}

static int
spdk_bdev_initialize(void)
{
	int i;

	if (spdk_bdev_module_initialize()) {
		SPDK_ERRLOG("bdev module initialize failed\n");
		return -1;
	}

	spdk_bdev_g_io_pool = rte_mempool_create("blockdev_io",
			      SPDK_BDEV_IO_POOL_SIZE,
			      sizeof(struct spdk_bdev_io) +
			      spdk_bdev_module_get_max_ctx_size(),
			      64, 0,
			      NULL, NULL, NULL, NULL,
			      SOCKET_ID_ANY, 0);

	if (spdk_bdev_g_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		return -1;
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		TAILQ_INIT(&g_need_rbuf_small[i]);
		TAILQ_INIT(&g_need_rbuf_large[i]);
	}

	return spdk_initialize_rbuf_pool();
}
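/*
 * Worked example for the mempool cache sizing in spdk_initialize_rbuf_pool()
 * above (illustrative numbers): with RBUF_SMALL_POOL_SIZE = 8192 and
 * spdk_env_get_core_count() = 16, cache_size = 8192 / (2 * 16) = 256, so the
 * per-core caches can absorb on the order of 16 * 256 = 4096 buffers, about
 * half the pool, leaving the rest available in the shared ring.
 */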
/*
 * Wrapper to provide rte_mempool_avail_count() on older DPDK versions.
 * Drop this if the minimum DPDK version is raised to at least 16.07.
 */
#if RTE_VERSION < RTE_VERSION_NUM(16, 7, 0, 1)
static unsigned rte_mempool_avail_count(const struct rte_mempool *pool)
{
	return rte_mempool_count(pool);
}
#endif

static int
spdk_bdev_check_pool(struct rte_mempool *pool, uint32_t count)
{
	if (rte_mempool_avail_count(pool) != count) {
		SPDK_ERRLOG("rte_mempool_avail_count(%s) == %u, should be %u\n",
			    pool->name, rte_mempool_avail_count(pool), count);
		return -1;
	} else {
		return 0;
	}
}

static int
spdk_bdev_finish(void)
{
	int rc = 0;

	spdk_bdev_module_finish();

	rc += spdk_bdev_check_pool(g_rbuf_small_pool, RBUF_SMALL_POOL_SIZE);
	rc += spdk_bdev_check_pool(g_rbuf_large_pool, RBUF_LARGE_POOL_SIZE);

	return (rc != 0);
}

struct spdk_bdev_io *spdk_bdev_get_io(void)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	rc = rte_mempool_get(spdk_bdev_g_io_pool, (void **)&bdev_io);
	if (rc < 0 || !bdev_io) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		abort();
	}

	memset(bdev_io, 0, sizeof(*bdev_io));

	return bdev_io;
}

static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		return;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ && bdev_io->u.read.put_rbuf) {
		spdk_bdev_io_put_rbuf(bdev_io);
	}

	rte_mempool_put(spdk_bdev_g_io_pool, bdev_io);
}

static void
_spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io)
{
	uint64_t len = bdev_io->u.read.len;
	struct rte_mempool *pool;
	need_rbuf_tailq_t *tailq;
	int rc;
	void *buf = NULL;

	if (len <= SPDK_BDEV_SMALL_RBUF_MAX_SIZE) {
		pool = g_rbuf_small_pool;
		tailq = &g_need_rbuf_small[rte_lcore_id()];
	} else {
		pool = g_rbuf_large_pool;
		tailq = &g_need_rbuf_large[rte_lcore_id()];
	}

	rc = rte_mempool_get(pool, (void **)&buf);
	if (rc < 0 || !buf) {
		TAILQ_INSERT_TAIL(tailq, bdev_io, rbuf_link);
	} else {
		spdk_bdev_io_set_rbuf(bdev_io, buf);
	}
}

static void
spdk_bdev_cleanup_pending_rbuf_io(struct spdk_bdev *bdev)
{
	struct spdk_bdev_io *bdev_io, *tmp;

	TAILQ_FOREACH_SAFE(bdev_io, &g_need_rbuf_small[rte_lcore_id()], rbuf_link, tmp) {
		if (bdev_io->bdev == bdev) {
			TAILQ_REMOVE(&g_need_rbuf_small[rte_lcore_id()], bdev_io, rbuf_link);
			bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}

	TAILQ_FOREACH_SAFE(bdev_io, &g_need_rbuf_large[rte_lcore_id()], rbuf_link, tmp) {
		if (bdev_io->bdev == bdev) {
			TAILQ_REMOVE(&g_need_rbuf_large[rte_lcore_id()], bdev_io, rbuf_link);
			bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}
}

static void
__submit_request(struct spdk_bdev *bdev, struct spdk_bdev_io *bdev_io)
{
	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		spdk_bdev_cleanup_pending_rbuf_io(bdev);
	}
	bdev_io->in_submit_request = true;
	bdev->fn_table->submit_request(bdev_io);
	bdev_io->in_submit_request = false;
}

static int
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;

	__submit_request(bdev, bdev_io);
	return 0;
}
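/*
 * Read-buffer flow control, in brief: _spdk_bdev_io_get_rbuf() above tries to
 * take a buffer from the appropriate pool. If the pool is exhausted, the I/O
 * is parked on this core's need_rbuf tailq instead of failing, and the next
 * spdk_bdev_io_put_rbuf() on the same core hands the freed buffer directly to
 * the first waiter. A RESET going through __submit_request() fails any parked
 * I/O for that bdev first, so no waiter survives a reset.
 */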
void
spdk_bdev_io_resubmit(struct spdk_bdev_io *bdev_io, struct spdk_bdev *new_bdev)
{
	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
	bdev_io->bdev = new_bdev;

	/*
	 * These fields are normally set during spdk_bdev_io_init(), but since bdev is
	 * being switched, they need to be reinitialized.
	 */
	bdev_io->gencnt = new_bdev->gencnt;
	bdev_io->ctx = new_bdev->ctxt;

	__submit_request(new_bdev, bdev_io);
}

static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	bdev_io->ctx = bdev->ctxt;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	bdev_io->gencnt = bdev->gencnt;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->in_submit_request = false;
	TAILQ_INIT(&bdev_io->child_io);
}

struct spdk_bdev_io *
spdk_bdev_get_child_io(struct spdk_bdev_io *parent,
		       struct spdk_bdev *bdev,
		       spdk_bdev_io_completion_cb cb,
		       void *cb_arg)
{
	struct spdk_bdev_io *child;

	child = spdk_bdev_get_io();
	if (!child) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		return NULL;
	}

	if (cb_arg == NULL) {
		cb_arg = child;
	}

	spdk_bdev_io_init(child, bdev, cb_arg, cb);

	child->type = parent->type;
	memcpy(&child->u, &parent->u, sizeof(child->u));
	if (child->type == SPDK_BDEV_IO_TYPE_READ) {
		child->u.read.put_rbuf = false;
	}
	child->get_rbuf_cb = NULL;
	child->parent = parent;

	TAILQ_INSERT_TAIL(&parent->child_io, child, link);

	return child;
}

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
{
	return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
}

int
spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	if (bdev->fn_table->dump_config_json) {
		return bdev->fn_table->dump_config_json(bdev->ctxt, w);
	}

	return 0;
}

static int
spdk_bdev_channel_create(void *io_device, uint32_t priority, void *ctx_buf,
			 void *unique_ctx)
{
	struct spdk_bdev *bdev = io_device;
	struct spdk_bdev_channel *ch = ctx_buf;

	ch->bdev = io_device;
	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt, priority);

	return 0;
}

static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_channel *ch = ctx_buf;

	spdk_put_io_channel(ch->channel);
}

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev *bdev, uint32_t priority)
{
	return spdk_get_io_channel(bdev, priority, false, NULL);
}

static int
spdk_bdev_io_valid(struct spdk_bdev *bdev, uint64_t offset, uint64_t nbytes)
{
	/* Return failure if nbytes is not a multiple of bdev->blocklen */
	if (nbytes % bdev->blocklen) {
		return -1;
	}

	/* Return failure if offset + nbytes is less than offset; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if (offset + nbytes < offset) {
		return -1;
	}

	/* Return failure if offset + nbytes exceeds the size of the blockdev */
	if (offset + nbytes > bdev->blockcnt * bdev->blocklen) {
		return -1;
	}

	return 0;
}
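/*
 * Usage sketch for the submission calls that follow (illustrative only; the
 * completion callback and buffer management belong to the caller):
 *
 *	static void
 *	read_done(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
 *		  void *cb_arg)
 *	{
 *		// consume the data on success, then release the I/O
 *		spdk_bdev_free_io(bdev_io);
 *	}
 *
 *	// ch obtained earlier via spdk_bdev_get_io_channel(bdev, priority)
 *	if (spdk_bdev_read(bdev, ch, buf, 0, bdev->blocklen, read_done, NULL) == NULL) {
 *		// invalid parameters, or submission failed
 *	}
 */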
struct spdk_bdev_io *
spdk_bdev_read(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return NULL;
	}

	bdev_io->ch = channel->channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iov.iov_base = buf;
	bdev_io->u.read.iov.iov_len = nbytes;
	bdev_io->u.read.iovs = &bdev_io->u.read.iov;
	bdev_io->u.read.iovcnt = 1;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	bdev_io->u.read.put_rbuf = false;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_readv(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during readv\n");
		return NULL;
	}

	bdev_io->ch = channel->channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iovs = iov;
	bdev_io->u.read.iovcnt = iovcnt;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	bdev_io->u.read.put_rbuf = false;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_write(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("blockdev_io memory allocation failed during write\n");
		return NULL;
	}

	bdev_io->ch = channel->channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iov.iov_base = buf;
	bdev_io->u.write.iov.iov_len = nbytes;
	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
	bdev_io->u.write.iovcnt = 1;
	bdev_io->u.write.len = nbytes;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
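/*
 * spdk_bdev_writev() below is the scatter-gather variant of spdk_bdev_write():
 * the caller supplies an iovec array instead of one contiguous buffer, and the
 * combined iovec length is expected to equal len. Illustrative only:
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = hdr, .iov_len = 512 },
 *		{ .iov_base = payload, .iov_len = 3584 },
 *	};
 *
 *	spdk_bdev_writev(bdev, ch, iov, 2, 0, 4096, write_done, NULL);
 */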
struct spdk_bdev_io *
spdk_bdev_writev(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, len) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during writev\n");
		return NULL;
	}

	bdev_io->ch = channel->channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iovs = iov;
	bdev_io->u.write.iovcnt = iovcnt;
	bdev_io->u.write.len = len;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_unmap(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		struct spdk_scsi_unmap_bdesc *unmap_d,
		uint16_t bdesc_count,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (bdesc_count == 0) {
		SPDK_ERRLOG("Invalid bdesc_count 0\n");
		return NULL;
	}

	if (bdesc_count > bdev->max_unmap_bdesc_count) {
		SPDK_ERRLOG("Invalid bdesc_count %u > max_unmap_bdesc_count %u\n",
			    bdesc_count, bdev->max_unmap_bdesc_count);
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during unmap\n");
		return NULL;
	}

	bdev_io->ch = channel->channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.unmap.unmap_bdesc = unmap_d;
	bdev_io->u.unmap.bdesc_count = bdesc_count;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_flush(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during flush\n");
		return NULL;
	}

	bdev_io->ch = channel->channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.flush.offset = offset;
	bdev_io->u.flush.length = length;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

int
spdk_bdev_reset(struct spdk_bdev *bdev, enum spdk_bdev_reset_type reset_type,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io memory allocation failed during reset\n");
		return -1;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
	bdev_io->u.reset.type = reset_type;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		SPDK_ERRLOG("reset failed\n");
	}

	return rc;
}
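/*
 * spdk_bdev_free_io() below is how callers release an I/O once its completion
 * callback has run; it must not be called while the I/O is still pending. Any
 * child I/O still linked to the parent is detached and freed recursively
 * first, so nothing holds a reference to the parent when it returns to the
 * pool.
 */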
int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_io *child_io, *tmp;

	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io is NULL\n");
		return -1;
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		SPDK_ERRLOG("bdev_io is in pending state\n");
		assert(false);
		return -1;
	}

	TAILQ_FOREACH_SAFE(child_io, &bdev_io->child_io, link, tmp) {
		/*
		 * Make sure no references to the parent I/O remain, since it is being
		 * returned to the free pool.
		 */
		child_io->parent = NULL;
		TAILQ_REMOVE(&bdev_io->child_io, child_io, link);

		/*
		 * Child I/O may have an rbuf that needs to be returned to a pool
		 * on a different core, so free it through the request submission
		 * process rather than calling put_io directly here.
		 */
		spdk_bdev_free_io(child_io);
	}

	spdk_bdev_put_io(bdev_io);

	return 0;
}

static void
bdev_io_deferred_completion(void *arg1, void *arg2)
{
	struct spdk_bdev_io *bdev_io = arg1;
	enum spdk_bdev_io_status status = (enum spdk_bdev_io_status)arg2;

	assert(bdev_io->in_submit_request == false);

	spdk_bdev_io_complete(bdev_io, status);
}
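/*
 * A backing module's completion path is expected to funnel into
 * spdk_bdev_io_complete() below. Illustrative sketch (my_driver_done is a
 * hypothetical driver callback, not part of this file):
 *
 *	static void
 *	my_driver_done(void *ctx, int driver_status)
 *	{
 *		struct spdk_bdev_io *bdev_io = ctx;
 *
 *		spdk_bdev_io_complete(bdev_io, driver_status == 0 ?
 *				      SPDK_BDEV_IO_STATUS_SUCCESS :
 *				      SPDK_BDEV_IO_STATUS_FAILED);
 *	}
 *
 * If the driver completes synchronously, inside fn_table->submit_request(),
 * in_submit_request is still true and the completion is deferred via an event
 * to avoid unbounded recursion.
 */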
void
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	if (bdev_io->in_submit_request) {
		/*
		 * Defer completion via an event to avoid potential infinite recursion if the
		 * user's completion callback issues a new I/O.
		 */
		spdk_event_call(spdk_event_allocate(spdk_env_get_current_core(),
						    bdev_io_deferred_completion,
						    bdev_io,
						    (void *)status));
		return;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/* Successful reset */
		if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			/* Increase the blockdev generation if it is a hard reset */
			if (bdev_io->u.reset.type == SPDK_BDEV_RESET_HARD) {
				bdev_io->bdev->gencnt++;
			}
		}
	} else {
		/*
		 * Check the gencnt, to see if this I/O was issued before the most
		 * recent reset. If the gencnt is not equal, then just free the I/O
		 * without calling the callback, since the caller will have already
		 * freed its context for this I/O.
		 */
		if (bdev_io->bdev->gencnt != bdev_io->gencnt) {
			spdk_bdev_put_io(bdev_io);
			return;
		}
	}

	bdev_io->status = status;

	assert(bdev_io->cb != NULL);
	bdev_io->cb(bdev_io, status, bdev_io->caller_ctx);
}

void
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
				  enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
{
	if (sc == SPDK_SCSI_STATUS_GOOD) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
		bdev_io->error.scsi.sc = sc;
		bdev_io->error.scsi.sk = sk;
		bdev_io->error.scsi.asc = asc;
		bdev_io->error.scsi.ascq = ascq;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
			     int *sc, int *sk, int *asc, int *ascq)
{
	assert(sc != NULL);
	assert(sk != NULL);
	assert(asc != NULL);
	assert(ascq != NULL);

	switch (bdev_io->status) {
	case SPDK_BDEV_IO_STATUS_SUCCESS:
		*sc = SPDK_SCSI_STATUS_GOOD;
		*sk = SPDK_SCSI_SENSE_NO_SENSE;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	case SPDK_BDEV_IO_STATUS_NVME_ERROR:
		spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
		break;
	case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
		*sc = bdev_io->error.scsi.sc;
		*sk = bdev_io->error.scsi.sk;
		*asc = bdev_io->error.scsi.asc;
		*ascq = bdev_io->error.scsi.ascq;
		break;
	default:
		*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
		*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	}
}

void
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
{
	if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->error.nvme.sct = sct;
		bdev_io->error.nvme.sc = sc;
		bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
{
	assert(sct != NULL);
	assert(sc != NULL);

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
		*sct = bdev_io->error.nvme.sct;
		*sc = bdev_io->error.nvme.sc;
	} else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_SUCCESS;
	} else {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	}
}

void
spdk_bdev_register(struct spdk_bdev *bdev)
{
	/* initialize the reset generation value to zero */
	bdev->gencnt = 0;

	spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
				sizeof(struct spdk_bdev_channel));

	pthread_mutex_init(&bdev->mutex, NULL);
	bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Inserting bdev %s into list\n", bdev->name);
	TAILQ_INSERT_TAIL(&spdk_bdev_list, bdev, link);
}
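/*
 * Registration sketch for spdk_bdev_register() above (illustrative; a real
 * module also fills in blocklen/blockcnt and the rest of the fn_table
 * contract, and the my_* names are hypothetical):
 *
 *	static struct spdk_bdev_fn_table my_fn_table = {
 *		.destruct		= my_destruct,
 *		.submit_request		= my_submit_request,
 *		.io_type_supported	= my_io_type_supported,
 *		.get_io_channel		= my_get_io_channel,
 *	};
 *
 *	struct spdk_bdev *bdev = ...;	// allocated and described by the module
 *	bdev->fn_table = &my_fn_table;
 *	spdk_bdev_register(bdev);
 */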
void
spdk_bdev_unregister(struct spdk_bdev *bdev)
{
	int rc;

	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Removing bdev %s from list\n", bdev->name);

	pthread_mutex_lock(&bdev->mutex);
	assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_UNCLAIMED);
	if (bdev->status == SPDK_BDEV_STATUS_CLAIMED) {
		if (bdev->remove_cb) {
			bdev->status = SPDK_BDEV_STATUS_REMOVING;
			pthread_mutex_unlock(&bdev->mutex);
			bdev->remove_cb(bdev->remove_ctx);
			return;
		} else {
			bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
		}
	}

	TAILQ_REMOVE(&spdk_bdev_list, bdev, link);
	pthread_mutex_unlock(&bdev->mutex);

	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(bdev);

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
		SPDK_ERRLOG("destruct failed\n");
	}
}

bool
spdk_bdev_claim(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb,
		void *remove_ctx)
{
	bool success;

	pthread_mutex_lock(&bdev->mutex);

	if (bdev->status != SPDK_BDEV_STATUS_CLAIMED) {
		/* Take ownership of bdev. */
		bdev->remove_cb = remove_cb;
		bdev->remove_ctx = remove_ctx;
		bdev->status = SPDK_BDEV_STATUS_CLAIMED;
		success = true;
	} else {
		/* bdev is already claimed. */
		success = false;
	}

	pthread_mutex_unlock(&bdev->mutex);

	return success;
}

void
spdk_bdev_unclaim(struct spdk_bdev *bdev)
{
	bool do_unregister = false;

	pthread_mutex_lock(&bdev->mutex);
	assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_REMOVING);
	if (bdev->status == SPDK_BDEV_STATUS_REMOVING) {
		do_unregister = true;
	}
	bdev->remove_cb = NULL;
	bdev->remove_ctx = NULL;
	bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
	pthread_mutex_unlock(&bdev->mutex);

	if (do_unregister == true) {
		spdk_bdev_unregister(bdev);
	}
}

void
spdk_bdev_io_get_rbuf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_rbuf_cb cb)
{
	assert(cb != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	if (bdev_io->u.read.iovs[0].iov_base == NULL) {
		bdev_io->get_rbuf_cb = cb;
		_spdk_bdev_io_get_rbuf(bdev_io);
	} else {
		cb(bdev_io);
	}
}

void
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
{
	struct iovec *iovs;
	int iovcnt;

	if (bdev_io == NULL) {
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		iovs = bdev_io->u.read.iovs;
		iovcnt = bdev_io->u.read.iovcnt;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		iovs = bdev_io->u.write.iovs;
		iovcnt = bdev_io->u.write.iovcnt;
		break;
	default:
		iovs = NULL;
		iovcnt = 0;
		break;
	}

	if (iovp) {
		*iovp = iovs;
	}
	if (iovcntp) {
		*iovcntp = iovcnt;
	}
}

void spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module)
{
	TAILQ_INSERT_TAIL(&spdk_bdev_module_list, bdev_module, tailq);
}

void spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module)
{
	TAILQ_INSERT_TAIL(&spdk_vbdev_module_list, vbdev_module, tailq);
}

SPDK_SUBSYSTEM_REGISTER(bdev, spdk_bdev_initialize, spdk_bdev_finish, spdk_bdev_config_text)
SPDK_SUBSYSTEM_DEPEND(bdev, copy)