/*-
 *   BSD LICENSE
 *
 *   Copyright (C) 2008-2012 Daisuke Aoyama <aoyama@peach.ne.jp>.
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/stdinc.h"

#include "spdk/bdev.h"

#include <rte_config.h>
#include <rte_lcore.h>
#include "spdk/env.h"
#include "spdk/io_channel.h"
#include "spdk/queue.h"
#include "spdk/nvme_spec.h"
#include "spdk/scsi_spec.h"

#include "spdk_internal/bdev.h"
#include "spdk_internal/event.h"
#include "spdk_internal/log.h"

#define SPDK_BDEV_IO_POOL_SIZE	(64 * 1024)
#define BUF_SMALL_POOL_SIZE	8192
#define BUF_LARGE_POOL_SIZE	1024

static struct spdk_mempool *spdk_bdev_g_io_pool = NULL;
static struct spdk_mempool *g_buf_small_pool = NULL;
static struct spdk_mempool *g_buf_large_pool = NULL;

typedef TAILQ_HEAD(, spdk_bdev_io) need_buf_tailq_t;
static need_buf_tailq_t g_need_buf_small[RTE_MAX_LCORE];
static need_buf_tailq_t g_need_buf_large[RTE_MAX_LCORE];

static TAILQ_HEAD(, spdk_bdev_module_if) spdk_bdev_module_list =
	TAILQ_HEAD_INITIALIZER(spdk_bdev_module_list);
static TAILQ_HEAD(, spdk_bdev_module_if) spdk_vbdev_module_list =
	TAILQ_HEAD_INITIALIZER(spdk_vbdev_module_list);

static TAILQ_HEAD(, spdk_bdev) spdk_bdev_list =
	TAILQ_HEAD_INITIALIZER(spdk_bdev_list);

struct spdk_bdev_channel {
	struct spdk_bdev *bdev;

	/* The channel for the underlying device */
	struct spdk_io_channel *channel;
};

struct spdk_bdev *
spdk_bdev_first(void)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_FIRST(&spdk_bdev_list);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Starting bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}

struct spdk_bdev *
spdk_bdev_next(struct spdk_bdev *prev)
{
	struct spdk_bdev *bdev;

	bdev = TAILQ_NEXT(prev, link);
	if (bdev) {
		SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Continuing bdev iteration at %s\n", bdev->name);
	}

	return bdev;
}
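/*
 * Usage sketch (illustrative, not part of this file): enumerating the
 * registered bdevs with the iterator pair above. Assumes it runs on an
 * SPDK core after subsystem initialization.
 *
 *	struct spdk_bdev *bdev;
 *
 *	for (bdev = spdk_bdev_first(); bdev != NULL; bdev = spdk_bdev_next(bdev)) {
 *		printf("registered bdev: %s\n", bdev->name);
 *	}
 */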
struct spdk_bdev *
spdk_bdev_get_by_name(const char *bdev_name)
{
	struct spdk_bdev *bdev = spdk_bdev_first();

	while (bdev != NULL) {
		if (strncmp(bdev_name, bdev->name, sizeof(bdev->name)) == 0) {
			return bdev;
		}
		bdev = spdk_bdev_next(bdev);
	}

	return NULL;
}

static void
spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf)
{
	assert(bdev_io->get_buf_cb != NULL);
	assert(buf != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	bdev_io->u.read.buf_unaligned = buf;
	/* Round the raw pool buffer up to the next 512-byte boundary. */
	bdev_io->u.read.iovs[0].iov_base = (void *)((unsigned long)((char *)buf + 512) & ~511UL);
	bdev_io->u.read.iovs[0].iov_len = bdev_io->u.read.len;
	bdev_io->u.read.put_buf = true;
	bdev_io->get_buf_cb(bdev_io->ch->channel, bdev_io);
}
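/*
 * Worked example of the alignment arithmetic above (illustrative). The buffer
 * pools allocate SPDK_BDEV_*_BUF_MAX_SIZE + 512 bytes per element so that the
 * data pointer can always be rounded up to a 512-byte boundary:
 *
 *	buf = 0x1234  ->  iov_base = (0x1234 + 512) & ~511 = 0x1400
 *	buf = 0x1000  ->  iov_base = (0x1000 + 512) & ~511 = 0x1200
 *
 * Note that an already-aligned pointer is still advanced by a full 512 bytes,
 * which is what the extra 512 bytes of padding in the pool element size cover.
 */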
static void
spdk_bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
{
	struct spdk_mempool *pool;
	struct spdk_bdev_io *tmp;
	void *buf;
	need_buf_tailq_t *tailq;
	uint64_t length;

	assert(bdev_io->u.read.iovcnt == 1);

	length = bdev_io->u.read.len;
	buf = bdev_io->u.read.buf_unaligned;

	if (length <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_buf_small_pool;
		tailq = &g_need_buf_small[rte_lcore_id()];
	} else {
		pool = g_buf_large_pool;
		tailq = &g_need_buf_large[rte_lcore_id()];
	}

	if (TAILQ_EMPTY(tailq)) {
		spdk_mempool_put(pool, buf);
	} else {
		/* Hand the buffer directly to the oldest I/O still waiting for one. */
		tmp = TAILQ_FIRST(tailq);
		TAILQ_REMOVE(tailq, tmp, buf_link);
		spdk_bdev_io_set_buf(tmp, buf);
	}
}

static int
spdk_initialize_buf_pool(void)
{
	int cache_size;

	/*
	 * Ensure no more than half of the total buffers end up in local caches, by
	 * using spdk_env_get_core_count() to determine how many local caches we need
	 * to account for.
	 */
	cache_size = BUF_SMALL_POOL_SIZE / (2 * spdk_env_get_core_count());
	g_buf_small_pool = spdk_mempool_create("buf_small_pool",
					       BUF_SMALL_POOL_SIZE,
					       SPDK_BDEV_SMALL_BUF_MAX_SIZE + 512,
					       cache_size,
					       SPDK_ENV_SOCKET_ID_ANY);
	if (!g_buf_small_pool) {
		SPDK_ERRLOG("create buf small pool failed\n");
		return -1;
	}

	cache_size = BUF_LARGE_POOL_SIZE / (2 * spdk_env_get_core_count());
	g_buf_large_pool = spdk_mempool_create("buf_large_pool",
					       BUF_LARGE_POOL_SIZE,
					       SPDK_BDEV_LARGE_BUF_MAX_SIZE + 512,
					       cache_size,
					       SPDK_ENV_SOCKET_ID_ANY);
	if (!g_buf_large_pool) {
		SPDK_ERRLOG("create buf large pool failed\n");
		return -1;
	}

	return 0;
}

static int
spdk_bdev_module_get_max_ctx_size(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int max_bdev_module_size = 0;

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
			max_bdev_module_size = bdev_module->get_ctx_size();
		}
	}

	return max_bdev_module_size;
}

static int
spdk_bdev_module_initialize(void)
{
	struct spdk_bdev_module_if *bdev_module;
	int rc = 0;

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		rc = bdev_module->module_init();
		if (rc) {
			return rc;
		}
	}
	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		rc = bdev_module->module_init();
		if (rc) {
			return rc;
		}
	}
	return rc;
}

static void
spdk_bdev_module_finish(void)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		if (bdev_module->module_fini) {
			bdev_module->module_fini();
		}
	}
}

static void
spdk_bdev_config_text(FILE *fp)
{
	struct spdk_bdev_module_if *bdev_module;

	TAILQ_FOREACH(bdev_module, &spdk_bdev_module_list, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
	TAILQ_FOREACH(bdev_module, &spdk_vbdev_module_list, tailq) {
		if (bdev_module->config_text) {
			bdev_module->config_text(fp);
		}
	}
}
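/*
 * Minimal module sketch (illustrative; all names here are hypothetical).
 * A module fills in an spdk_bdev_module_if with the hooks iterated above and
 * registers it with spdk_bdev_module_list_add() (defined at the bottom of
 * this file), typically from a constructor or a registration macro:
 *
 *	static int
 *	example_module_init(void)
 *	{
 *		return 0;
 *	}
 *
 *	static struct spdk_bdev_module_if example_module = {
 *		.module_init = example_module_init,
 *		.module_fini = NULL,
 *		.config_text = NULL,
 *		.get_ctx_size = NULL,
 *	};
 *
 *	spdk_bdev_module_list_add(&example_module);
 */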
static int
spdk_bdev_initialize(void)
{
	int i;

	if (spdk_bdev_module_initialize()) {
		SPDK_ERRLOG("bdev module initialize failed\n");
		return -1;
	}

	spdk_bdev_g_io_pool = spdk_mempool_create("blockdev_io",
			      SPDK_BDEV_IO_POOL_SIZE,
			      sizeof(struct spdk_bdev_io) +
			      spdk_bdev_module_get_max_ctx_size(),
			      64,
			      SPDK_ENV_SOCKET_ID_ANY);

	if (spdk_bdev_g_io_pool == NULL) {
		SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
		return -1;
	}

	for (i = 0; i < RTE_MAX_LCORE; i++) {
		TAILQ_INIT(&g_need_buf_small[i]);
		TAILQ_INIT(&g_need_buf_large[i]);
	}

	return spdk_initialize_buf_pool();
}

static int
spdk_bdev_check_pool(struct spdk_mempool *pool, uint32_t count)
{
	if (spdk_mempool_count(pool) != count) {
		SPDK_ERRLOG("spdk_mempool_count(%p) == %zu, should be %u\n",
			    pool, spdk_mempool_count(pool), count);
		return -1;
	} else {
		return 0;
	}
}

static int
spdk_bdev_finish(void)
{
	int rc = 0;

	spdk_bdev_module_finish();

	/* All buffers should have been returned to the pools by now. */
	rc += spdk_bdev_check_pool(g_buf_small_pool, BUF_SMALL_POOL_SIZE);
	rc += spdk_bdev_check_pool(g_buf_large_pool, BUF_LARGE_POOL_SIZE);

	return (rc != 0);
}

struct spdk_bdev_io *
spdk_bdev_get_io(void)
{
	struct spdk_bdev_io *bdev_io;

	bdev_io = spdk_mempool_get(spdk_bdev_g_io_pool);
	if (!bdev_io) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		abort();
	}

	memset(bdev_io, 0, sizeof(*bdev_io));

	return bdev_io;
}

static void
spdk_bdev_put_io(struct spdk_bdev_io *bdev_io)
{
	if (!bdev_io) {
		return;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ && bdev_io->u.read.put_buf) {
		spdk_bdev_io_put_buf(bdev_io);
	}

	spdk_mempool_put(spdk_bdev_g_io_pool, (void *)bdev_io);
}

static void
_spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io)
{
	uint64_t len = bdev_io->u.read.len;
	struct spdk_mempool *pool;
	need_buf_tailq_t *tailq;
	void *buf = NULL;

	if (len <= SPDK_BDEV_SMALL_BUF_MAX_SIZE) {
		pool = g_buf_small_pool;
		tailq = &g_need_buf_small[rte_lcore_id()];
	} else {
		pool = g_buf_large_pool;
		tailq = &g_need_buf_large[rte_lcore_id()];
	}

	buf = spdk_mempool_get(pool);

	if (!buf) {
		/* Pool is empty; queue this I/O until a buffer is returned. */
		TAILQ_INSERT_TAIL(tailq, bdev_io, buf_link);
	} else {
		spdk_bdev_io_set_buf(bdev_io, buf);
	}
}

static void
spdk_bdev_cleanup_pending_buf_io(struct spdk_bdev *bdev)
{
	struct spdk_bdev_io *bdev_io, *tmp;

	TAILQ_FOREACH_SAFE(bdev_io, &g_need_buf_small[rte_lcore_id()], buf_link, tmp) {
		if (bdev_io->bdev == bdev) {
			TAILQ_REMOVE(&g_need_buf_small[rte_lcore_id()], bdev_io, buf_link);
			bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}

	TAILQ_FOREACH_SAFE(bdev_io, &g_need_buf_large[rte_lcore_id()], buf_link, tmp) {
		if (bdev_io->bdev == bdev) {
			TAILQ_REMOVE(&g_need_buf_large[rte_lcore_id()], bdev_io, buf_link);
			bdev_io->status = SPDK_BDEV_IO_STATUS_FAILED;
		}
	}
}

static void
__submit_request(struct spdk_bdev *bdev, struct spdk_bdev_io *bdev_io)
{
	struct spdk_io_channel *ch;

	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/* Resets are submitted without a channel; fail any I/O still waiting for a buffer. */
		spdk_bdev_cleanup_pending_buf_io(bdev);
		ch = NULL;
	} else {
		ch = bdev_io->ch->channel;
	}

	bdev_io->in_submit_request = true;
	bdev->fn_table->submit_request(ch, bdev_io);
	bdev_io->in_submit_request = false;
}

static int
spdk_bdev_io_submit(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev *bdev = bdev_io->bdev;

	__submit_request(bdev, bdev_io);
	return 0;
}

void
spdk_bdev_io_resubmit(struct spdk_bdev_io *bdev_io, struct spdk_bdev *new_bdev)
{
	assert(bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING);
	bdev_io->bdev = new_bdev;

	/*
	 * This field is normally set during spdk_bdev_io_init(), but since the
	 * bdev is being switched, it needs to be reinitialized.
	 */
	bdev_io->gencnt = new_bdev->gencnt;

	__submit_request(new_bdev, bdev_io);
}

static void
spdk_bdev_io_init(struct spdk_bdev_io *bdev_io,
		  struct spdk_bdev *bdev, void *cb_arg,
		  spdk_bdev_io_completion_cb cb)
{
	bdev_io->bdev = bdev;
	bdev_io->caller_ctx = cb_arg;
	bdev_io->cb = cb;
	bdev_io->gencnt = bdev->gencnt;
	bdev_io->status = SPDK_BDEV_IO_STATUS_PENDING;
	bdev_io->in_submit_request = false;
	TAILQ_INIT(&bdev_io->child_io);
}

struct spdk_bdev_io *
spdk_bdev_get_child_io(struct spdk_bdev_io *parent,
		       struct spdk_bdev *bdev,
		       spdk_bdev_io_completion_cb cb,
		       void *cb_arg)
{
	struct spdk_bdev_io *child;

	child = spdk_bdev_get_io();
	if (!child) {
		SPDK_ERRLOG("Unable to get spdk_bdev_io\n");
		return NULL;
	}

	if (cb_arg == NULL) {
		cb_arg = child;
	}

	spdk_bdev_io_init(child, bdev, cb_arg, cb);

	child->type = parent->type;
	memcpy(&child->u, &parent->u, sizeof(child->u));
	if (child->type == SPDK_BDEV_IO_TYPE_READ) {
		child->u.read.put_buf = false;
	}
	child->get_buf_cb = NULL;
	child->parent = parent;

	TAILQ_INSERT_TAIL(&parent->child_io, child, link);

	return child;
}

bool
spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
{
	return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
}

int
spdk_bdev_dump_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
{
	if (bdev->fn_table->dump_config_json) {
		return bdev->fn_table->dump_config_json(bdev->ctxt, w);
	}

	return 0;
}

static int
spdk_bdev_channel_create(void *io_device, uint32_t priority, void *ctx_buf,
			 void *unique_ctx)
{
	struct spdk_bdev *bdev = io_device;
	struct spdk_bdev_channel *ch = ctx_buf;

	ch->bdev = io_device;
	ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt, priority);

	return 0;
}

static void
spdk_bdev_channel_destroy(void *io_device, void *ctx_buf)
{
	struct spdk_bdev_channel *ch = ctx_buf;

	spdk_put_io_channel(ch->channel);
}

struct spdk_io_channel *
spdk_bdev_get_io_channel(struct spdk_bdev *bdev, uint32_t priority)
{
	return spdk_get_io_channel(bdev, priority, false, NULL);
}

static int
spdk_bdev_io_valid(struct spdk_bdev *bdev, uint64_t offset, uint64_t nbytes)
{
	/* Return failure if nbytes is not a multiple of bdev->blocklen */
	if (nbytes % bdev->blocklen) {
		return -1;
	}

	/* Return failure if offset + nbytes is less than offset; indicates there
	 * has been an overflow and hence the offset has been wrapped around */
	if (offset + nbytes < offset) {
		return -1;
	}

	/* Return failure if offset + nbytes exceeds the size of the blockdev */
	if (offset + nbytes > bdev->blockcnt * bdev->blocklen) {
		return -1;
	}

	return 0;
}
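/*
 * Worked example (illustrative): with blocklen = 512 and blockcnt = 1024,
 * a request with offset = 512 * 1023 and nbytes = 1024 passes the alignment
 * and overflow checks but fails the final size check, since
 * offset + nbytes = 512 * 1025 exceeds blockcnt * blocklen = 512 * 1024.
 */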
struct spdk_bdev_io *
spdk_bdev_read(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
	       void *buf, uint64_t offset, uint64_t nbytes,
	       spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during read\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iov.iov_base = buf;
	bdev_io->u.read.iov.iov_len = nbytes;
	bdev_io->u.read.iovs = &bdev_io->u.read.iov;
	bdev_io->u.read.iovcnt = 1;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	bdev_io->u.read.put_buf = false;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
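/*
 * Usage sketch (illustrative; names are hypothetical). A 4096-byte read from
 * offset 0, assuming the caller already holds an I/O channel for the bdev:
 *
 *	static void
 *	read_done(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status,
 *		  void *cb_arg)
 *	{
 *		if (status != SPDK_BDEV_IO_STATUS_SUCCESS) {
 *			SPDK_ERRLOG("read failed\n");
 *		}
 *		spdk_bdev_free_io(bdev_io);
 *	}
 *
 *	...
 *
 *	if (spdk_bdev_read(bdev, ch, buf, 0, 4096, read_done, NULL) == NULL) {
 *		SPDK_ERRLOG("read submission failed\n");
 *	}
 */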
struct spdk_bdev_io *
spdk_bdev_readv(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		struct iovec *iov, int iovcnt,
		uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during readv\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
	bdev_io->u.read.iovs = iov;
	bdev_io->u.read.iovcnt = iovcnt;
	bdev_io->u.read.len = nbytes;
	bdev_io->u.read.offset = offset;
	bdev_io->u.read.put_buf = false;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_write(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		void *buf, uint64_t offset, uint64_t nbytes,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, nbytes) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during write\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iov.iov_base = buf;
	bdev_io->u.write.iov.iov_len = nbytes;
	bdev_io->u.write.iovs = &bdev_io->u.write.iov;
	bdev_io->u.write.iovcnt = 1;
	bdev_io->u.write.len = nbytes;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_writev(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		 struct iovec *iov, int iovcnt,
		 uint64_t offset, uint64_t len,
		 spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (spdk_bdev_io_valid(bdev, offset, len) != 0) {
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during writev\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
	bdev_io->u.write.iovs = iov;
	bdev_io->u.write.iovcnt = iovcnt;
	bdev_io->u.write.len = len;
	bdev_io->u.write.offset = offset;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}
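/*
 * Scatter-gather sketch (illustrative): the vectored variants take an iovec
 * array instead of a single buffer; the total length must still pass
 * spdk_bdev_io_valid(). A hypothetical two-segment 8192-byte write:
 *
 *	struct iovec iov[2] = {
 *		{ .iov_base = buf_a, .iov_len = 4096 },
 *		{ .iov_base = buf_b, .iov_len = 4096 },
 *	};
 *
 *	spdk_bdev_writev(bdev, ch, iov, 2, 0, 8192, write_done, NULL);
 */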
struct spdk_bdev_io *
spdk_bdev_unmap(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		struct spdk_scsi_unmap_bdesc *unmap_d,
		uint16_t bdesc_count,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	if (bdesc_count == 0) {
		SPDK_ERRLOG("Invalid bdesc_count 0\n");
		return NULL;
	}

	if (bdesc_count > bdev->max_unmap_bdesc_count) {
		SPDK_ERRLOG("Invalid bdesc_count %u > max_unmap_bdesc_count %u\n",
			    bdesc_count, bdev->max_unmap_bdesc_count);
		return NULL;
	}

	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during unmap\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
	bdev_io->u.unmap.unmap_bdesc = unmap_d;
	bdev_io->u.unmap.bdesc_count = bdesc_count;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

struct spdk_bdev_io *
spdk_bdev_flush(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
		uint64_t offset, uint64_t length,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during flush\n");
		return NULL;
	}

	bdev_io->ch = channel;
	bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
	bdev_io->u.flush.offset = offset;
	bdev_io->u.flush.length = length;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		return NULL;
	}

	return bdev_io;
}

int
spdk_bdev_reset(struct spdk_bdev *bdev, enum spdk_bdev_reset_type reset_type,
		spdk_bdev_io_completion_cb cb, void *cb_arg)
{
	struct spdk_bdev_io *bdev_io;
	int rc;

	assert(bdev->status != SPDK_BDEV_STATUS_UNCLAIMED);
	bdev_io = spdk_bdev_get_io();
	if (!bdev_io) {
		SPDK_ERRLOG("spdk_bdev_io memory allocation failed during reset\n");
		return -1;
	}

	bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
	bdev_io->u.reset.type = reset_type;
	spdk_bdev_io_init(bdev_io, bdev, cb_arg, cb);

	rc = spdk_bdev_io_submit(bdev_io);
	if (rc < 0) {
		spdk_bdev_put_io(bdev_io);
		SPDK_ERRLOG("reset failed\n");
	}

	return rc;
}
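/*
 * Usage sketch (illustrative): a hard reset increments the bdev generation
 * counter on success (see spdk_bdev_io_complete() below), so I/O issued
 * before the reset is freed silently instead of being completed to a caller
 * that has already cleaned up:
 *
 *	if (spdk_bdev_reset(bdev, SPDK_BDEV_RESET_HARD, reset_done, NULL) < 0) {
 *		SPDK_ERRLOG("reset submission failed\n");
 *	}
 */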
int
spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
{
	struct spdk_bdev_io *child_io, *tmp;

	if (!bdev_io) {
		SPDK_ERRLOG("bdev_io is NULL\n");
		return -1;
	}

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_PENDING) {
		SPDK_ERRLOG("bdev_io is in pending state\n");
		assert(false);
		return -1;
	}

	TAILQ_FOREACH_SAFE(child_io, &bdev_io->child_io, link, tmp) {
		/*
		 * Make sure no references to the parent I/O remain, since it is
		 * being returned to the free pool.
		 */
		child_io->parent = NULL;
		TAILQ_REMOVE(&bdev_io->child_io, child_io, link);

		/*
		 * Child I/O may have a buf that needs to be returned to a pool
		 * on a different core, so free it through the request submission
		 * process rather than calling put_io directly here.
		 */
		spdk_bdev_free_io(child_io);
	}

	spdk_bdev_put_io(bdev_io);

	return 0;
}

static void
bdev_io_deferred_completion(void *arg1, void *arg2)
{
	struct spdk_bdev_io *bdev_io = arg1;
	enum spdk_bdev_io_status status = (enum spdk_bdev_io_status)(intptr_t)arg2;

	assert(bdev_io->in_submit_request == false);

	spdk_bdev_io_complete(bdev_io, status);
}

void
spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
{
	if (bdev_io->in_submit_request) {
		/*
		 * Defer completion via an event to avoid potential infinite recursion if the
		 * user's completion callback issues a new I/O.
		 */
		spdk_event_call(spdk_event_allocate(spdk_env_get_current_core(),
						    bdev_io_deferred_completion,
						    bdev_io,
						    (void *)(intptr_t)status));
		return;
	}

	if (bdev_io->type == SPDK_BDEV_IO_TYPE_RESET) {
		/* Successful reset */
		if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
			/* Increase the blockdev generation if it is a hard reset */
			if (bdev_io->u.reset.type == SPDK_BDEV_RESET_HARD) {
				bdev_io->bdev->gencnt++;
			}
		}
	} else {
		/*
		 * Check the gencnt to see if this I/O was issued before the most
		 * recent reset. If the gencnt is not equal, then just free the I/O
		 * without calling the callback, since the caller will have already
		 * freed its context for this I/O.
		 */
		if (bdev_io->bdev->gencnt != bdev_io->gencnt) {
			spdk_bdev_put_io(bdev_io);
			return;
		}
	}

	bdev_io->status = status;

	assert(bdev_io->cb != NULL);
	bdev_io->cb(bdev_io, status, bdev_io->caller_ctx);
}
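/*
 * Module-side sketch (illustrative; names are hypothetical). A backing module
 * completes an I/O either directly from its submit_request() hook or later
 * from a poller; completing while in_submit_request is true is safe because
 * the completion is deferred through an event, as shown above:
 *
 *	static void
 *	example_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
 *	{
 *		// ... start the operation; on immediate success:
 *		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
 *	}
 */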
void
spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
				  enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
{
	if (sc == SPDK_SCSI_STATUS_GOOD) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
		bdev_io->error.scsi.sc = sc;
		bdev_io->error.scsi.sk = sk;
		bdev_io->error.scsi.asc = asc;
		bdev_io->error.scsi.ascq = ascq;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
			     int *sc, int *sk, int *asc, int *ascq)
{
	assert(sc != NULL);
	assert(sk != NULL);
	assert(asc != NULL);
	assert(ascq != NULL);

	switch (bdev_io->status) {
	case SPDK_BDEV_IO_STATUS_SUCCESS:
		*sc = SPDK_SCSI_STATUS_GOOD;
		*sk = SPDK_SCSI_SENSE_NO_SENSE;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	case SPDK_BDEV_IO_STATUS_NVME_ERROR:
		spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
		break;
	case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
		*sc = bdev_io->error.scsi.sc;
		*sk = bdev_io->error.scsi.sk;
		*asc = bdev_io->error.scsi.asc;
		*ascq = bdev_io->error.scsi.ascq;
		break;
	default:
		*sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
		*sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
		*asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
		*ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
		break;
	}
}

void
spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, int sct, int sc)
{
	if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS) {
		bdev_io->status = SPDK_BDEV_IO_STATUS_SUCCESS;
	} else {
		bdev_io->error.nvme.sct = sct;
		bdev_io->error.nvme.sc = sc;
		bdev_io->status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
	}

	spdk_bdev_io_complete(bdev_io, bdev_io->status);
}

void
spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, int *sct, int *sc)
{
	assert(sct != NULL);
	assert(sc != NULL);

	if (bdev_io->status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
		*sct = bdev_io->error.nvme.sct;
		*sc = bdev_io->error.nvme.sc;
	} else if (bdev_io->status == SPDK_BDEV_IO_STATUS_SUCCESS) {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_SUCCESS;
	} else {
		*sct = SPDK_NVME_SCT_GENERIC;
		*sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
	}
}

void
spdk_bdev_register(struct spdk_bdev *bdev)
{
	/* initialize the reset generation value to zero */
	bdev->gencnt = 0;

	spdk_io_device_register(bdev, spdk_bdev_channel_create, spdk_bdev_channel_destroy,
				sizeof(struct spdk_bdev_channel));

	pthread_mutex_init(&bdev->mutex, NULL);
	bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Inserting bdev %s into list\n", bdev->name);
	TAILQ_INSERT_TAIL(&spdk_bdev_list, bdev, link);
}
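/*
 * Registration sketch (illustrative; the embedding structure and names are
 * hypothetical). A module fills in an spdk_bdev it owns with the geometry
 * and function table, then hands it to the bdev layer:
 *
 *	static struct spdk_bdev example_bdev;
 *
 *	snprintf(example_bdev.name, sizeof(example_bdev.name), "Example0");
 *	example_bdev.blocklen = 512;
 *	example_bdev.blockcnt = 1024;
 *	example_bdev.ctxt = &example_disk;
 *	example_bdev.fn_table = &example_fn_table;
 *	spdk_bdev_register(&example_bdev);
 */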
void
spdk_bdev_unregister(struct spdk_bdev *bdev)
{
	int rc;

	SPDK_TRACELOG(SPDK_TRACE_DEBUG, "Removing bdev %s from list\n", bdev->name);

	pthread_mutex_lock(&bdev->mutex);
	assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_UNCLAIMED);
	if (bdev->status == SPDK_BDEV_STATUS_CLAIMED) {
		if (bdev->remove_cb) {
			/* Notify the claiming module; it finishes the removal via spdk_bdev_unclaim(). */
			bdev->status = SPDK_BDEV_STATUS_REMOVING;
			pthread_mutex_unlock(&bdev->mutex);
			bdev->remove_cb(bdev->remove_ctx);
			return;
		} else {
			bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
		}
	}

	TAILQ_REMOVE(&spdk_bdev_list, bdev, link);
	pthread_mutex_unlock(&bdev->mutex);

	pthread_mutex_destroy(&bdev->mutex);

	spdk_io_device_unregister(bdev);

	rc = bdev->fn_table->destruct(bdev->ctxt);
	if (rc < 0) {
		SPDK_ERRLOG("destruct failed\n");
	}
}

bool
spdk_bdev_claim(struct spdk_bdev *bdev, spdk_bdev_remove_cb_t remove_cb,
		void *remove_ctx)
{
	bool success;

	pthread_mutex_lock(&bdev->mutex);

	if (bdev->status != SPDK_BDEV_STATUS_CLAIMED) {
		/* Take ownership of bdev. */
		bdev->remove_cb = remove_cb;
		bdev->remove_ctx = remove_ctx;
		bdev->status = SPDK_BDEV_STATUS_CLAIMED;
		success = true;
	} else {
		/* bdev is already claimed. */
		success = false;
	}

	pthread_mutex_unlock(&bdev->mutex);

	return success;
}

void
spdk_bdev_unclaim(struct spdk_bdev *bdev)
{
	bool do_unregister = false;

	pthread_mutex_lock(&bdev->mutex);
	assert(bdev->status == SPDK_BDEV_STATUS_CLAIMED || bdev->status == SPDK_BDEV_STATUS_REMOVING);
	if (bdev->status == SPDK_BDEV_STATUS_REMOVING) {
		do_unregister = true;
	}
	bdev->remove_cb = NULL;
	bdev->remove_ctx = NULL;
	bdev->status = SPDK_BDEV_STATUS_UNCLAIMED;
	pthread_mutex_unlock(&bdev->mutex);

	if (do_unregister == true) {
		/* A removal was deferred until the claim was released; finish it now. */
		spdk_bdev_unregister(bdev);
	}
}

void
spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb)
{
	assert(cb != NULL);
	assert(bdev_io->u.read.iovs != NULL);

	if (bdev_io->u.read.iovs[0].iov_base == NULL) {
		bdev_io->get_buf_cb = cb;
		_spdk_bdev_io_get_buf(bdev_io);
	} else {
		cb(bdev_io->ch->channel, bdev_io);
	}
}

void
spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
{
	struct iovec *iovs;
	int iovcnt;

	if (bdev_io == NULL) {
		return;
	}

	switch (bdev_io->type) {
	case SPDK_BDEV_IO_TYPE_READ:
		iovs = bdev_io->u.read.iovs;
		iovcnt = bdev_io->u.read.iovcnt;
		break;
	case SPDK_BDEV_IO_TYPE_WRITE:
		iovs = bdev_io->u.write.iovs;
		iovcnt = bdev_io->u.write.iovcnt;
		break;
	default:
		iovs = NULL;
		iovcnt = 0;
		break;
	}

	if (iovp) {
		*iovp = iovs;
	}
	if (iovcntp) {
		*iovcntp = iovcnt;
	}
}

void
spdk_bdev_module_list_add(struct spdk_bdev_module_if *bdev_module)
{
	TAILQ_INSERT_TAIL(&spdk_bdev_module_list, bdev_module, tailq);
}

void
spdk_vbdev_module_list_add(struct spdk_bdev_module_if *vbdev_module)
{
	TAILQ_INSERT_TAIL(&spdk_vbdev_module_list, vbdev_module, tailq);
}

SPDK_SUBSYSTEM_REGISTER(bdev, spdk_bdev_initialize, spdk_bdev_finish, spdk_bdev_config_text)
SPDK_SUBSYSTEM_DEPEND(bdev, copy)