1 /*- 2 * BSD LICENSE 3 * 4 * Copyright (c) Intel Corporation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of Intel Corporation nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "spdk/likely.h"
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk/string.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
#include "spdk/crc32.h"

#include "ftl_core.h"
#include "ftl_band.h"
#include "ftl_io.h"
#include "ftl_rwb.h"
#include "ftl_debug.h"
#include "ftl_reloc.h"

/* Context of a single ftl_flush_active_bands() request; completed from
 * ftl_remove_wptr() as the marked bands get closed. */
struct ftl_band_flush {
	struct spdk_ftl_dev		*dev;
	/* Number of bands left to be flushed */
	size_t				num_bands;
	/* User callback */
	spdk_ftl_fn			cb_fn;
	/* Callback's argument */
	void				*cb_arg;
	/* List link */
	LIST_ENTRY(ftl_band_flush)	list_entry;
};

/* Write pointer - tracks the current write position within one open band. */
struct ftl_wptr {
	/* Owner device */
	struct spdk_ftl_dev		*dev;

	/* Current address */
	struct ftl_addr			addr;

	/* Band currently being written to */
	struct ftl_band			*band;

	/* Current logical block's offset */
	uint64_t			offset;

	/* Current zone */
	struct ftl_zone			*zone;

	/* Pending IO queue */
	TAILQ_HEAD(, ftl_io)		pending_queue;

	/* List link */
	LIST_ENTRY(ftl_wptr)		list_entry;

	/*
	 * If setup in direct mode, there will be no offset or band state update after IO.
	 * The zoned bdev address is not assigned by wptr, and is instead taken directly
	 * from the request.
	 */
	bool				direct_mode;

	/* Number of outstanding write requests */
	uint32_t			num_outstanding;

	/* Marks that the band related to this wptr needs to be closed as soon as possible */
	bool				flush;
};

/* Context of a single spdk_ftl_flush() request; completed once every RWB batch
 * that was pending at submission time has been written out. */
struct ftl_flush {
	/* Owner device */
	struct spdk_ftl_dev		*dev;

	/* Number of batches to wait for */
	size_t				num_req;

	/* Callback */
	struct {
		spdk_ftl_fn	fn;
		void		*ctx;
	} cb;

	/* Batch bitmap */
	struct spdk_bit_array		*bmap;

	/* List link */
	LIST_ENTRY(ftl_flush)		list_entry;
};

/* Mask an IO's flags down to the ones that are meaningful for RWB entries. */
static int
ftl_rwb_flags_from_io(const struct ftl_io *io)
{
	int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD;
	return io->flags & valid_flags;
}

/* True if the entry comes from a "weak" (relocation) write. */
static int
ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry)
{
	return entry->flags & FTL_IO_WEAK;
}

static void
ftl_wptr_free(struct ftl_wptr *wptr)
{
	if (!wptr) {
		return;
	}

	free(wptr);
}

/* Unlink and free a write pointer.  If the wptr was marked for flushing,
 * decrement every pending band-flush request and complete those that have
 * no bands left to wait for. */
static void
ftl_remove_wptr(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_band_flush *flush, *tmp;

	if (spdk_unlikely(wptr->flush)) {
		LIST_FOREACH_SAFE(flush, &dev->band_flush_list, list_entry, tmp) {
			assert(flush->num_bands > 0);
			if (--flush->num_bands == 0) {
				flush->cb_fn(flush->cb_arg, 0);
				LIST_REMOVE(flush, list_entry);
				free(flush);
			}
		}
	}

	LIST_REMOVE(wptr, list_entry);
	ftl_wptr_free(wptr);
}

/* Generic base-bdev IO completion: propagate status, record the append
 * location for zone-append writes, and complete the FTL IO once all of its
 * child requests have finished. */
static void
ftl_io_cmpl_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_io *io = cb_arg;
	struct spdk_ftl_dev *dev = io->dev;

	if (spdk_unlikely(!success)) {
		io->status = -EIO;
	}

	ftl_trace_completion(dev, io, FTL_TRACE_COMPLETION_DISK);

	if (io->type == FTL_IO_WRITE && ftl_is_append_supported(dev)) {
		/* With zone append the device picks the final address */
		assert(io->parent);
		io->parent->addr.offset = spdk_bdev_io_get_append_location(bdev_io);
	}

	ftl_io_dec_req(io);
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	spdk_bdev_free_io(bdev_io);
}

/* Stop writing to a band after a write failure by tearing down its wptr. */
static void
ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band)
{
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			break;
		}
	}

	/* If the band already has the high_prio flag set, other writes must */
	/* have failed earlier, so it's already taken care of. */
	if (band->high_prio) {
		assert(wptr == NULL);
		return;
	}

	ftl_band_write_failed(band);
	ftl_remove_wptr(wptr);
}

/* Find the write pointer currently assigned to the given band (or NULL). */
static struct ftl_wptr *
ftl_wptr_from_band(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			return wptr;
		}
	}

	return NULL;
}

/* Handle a failed head/tail metadata write by halting writes to the band. */
static void
ftl_md_write_fail(struct ftl_io *io, int status)
{
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	char buf[128];

	wptr = ftl_wptr_from_band(band);
	assert(wptr);

	SPDK_ERRLOG("Metadata write failed @addr: %s, status: %d\n",
		    ftl_addr2str(wptr->addr, buf, sizeof(buf)), status);

	ftl_halt_writes(io->dev, band);
}

/* Completion of a band metadata write: advance the band state machine and,
 * once the band is closed, reclaim NV cache space and release relocation
 * references held against other bands. */
static void
ftl_md_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	size_t id;

	wptr = ftl_wptr_from_band(band);
	assert(wptr);

	if (status) {
		ftl_md_write_fail(io, status);
		return;
	}

	ftl_band_set_next_state(band);
	if (band->state == FTL_BAND_STATE_CLOSED) {
		if (ftl_dev_has_nv_cache(dev)) {
			pthread_spin_lock(&nv_cache->lock);
			nv_cache->num_available += ftl_band_user_blocks(band);

			if (spdk_unlikely(nv_cache->num_available > nv_cache->num_data_blocks)) {
				nv_cache->num_available = nv_cache->num_data_blocks;
			}
			pthread_spin_unlock(&nv_cache->lock);
		}

		/*
		 * Go through the reloc_bitmap, checking for all the bands that had its data moved
		 * onto current band and update their counters to allow them to be used for writing
		 * (once they're closed and empty).
		 */
		for (id = 0; id < ftl_get_num_bands(dev); ++id) {
			if (spdk_bit_array_get(band->reloc_bitmap, id)) {
				assert(dev->bands[id].num_reloc_bands > 0);
				dev->bands[id].num_reloc_bands--;

				spdk_bit_array_clear(band->reloc_bitmap, id);
			}
		}

		ftl_remove_wptr(wptr);
	}
}

/* Compute the next physical address and the number of contiguous blocks that
 * can be read from it for a physical-mode IO.  Returns the block count. */
static int
ftl_read_next_physical_addr(struct ftl_io *io, struct ftl_addr *addr)
{
	struct spdk_ftl_dev *dev = io->dev;
	size_t num_blocks, max_blocks;

	assert(ftl_io_mode_physical(io));
	assert(io->iov_pos < io->iov_cnt);

	if (io->pos == 0) {
		*addr = io->addr;
	} else {
		*addr = ftl_band_next_xfer_addr(io->band, io->addr, io->pos);
	}

	assert(!ftl_addr_invalid(*addr));

	/* Metadata has to be read in the way it's written (jumping across */
	/* the zones in xfer_size increments) */
	if (io->flags & FTL_IO_MD) {
		max_blocks = dev->xfer_size - (addr->offset % dev->xfer_size);
		num_blocks = spdk_min(ftl_io_iovec_len_left(io), max_blocks);
		assert(addr->offset / dev->xfer_size ==
		       (addr->offset + num_blocks - 1) / dev->xfer_size);
	} else {
		num_blocks = ftl_io_iovec_len_left(io);
	}

	return num_blocks;
}

/* Begin closing the wptr's band by scheduling the tail metadata write. */
static int
ftl_wptr_close_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	ftl_band_set_state(band, FTL_BAND_STATE_CLOSING);

	return ftl_band_write_tail_md(band, ftl_md_write_cb);
}

/* Begin opening the wptr's band by scheduling the head metadata write. */
static int
ftl_wptr_open_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	assert(ftl_band_zone_is_first(band, wptr->zone));
	assert(band->lba_map.num_vld == 0);

	ftl_band_clear_lba_map(band);

	assert(band->state == FTL_BAND_STATE_PREP);
	ftl_band_set_state(band, FTL_BAND_STATE_OPENING);

	return ftl_band_write_head_md(band, ftl_md_write_cb);
}

/* Submit zone-reset requests for each zone covered by the erase IO. */
static int
ftl_submit_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_band *band = io->band;
	struct ftl_addr addr = io->addr;
	struct ftl_io_channel *ioch;
	struct ftl_zone *zone;
	int rc = 0;
	size_t i;

	ioch = spdk_io_channel_get_ctx(ftl_get_io_channel(dev));

	for (i = 0; i < io->num_blocks; ++i) {
		if (i != 0) {
			/* Subsequent resets target the next zone of the band */
			zone = ftl_band_next_zone(band, ftl_band_zone_from_addr(band, addr));
			assert(zone->info.state == SPDK_BDEV_ZONE_STATE_FULL);
			addr.offset = zone->info.zone_id;
		}

		assert(ftl_addr_get_zone_offset(dev, addr) == 0);

		ftl_trace_submission(dev, io, addr, 1);
		rc = spdk_bdev_zone_management(dev->base_bdev_desc, ioch->base_ioch, addr.offset,
					       SPDK_BDEV_ZONE_RESET, ftl_io_cmpl_cb, io);
		if (spdk_unlikely(rc)) {
			ftl_io_fail(io, rc);
			SPDK_ERRLOG("Vector reset failed with status: %d\n", rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, 1);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

/* True when executing on the device's core (main FTL) thread. */
static bool
ftl_check_core_thread(const struct spdk_ftl_dev *dev)
{
	return dev->core_thread.thread == spdk_get_thread();
}

/* Return the core thread's IO channel; NULL when called off the core thread. */
struct spdk_io_channel *
ftl_get_io_channel(const struct spdk_ftl_dev *dev)
{
	if (ftl_check_core_thread(dev)) {
		return dev->core_thread.ioch;
	}

	return NULL;
}

/* Take the failed zone offline and remove it from the band's zone list. */
static void
ftl_erase_fail(struct ftl_io *io, int status)
{
	struct ftl_zone *zone;
	struct ftl_band *band = io->band;
	char buf[128];

	SPDK_ERRLOG("Erase failed at address: %s, status: %d\n",
		    ftl_addr2str(io->addr, buf, sizeof(buf)), status);

	zone = ftl_band_zone_from_addr(band, io->addr);
	zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
	ftl_band_remove_zone(band, zone);
	band->tail_md_addr = ftl_band_tail_md_addr(band);
}

/* Completion of a single zone reset: mark the zone empty or offline it. */
static void
ftl_zone_erase_cb(struct ftl_io *io, void *ctx, int status)
{
	struct ftl_zone *zone;

	zone = ftl_band_zone_from_addr(io->band, io->addr);
	zone->busy = false;

	if (spdk_unlikely(status)) {
		ftl_erase_fail(io, status);
		return;
	}

	zone->info.state = SPDK_BDEV_ZONE_STATE_EMPTY;
	zone->info.write_pointer = zone->info.zone_id;
}

/* Erase (reset) every non-empty zone of a band, moving it to the PREP state. */
static int
ftl_band_erase(struct ftl_band *band)
{
	struct ftl_zone *zone;
	struct ftl_io *io;
	int rc = 0;

	assert(band->state == FTL_BAND_STATE_CLOSED ||
	       band->state == FTL_BAND_STATE_FREE);

	ftl_band_set_state(band, FTL_BAND_STATE_PREP);

	CIRCLEQ_FOREACH(zone, &band->zones, circleq) {
		if (zone->info.state == SPDK_BDEV_ZONE_STATE_EMPTY) {
			continue;
		}

		io = ftl_io_erase_init(band, 1, ftl_zone_erase_cb);
		if (!io) {
			rc = -ENOMEM;
			break;
		}

		zone->busy = true;
		io->addr.offset = zone->info.zone_id;
		rc = ftl_submit_erase(io);
		if (rc) {
			zone->busy = false;
			assert(0);
			/* TODO: change band's state back to close? */
			break;
		}
	}

	return rc;
}

/* Pick the next free band eligible for writing and start erasing it. */
static struct ftl_band *
ftl_next_write_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	/* Find a free band that has all of its data moved onto other closed bands */
	LIST_FOREACH(band, &dev->free_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_FREE);
		if (band->num_reloc_bands == 0 && band->num_reloc_blocks == 0) {
			break;
		}
	}

	if (spdk_unlikely(!band)) {
		return NULL;
	}

	if (ftl_band_erase(band)) {
		/* TODO: handle erase failure */
		return NULL;
	}

	return band;
}

/* Return the band the next wptr should use: the pre-erased next_band if one
 * was prepared ahead of time, otherwise pick and erase a fresh one. */
static struct ftl_band *
ftl_next_wptr_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (!dev->next_band) {
		band = ftl_next_write_band(dev);
	} else {
		assert(dev->next_band->state == FTL_BAND_STATE_PREP);
		band = dev->next_band;
		dev->next_band = NULL;
	}

	return band;
}

/* Allocate and initialize a write pointer positioned at the band's first zone. */
static struct ftl_wptr *
ftl_wptr_init(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	wptr = calloc(1, sizeof(*wptr));
	if (!wptr) {
		return NULL;
	}

	wptr->dev = dev;
	wptr->band = band;
	wptr->zone = CIRCLEQ_FIRST(&band->zones);
	wptr->addr.offset = wptr->zone->info.zone_id;
	TAILQ_INIT(&wptr->pending_queue);

	return wptr;
}

/* Attach a direct-mode write pointer to an already-open band.  Returns 0 on
 * success, -1 on allocation failure. */
static int
ftl_add_direct_wptr(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	assert(band->state == FTL_BAND_STATE_OPEN);

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	wptr->direct_mode = true;

	if (ftl_band_alloc_lba_map(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: direct band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}
560 561 static void 562 ftl_close_direct_wptr(struct ftl_band *band) 563 { 564 struct ftl_wptr *wptr = ftl_wptr_from_band(band); 565 566 assert(wptr); 567 assert(wptr->direct_mode); 568 assert(band->state == FTL_BAND_STATE_CLOSED); 569 570 ftl_band_release_lba_map(band); 571 572 ftl_remove_wptr(wptr); 573 } 574 575 int 576 ftl_band_set_direct_access(struct ftl_band *band, bool access) 577 { 578 if (access) { 579 return ftl_add_direct_wptr(band); 580 } else { 581 ftl_close_direct_wptr(band); 582 return 0; 583 } 584 } 585 586 static int 587 ftl_add_wptr(struct spdk_ftl_dev *dev) 588 { 589 struct ftl_band *band; 590 struct ftl_wptr *wptr; 591 592 band = ftl_next_wptr_band(dev); 593 if (!band) { 594 return -1; 595 } 596 597 wptr = ftl_wptr_init(band); 598 if (!wptr) { 599 return -1; 600 } 601 602 if (ftl_band_write_prep(band)) { 603 ftl_wptr_free(wptr); 604 return -1; 605 } 606 607 LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry); 608 609 SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id); 610 ftl_trace_write_band(dev, band); 611 return 0; 612 } 613 614 static void 615 ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size) 616 { 617 struct ftl_band *band = wptr->band; 618 struct spdk_ftl_dev *dev = wptr->dev; 619 struct spdk_ftl_conf *conf = &dev->conf; 620 size_t next_thld; 621 622 if (spdk_unlikely(wptr->direct_mode)) { 623 return; 624 } 625 626 wptr->offset += xfer_size; 627 next_thld = (ftl_band_num_usable_blocks(band) * conf->band_thld) / 100; 628 629 if (ftl_band_full(band, wptr->offset)) { 630 ftl_band_set_state(band, FTL_BAND_STATE_FULL); 631 } 632 633 wptr->zone->busy = true; 634 wptr->addr = ftl_band_next_xfer_addr(band, wptr->addr, xfer_size); 635 wptr->zone = ftl_band_next_operational_zone(band, wptr->zone); 636 637 assert(!ftl_addr_invalid(wptr->addr)); 638 639 SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: pu:%lu band:%lu, offset:%lu\n", 640 ftl_addr_get_punit(dev, wptr->addr), 641 ftl_addr_get_band(dev, wptr->addr), 642 wptr->addr.offset); 643 
644 if (wptr->offset >= next_thld && !dev->next_band) { 645 dev->next_band = ftl_next_write_band(dev); 646 } 647 } 648 649 static size_t 650 ftl_wptr_user_blocks_left(const struct ftl_wptr *wptr) 651 { 652 return ftl_band_user_blocks_left(wptr->band, wptr->offset); 653 } 654 655 static bool 656 ftl_wptr_ready(struct ftl_wptr *wptr) 657 { 658 struct ftl_band *band = wptr->band; 659 660 /* TODO: add handling of empty bands */ 661 662 if (spdk_unlikely(!ftl_zone_is_writable(wptr->zone))) { 663 /* Erasing band may fail after it was assigned to wptr. */ 664 if (spdk_unlikely(wptr->zone->info.state == SPDK_BDEV_ZONE_STATE_OFFLINE)) { 665 ftl_wptr_advance(wptr, wptr->dev->xfer_size); 666 } 667 return false; 668 } 669 670 /* If we're in the process of writing metadata, wait till it is */ 671 /* completed. */ 672 /* TODO: we should probably change bands once we're writing tail md */ 673 if (ftl_band_state_changing(band)) { 674 return false; 675 } 676 677 if (band->state == FTL_BAND_STATE_FULL) { 678 if (wptr->num_outstanding == 0) { 679 if (ftl_wptr_close_band(wptr)) { 680 /* TODO: need recovery here */ 681 assert(false); 682 } 683 } 684 685 return false; 686 } 687 688 if (band->state != FTL_BAND_STATE_OPEN) { 689 if (ftl_wptr_open_band(wptr)) { 690 /* TODO: need recovery here */ 691 assert(false); 692 } 693 694 return false; 695 } 696 697 return true; 698 } 699 700 int 701 ftl_flush_active_bands(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg) 702 { 703 struct ftl_wptr *wptr; 704 struct ftl_band_flush *flush; 705 706 assert(ftl_get_core_thread(dev) == spdk_get_thread()); 707 708 flush = calloc(1, sizeof(*flush)); 709 if (spdk_unlikely(!flush)) { 710 return -ENOMEM; 711 } 712 713 LIST_INSERT_HEAD(&dev->band_flush_list, flush, list_entry); 714 715 flush->cb_fn = cb_fn; 716 flush->cb_arg = cb_arg; 717 flush->dev = dev; 718 719 LIST_FOREACH(wptr, &dev->wptr_list, list_entry) { 720 wptr->flush = true; 721 flush->num_bands++; 722 } 723 724 return 0; 725 } 726 727 
/* Return the configured defrag limit entry of the given type. */
static const struct spdk_ftl_limit *
ftl_get_limit(const struct spdk_ftl_dev *dev, int type)
{
	assert(type < SPDK_FTL_LIMIT_MAX);
	return &dev->conf.limits[type];
}

/* Check whether the L2P still points at this cache entry for its LBA. */
static bool
ftl_cache_lba_valid(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	struct ftl_addr addr;

	/* If the LBA is invalid don't bother checking the md and l2p */
	if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) {
		return false;
	}

	addr = ftl_l2p_get(dev, entry->lba);
	if (!(ftl_addr_cached(addr) && addr.cache_offset == entry->pos)) {
		return false;
	}

	return true;
}

/* Evict a write buffer entry, redirecting the L2P back to the on-disk
 * address when it still points at the entry.  Serialized via entry->lock. */
static void
ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	pthread_spin_lock(&entry->lock);

	if (!ftl_rwb_entry_valid(entry)) {
		goto unlock;
	}

	/* If the l2p wasn't updated and still points at the entry, fill it with the */
	/* on-disk address and clear the cache status bit. Otherwise, skip the l2p update */
	/* and just clear the cache status. */
	if (!ftl_cache_lba_valid(dev, entry)) {
		goto clear;
	}

	ftl_l2p_set(dev, entry->lba, entry->addr);
clear:
	ftl_rwb_entry_invalidate(entry);
unlock:
	pthread_spin_unlock(&entry->lock);
}

/* Acquire a write buffer entry of the requested type, evicting whatever it
 * previously held.  Returns NULL when no entry is available. */
static struct ftl_rwb_entry *
ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags)
{
	struct ftl_rwb_entry *entry;

	entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags));
	if (!entry) {
		return NULL;
	}

	ftl_evict_cache_entry(dev, entry);

	entry->flags = flags;
	return entry;
}

/* Push up to `size` zero-filled padding entries into the write buffer. */
static void
ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size)
{
	struct ftl_rwb_entry *entry;
	int flags = FTL_IO_PAD | FTL_IO_INTERNAL;

	for (size_t i = 0; i < size; ++i) {
		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			break;
		}

		entry->lba = FTL_LBA_INVALID;
		entry->addr = ftl_to_addr(FTL_ADDR_INVALID);
		memset(entry->data, 0, FTL_BLOCK_SIZE);
		ftl_rwb_push(entry);
	}
}

/* Drop all free bands during shutdown so no new wptrs get created. */
static void
ftl_remove_free_bands(struct spdk_ftl_dev *dev)
{
	while (!LIST_EMPTY(&dev->free_bands)) {
		LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry);
	}

	dev->next_band = NULL;
}

/* Fill the remainder of the wptr's band with padding entries (bounded by the
 * write buffer's spare capacity). */
static void
ftl_wptr_pad_band(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	size_t size = ftl_rwb_num_pending(dev->rwb);
	size_t blocks_left, rwb_size, pad_size;

	blocks_left = ftl_wptr_user_blocks_left(wptr);
	assert(size <= blocks_left);
	assert(blocks_left % dev->xfer_size == 0);
	rwb_size = ftl_rwb_size(dev->rwb) - size;
	pad_size = spdk_min(blocks_left - size, rwb_size);

	/* Pad write buffer until band is full */
	ftl_rwb_pad(dev, pad_size);
}

/* During shutdown, pad the current band so pending data forms full batches
 * and the device can drain cleanly. */
static void
ftl_wptr_process_shutdown(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	size_t size = ftl_rwb_num_pending(dev->rwb);
	size_t num_active = dev->xfer_size * ftl_rwb_get_active_batches(dev->rwb);

	num_active = num_active ? num_active : dev->xfer_size;
	if (size >= num_active) {
		return;
	}

	/* If we reach this point we need to remove free bands */
	/* and pad current wptr band to the end */
	if (ftl_rwb_get_active_batches(dev->rwb) <= 1) {
		ftl_remove_free_bands(dev);
	}

	ftl_wptr_pad_band(wptr);
}

/* True once all in-flight IO has drained and every wptr has been removed. */
static int
ftl_shutdown_complete(struct spdk_ftl_dev *dev)
{
	struct ftl_io_channel *ioch = spdk_io_channel_get_ctx(dev->core_thread.ioch);

	return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) &&
	       LIST_EMPTY(&dev->wptr_list) && TAILQ_EMPTY(&ioch->retry_queue);
}

/* Throttle user writes based on the number of free bands, applying the
 * strictest configured limit whose threshold has been crossed. */
void
ftl_apply_limits(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit;
	struct ftl_stats *stats = &dev->stats;
	size_t rwb_limit[FTL_RWB_TYPE_MAX];
	int i;

	ftl_rwb_get_limits(dev->rwb, rwb_limit);

	/* Clear existing limit */
	dev->limit = SPDK_FTL_LIMIT_MAX;

	for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) {
		limit = ftl_get_limit(dev, i);

		if (dev->num_free <= limit->thld) {
			rwb_limit[FTL_RWB_TYPE_USER] =
				(limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100;
			stats->limits[i]++;
			dev->limit = i;
			goto apply;
		}
	}

	/* Clear the limits, since we don't need to apply them anymore */
	rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb);
apply:
	ftl_trace_limits(dev, rwb_limit, dev->num_free);
	ftl_rwb_set_limits(dev->rwb, rwb_limit);
}

/* Clear the valid bit for an address.  Caller must hold the band's LBA-map
 * lock.  Returns 1 if the bit was set, 0 if it was already clear. */
static int
ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
	struct ftl_band *band = ftl_band_from_addr(dev, addr);
	struct ftl_lba_map *lba_map = &band->lba_map;
	uint64_t offset;

	offset = ftl_band_block_offset_from_addr(band, addr);

	/* The bit might be already cleared if two writes are scheduled to the */
	/* same LBA at the same time */
	if (spdk_bit_array_get(lba_map->vld, offset)) {
		assert(lba_map->num_vld > 0);
		spdk_bit_array_clear(lba_map->vld, offset);
		lba_map->num_vld--;
		return 1;
	}

	return 0;
}

/* Locked wrapper around ftl_invalidate_addr_unlocked(). */
int
ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr)
{
	struct ftl_band *band;
	int rc;

	assert(!ftl_addr_cached(addr));
	band = ftl_band_from_addr(dev, addr);

	pthread_spin_lock(&band->lba_map.lock);
	rc = ftl_invalidate_addr_unlocked(dev, addr);
	pthread_spin_unlock(&band->lba_map.lock);

	return rc;
}

/* Read should be retried (L2P changed under us while reading the cache). */
static int
ftl_read_retry(int rc)
{
	return rc == -EAGAIN;
}

/* Read was satisfied without touching the device (invalid LBA or cache hit). */
static int
ftl_read_canceled(int rc)
{
	return rc == -EFAULT || rc == 0;
}

/* Copy one block out of the write buffer, re-validating the L2P mapping
 * under the entry lock.  Returns 0 on success, -1 when the mapping moved. */
static int
ftl_cache_read(struct ftl_io *io, uint64_t lba,
	       struct ftl_addr addr, void *buf)
{
	struct ftl_rwb *rwb = io->dev->rwb;
	struct ftl_rwb_entry *entry;
	struct ftl_addr naddr;
	int rc = 0;

	entry = ftl_rwb_entry_from_offset(rwb, addr.cache_offset);
	pthread_spin_lock(&entry->lock);

	naddr = ftl_l2p_get(io->dev, lba);
	if (addr.offset != naddr.offset) {
		rc = -1;
		goto out;
	}

	memcpy(buf, entry->data, FTL_BLOCK_SIZE);
out:
	pthread_spin_unlock(&entry->lock);
	return rc;
}

/* Resolve the next LBA to a physical address and count how many following
 * LBAs are physically contiguous.  Returns that count, 0 for a cache hit,
 * -EFAULT for an unmapped LBA, or -EAGAIN when the cache read must retry. */
static int
ftl_read_next_logical_addr(struct ftl_io *io, struct ftl_addr *addr)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_addr next_addr;
	size_t i;

	*addr = ftl_l2p_get(dev, ftl_io_current_lba(io));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read addr:%lx, lba:%lu\n",
		      addr->offset, ftl_io_current_lba(io));

	/* If the address is invalid, skip it (the buffer should already be zero'ed) */
	if (ftl_addr_invalid(*addr)) {
		return -EFAULT;
	}

	if (ftl_addr_cached(*addr)) {
		if (!ftl_cache_read(io, ftl_io_current_lba(io), *addr, ftl_io_iovec_addr(io))) {
			return 0;
		}

		/* If the state changed, we have to re-read the l2p */
		return -EAGAIN;
	}

	for (i = 1; i < ftl_io_iovec_len_left(io); ++i) {
		next_addr = ftl_l2p_get(dev, ftl_io_get_lba(io, io->pos + i));

		if (ftl_addr_invalid(next_addr) || ftl_addr_cached(next_addr)) {
			break;
		}

		if (addr->offset + i != next_addr.offset) {
			break;
		}
	}

	return i;
}

/* Drive a read IO to completion: resolve addresses, satisfy what can be
 * served from cache, and submit the remaining contiguous chunks to the
 * base bdev (requeuing on -ENOMEM). */
static int
ftl_submit_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io_channel *ioch;
	struct ftl_addr addr;
	int rc = 0, num_blocks;

	ioch = spdk_io_channel_get_ctx(io->ioch);

	assert(LIST_EMPTY(&io->children));

	while (io->pos < io->num_blocks) {
		if (ftl_io_mode_physical(io)) {
			num_blocks = rc = ftl_read_next_physical_addr(io, &addr);
		} else {
			num_blocks = rc = ftl_read_next_logical_addr(io, &addr);
		}

		/* We might need to retry the read from scratch (e.g. */
		/* because write was under way and completed before */
		/* we could read it from rwb */
		if (ftl_read_retry(rc)) {
			continue;
		}

		/* We don't have to schedule the read, as it was read from cache */
		if (ftl_read_canceled(rc)) {
			ftl_io_advance(io, 1);
			ftl_trace_completion(io->dev, io, rc ? FTL_TRACE_COMPLETION_INVALID :
					     FTL_TRACE_COMPLETION_CACHE);
			rc = 0;
			continue;
		}

		assert(num_blocks > 0);

		ftl_trace_submission(dev, io, addr, num_blocks);
		rc = spdk_bdev_read_blocks(dev->base_bdev_desc, ioch->base_ioch,
					   ftl_io_iovec_addr(io),
					   addr.offset,
					   num_blocks, ftl_io_cmpl_cb, io);
		if (spdk_unlikely(rc)) {
			if (rc == -ENOMEM) {
				TAILQ_INSERT_TAIL(&ioch->retry_queue, io, ioch_entry);
				rc = 0;
			} else {
				ftl_io_fail(io, rc);
			}
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, num_blocks);
	}

	/* If we didn't have to read anything from the device, */
	/* complete the request right away */
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

/* Finish a flush request: invoke the user callback and free the context. */
static void
ftl_complete_flush(struct ftl_flush *flush)
{
	assert(flush->num_req == 0);
	LIST_REMOVE(flush, list_entry);

	flush->cb.fn(flush->cb.ctx, 0);

	spdk_bit_array_free(&flush->bmap);
	free(flush);
}

/* Account a completed RWB batch against every outstanding flush request. */
static void
ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch)
{
	struct ftl_flush *flush, *tflush;
	size_t offset;

	LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) {
		offset = ftl_rwb_batch_get_offset(batch);

		if (spdk_bit_array_get(flush->bmap, offset)) {
			spdk_bit_array_clear(flush->bmap, offset);
			if (!(--flush->num_req)) {
				ftl_complete_flush(flush);
			}
		}
	}
}

/* Completion of the NV cache header rewrite performed on wrap-around;
 * marks the cache ready for use again. */
static void
ftl_nv_cache_wrap_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_nv_cache *nv_cache = cb_arg;

	if (!success) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
		/* TODO: go into read-only mode */
		assert(0);
	}

	pthread_spin_lock(&nv_cache->lock);
	nv_cache->ready = true;
	pthread_spin_unlock(&nv_cache->lock);

	spdk_bdev_free_io(bdev_io);
}

/* Core-thread message handler: persist the NV cache header after the write
 * position wrapped back to the beginning of the cache. */
static void
ftl_nv_cache_wrap(void *ctx)
{
	struct ftl_nv_cache *nv_cache = ctx;
	int rc;

	rc = ftl_nv_cache_write_header(nv_cache, false, ftl_nv_cache_wrap_cb, nv_cache);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header: %s\n",
			    spdk_strerror(-rc));
		/* TODO: go into read-only mode */
		assert(0);
	}
}

/* Reserve up to *num_blocks blocks in the NV cache.  On success returns the
 * starting cache address and updates *num_blocks/*phase; returns
 * FTL_LBA_INVALID when the cache is exhausted or mid-wrap.  When the write
 * position reaches the end of the cache, schedules a header rewrite on the
 * core thread and marks the cache not ready until it completes. */
static uint64_t
ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_blocks, unsigned int *phase)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
	uint64_t num_available, cache_size, cache_addr = FTL_LBA_INVALID;

	cache_size = spdk_bdev_get_num_blocks(bdev);

	pthread_spin_lock(&nv_cache->lock);
	if (spdk_unlikely(nv_cache->num_available == 0 || !nv_cache->ready)) {
		goto out;
	}

	num_available = spdk_min(nv_cache->num_available, *num_blocks);
	num_available = spdk_min(num_available, dev->conf.nv_cache.max_request_cnt);

	if (spdk_unlikely(nv_cache->current_addr + num_available > cache_size)) {
		*num_blocks = cache_size - nv_cache->current_addr;
	} else {
		*num_blocks = num_available;
	}

	cache_addr = nv_cache->current_addr;
	nv_cache->current_addr += *num_blocks;
	nv_cache->num_available -= *num_blocks;
	*phase = nv_cache->phase;

	if (nv_cache->current_addr == spdk_bdev_get_num_blocks(bdev)) {
		nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
		nv_cache->phase = ftl_nv_cache_next_phase(nv_cache->phase);
		nv_cache->ready = false;
		spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_nv_cache_wrap, nv_cache);
	}
out:
	pthread_spin_unlock(&nv_cache->lock);
	return cache_addr;
}
/* Allocate a child IO targeting the non-volatile cache; shares the parent's */
/* iovecs (iovcnt == 0 makes ftl_io_init_internal inherit them). */
static struct ftl_io *
ftl_alloc_io_nv_cache(struct ftl_io *parent, size_t num_blocks)
{
	struct ftl_io_init_opts opts = {
		.dev = parent->dev,
		.parent = parent,
		.iovcnt = 0,
		.num_blocks = num_blocks,
		.flags = parent->flags | FTL_IO_CACHE,
	};

	return ftl_io_init_internal(&opts);
}

/* bdev completion callback for non-volatile cache writes; releases the */
/* metadata buffer and completes the IO once all requests have finished. */
static void
ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_io *io = cb_arg;
	struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->addr.offset);
		io->status = -EIO;
	}

	ftl_io_dec_req(io);
	if (ftl_io_done(io)) {
		spdk_mempool_put(nv_cache->md_pool, io->md);
		ftl_io_complete(io);
	}

	spdk_bdev_free_io(bdev_io);
}

/* Submit a prepared child IO to the non-volatile cache bdev. On -ENOMEM the */
/* submission is retried by re-sending this function to the channel's thread; */
/* any other error fails the IO immediately. */
static void
ftl_submit_nv_cache(void *ctx)
{
	struct ftl_io *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	struct ftl_io_channel *ioch;
	int rc;

	ioch = spdk_io_channel_get_ctx(io->ioch);
	thread = spdk_io_channel_get_thread(io->ioch);

	rc = spdk_bdev_write_blocks_with_md(nv_cache->bdev_desc, ioch->cache_ioch,
					    ftl_io_iovec_addr(io), io->md, io->addr.offset,
					    io->num_blocks, ftl_nv_cache_submit_cb, io);
	if (rc == -ENOMEM) {
		spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
		return;
	} else if (rc) {
		SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
			    spdk_strerror(-rc), io->addr.offset, io->num_blocks);
		spdk_mempool_put(nv_cache->md_pool, io->md);
		io->status = -EIO;
		ftl_io_complete(io);
		return;
	}

	ftl_io_advance(io, io->num_blocks);
	ftl_io_inc_req(io);
}

/* Stamp each block's metadata buffer with its (phase-packed) LBA so the */
/* cache contents can be replayed after a dirty shutdown. */
static void
ftl_nv_cache_fill_md(struct ftl_io *io, unsigned int phase)
{
	struct spdk_bdev *bdev;
	struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;
	uint64_t block_off, lba;
	void *md_buf = io->md;

	bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);

	for (block_off = 0; block_off < io->num_blocks; ++block_off) {
		lba = ftl_nv_cache_pack_lba(ftl_io_get_lba(io, block_off), phase);
		memcpy(md_buf, &lba, sizeof(lba));
		/* Advance by the bdev's per-block metadata size */
		md_buf += spdk_bdev_get_md_size(bdev);
	}
}

/* Split a user write into child IOs, reserve space on the non-volatile cache */
/* for each and submit them. Any transient failure (no child IO, no metadata */
/* buffer, no cache space) reschedules this function on the channel's thread */
/* to retry from the current position. */
static void
_ftl_write_nv_cache(void *ctx)
{
	struct ftl_io *child, *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	unsigned int phase;
	uint64_t num_blocks;

	thread = spdk_io_channel_get_thread(io->ioch);

	while (io->pos < io->num_blocks) {
		num_blocks = ftl_io_iovec_len_left(io);

		child = ftl_alloc_io_nv_cache(io, num_blocks);
		if (spdk_unlikely(!child)) {
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			return;
		}

		child->md = spdk_mempool_get(dev->nv_cache.md_pool);
		if (spdk_unlikely(!child->md)) {
			ftl_io_free(child);
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			break;
		}

		/* Reserve area on the write buffer cache */
		child->addr.offset = ftl_reserve_nv_cache(&dev->nv_cache, &num_blocks, &phase);
		if (child->addr.offset == FTL_LBA_INVALID) {
			spdk_mempool_put(dev->nv_cache.md_pool, child->md);
			ftl_io_free(child);
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			break;
		}

		/* Shrink the IO if there isn't enough room in the cache to fill the whole iovec */
		if (spdk_unlikely(num_blocks != ftl_io_iovec_len_left(io))) {
			ftl_io_shrink_iovec(child, num_blocks);
		}

		ftl_nv_cache_fill_md(child, phase);
		ftl_submit_nv_cache(child);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}

/* Entry point for mirroring a completed RWB write onto the NV cache; resets */
/* the parent's position so its iovecs can be traversed again. */
static void
ftl_write_nv_cache(struct ftl_io *parent)
{
	ftl_io_reset(parent);
	parent->flags |= FTL_IO_CACHE;
	_ftl_write_nv_cache(parent);
}

/* Write the non-volatile cache metadata header (block 0 of the cache bdev). */
/* On clean shutdown the current append position is persisted; otherwise */
/* FTL_LBA_INVALID is stored, marking the cache contents as needing recovery. */
/* Returns the spdk_bdev_write_blocks() result (0 or negative errno). */
int
ftl_nv_cache_write_header(struct ftl_nv_cache *nv_cache, bool shutdown,
			  spdk_bdev_io_completion_cb cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
	struct ftl_nv_cache_header *hdr = nv_cache->dma_buf;
	struct spdk_bdev *bdev;
	struct ftl_io_channel *ioch;

	bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	ioch = spdk_io_channel_get_ctx(ftl_get_io_channel(dev));

	memset(hdr, 0, spdk_bdev_get_block_size(bdev));

	hdr->phase = (uint8_t)nv_cache->phase;
	hdr->size = spdk_bdev_get_num_blocks(bdev);
	hdr->uuid = dev->uuid;
	hdr->version = FTL_NV_CACHE_HEADER_VERSION;
	hdr->current_addr = shutdown ? nv_cache->current_addr : FTL_LBA_INVALID;
	/* Checksum covers everything up to (but excluding) the checksum field */
	hdr->checksum = spdk_crc32c_update(hdr, offsetof(struct ftl_nv_cache_header, checksum), 0);

	return spdk_bdev_write_blocks(nv_cache->bdev_desc, ioch->cache_ioch, hdr, 0, 1,
				      cb_fn, cb_arg);
}

/* Zero out the data area of the non-volatile cache (everything past the */
/* header block). Returns the spdk_bdev_write_zeroes_blocks() result. */
int
ftl_nv_cache_scrub(struct ftl_nv_cache *nv_cache, spdk_bdev_io_completion_cb cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
	struct ftl_io_channel *ioch;
	struct spdk_bdev *bdev;

	ioch = spdk_io_channel_get_ctx(ftl_get_io_channel(dev));
	bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);

	return spdk_bdev_write_zeroes_blocks(nv_cache->bdev_desc, ioch->cache_ioch, 1,
					     spdk_bdev_get_num_blocks(bdev) - 1,
					     cb_fn, cb_arg);
}

/* Handle a failed band write: halt further writes to the band, invalidate */
/* the batch's L2P entries and put the batch back on the RWB for resubmission. */
static void
ftl_write_fail(struct ftl_io *io, int status)
{
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;
	char buf[128];

	entry = ftl_rwb_batch_first_entry(batch);

	band = ftl_band_from_addr(io->dev, entry->addr);
	SPDK_ERRLOG("Write failed @addr: %s, status: %d\n",
		    ftl_addr2str(entry->addr, buf, sizeof(buf)), status);

	/* Close the band and, halt wptr and defrag */
	ftl_halt_writes(dev, band);

	ftl_rwb_foreach(entry, batch) {
		/* Invalidate meta set by process_writes() */
		ftl_invalidate_addr(dev, entry->addr);
	}

	/* Reset the batch back to the RWB to resend it later */
	ftl_rwb_batch_revert(batch);
}

/* Completion callback for an RWB batch write to the base bdev. Updates the */
/* physical address of every entry in the batch, marks entries valid in the */
/* band metadata (unless the L2P moved on in the meantime), services pending */
/* flushes and releases the batch. */
static void
ftl_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;
	struct ftl_addr prev_addr, addr = io->addr;

	if (status) {
		ftl_write_fail(io, status);
		return;
	}

	assert(io->num_blocks == dev->xfer_size);
	assert(!(io->flags & FTL_IO_MD));

	ftl_rwb_foreach(entry, batch) {
		band = entry->band;
		if (!(entry->flags & FTL_IO_PAD)) {
			/* Verify that the LBA is set for user blocks */
			assert(entry->lba != FTL_LBA_INVALID);
		}

		/* entry->band is only set for relocation (weak) writes */
		if (band != NULL) {
			assert(band->num_reloc_blocks > 0);
			band->num_reloc_blocks--;
		}

		entry->addr = addr;
		if (entry->lba != FTL_LBA_INVALID) {
			pthread_spin_lock(&entry->lock);
			prev_addr = ftl_l2p_get(dev, entry->lba);

			/* If the l2p was updated in the meantime, don't update band's metadata */
			if (ftl_addr_cached(prev_addr) && prev_addr.cache_offset == entry->pos) {
				/* Setting entry's cache bit needs to be done after metadata */
				/* within the band is updated to make sure that writes */
				/* invalidating the entry clear the metadata as well */
				ftl_band_set_addr(io->band, entry->lba, entry->addr);
				ftl_rwb_entry_set_valid(entry);
			}
			pthread_spin_unlock(&entry->lock);
		}

		SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE,
"Write addr:%lu, lba:%lu\n", 1434 entry->addr.offset, entry->lba); 1435 1436 addr = ftl_band_next_addr(io->band, addr, 1); 1437 } 1438 1439 ftl_process_flush(dev, batch); 1440 ftl_rwb_batch_release(batch); 1441 } 1442 1443 static void 1444 ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry) 1445 { 1446 if (!ftl_rwb_entry_internal(entry)) { 1447 dev->stats.write_user++; 1448 } 1449 dev->stats.write_total++; 1450 } 1451 1452 static void 1453 ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry, 1454 struct ftl_addr addr) 1455 { 1456 struct ftl_addr prev_addr; 1457 struct ftl_rwb_entry *prev; 1458 struct ftl_band *band; 1459 int valid; 1460 1461 prev_addr = ftl_l2p_get(dev, entry->lba); 1462 if (ftl_addr_invalid(prev_addr)) { 1463 ftl_l2p_set(dev, entry->lba, addr); 1464 return; 1465 } 1466 1467 /* If the L2P's physical address is different than what we expected we don't need to */ 1468 /* do anything (someone's already overwritten our data). 
 */
	if (ftl_rwb_entry_weak(entry) && !ftl_addr_cmp(prev_addr, entry->addr)) {
		return;
	}

	if (ftl_addr_cached(prev_addr)) {
		/* Weak writes never replace cached data, only on-band data */
		assert(!ftl_rwb_entry_weak(entry));
		prev = ftl_rwb_entry_from_offset(dev->rwb, prev_addr.cache_offset);
		pthread_spin_lock(&prev->lock);

		/* Re-read the L2P under the lock to protect against updates */
		/* to this LBA from other threads */
		prev_addr = ftl_l2p_get(dev, entry->lba);

		/* If the entry is no longer in cache, another write has been */
		/* scheduled in the meantime, so we have to invalidate its LBA */
		if (!ftl_addr_cached(prev_addr)) {
			ftl_invalidate_addr(dev, prev_addr);
		}

		/* If previous entry is part of cache, remove and invalidate it */
		if (ftl_rwb_entry_valid(prev)) {
			ftl_invalidate_addr(dev, prev->addr);
			ftl_rwb_entry_invalidate(prev);
		}

		ftl_l2p_set(dev, entry->lba, addr);
		pthread_spin_unlock(&prev->lock);
		return;
	}

	/* Lock the band containing previous physical address. This assures atomic changes to */
	/* the L2P as well as metadata. The valid bits in metadata are used to */
	/* check weak writes validity. */
	band = ftl_band_from_addr(dev, prev_addr);
	pthread_spin_lock(&band->lba_map.lock);

	valid = ftl_invalidate_addr_unlocked(dev, prev_addr);

	/* If the address has been invalidated already, we don't want to update */
	/* the L2P for weak writes, as it means the write is no longer valid.
 */
	if (!ftl_rwb_entry_weak(entry) || valid) {
		ftl_l2p_set(dev, entry->lba, addr);
	}

	pthread_spin_unlock(&band->lba_map.lock);
}

/* Allocate a child IO covering exactly one transfer unit (xfer_size blocks) */
/* of the parent write, targeting the given physical address. */
static struct ftl_io *
ftl_io_init_child_write(struct ftl_io *parent, struct ftl_addr addr, ftl_io_fn cb)
{
	struct ftl_io *io;
	struct spdk_ftl_dev *dev = parent->dev;
	struct ftl_io_init_opts opts = {
		.dev = dev,
		.io = NULL,
		.parent = parent,
		.rwb_batch = NULL,
		.band = parent->band,
		.size = sizeof(struct ftl_io),
		.flags = 0,
		.type = parent->type,
		.num_blocks = dev->xfer_size,
		.cb_fn = cb,
		.iovcnt = 0,
	};

	io = ftl_io_init_internal(&opts);
	if (!io) {
		return NULL;
	}

	io->addr = addr;

	return io;
}

/* Completion of a child band write: release the zone, advance its write */
/* pointer bookkeeping and mark it full when the capacity is reached. */
static void
ftl_io_child_write_cb(struct ftl_io *io, void *ctx, int status)
{
	struct ftl_zone *zone;
	struct ftl_wptr *wptr;

	zone = ftl_band_zone_from_addr(io->band, io->addr);
	wptr = ftl_wptr_from_band(io->band);

	zone->busy = false;
	zone->info.write_pointer += io->num_blocks;

	if (zone->info.write_pointer == zone->info.zone_id + zone->info.capacity) {
		zone->info.state = SPDK_BDEV_ZONE_STATE_FULL;
	}

	/* If some other write on the same band failed the write pointer would already be freed */
	if (spdk_likely(wptr)) {
		wptr->num_outstanding--;
	}
}

/* Carve one xfer_size child off the parent IO and submit it to the base */
/* bdev at the write pointer's position (or the IO's own address in direct */
/* mode). Returns 0, -EAGAIN when no child could be allocated, or -EIO. */
static int
ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io_channel *ioch;
	struct ftl_io *child;
	struct ftl_addr addr;
	int rc;

	ioch = spdk_io_channel_get_ctx(io->ioch);

	if (spdk_likely(!wptr->direct_mode)) {
		addr = wptr->addr;
	} else {
		assert(io->flags & FTL_IO_DIRECT_ACCESS);
		assert(ftl_addr_get_band(dev, io->addr) == wptr->band->id);
		addr = io->addr;
	}

	/* Split IO
to child requests and release zone immediately after child is completed */ 1587 child = ftl_io_init_child_write(io, addr, ftl_io_child_write_cb); 1588 if (!child) { 1589 return -EAGAIN; 1590 } 1591 1592 wptr->num_outstanding++; 1593 1594 if (ftl_is_append_supported(dev)) { 1595 rc = spdk_bdev_zone_appendv(dev->base_bdev_desc, ioch->base_ioch, 1596 child->iov, child->iov_cnt, 1597 ftl_addr_get_zone_slba(dev, addr), 1598 dev->xfer_size, ftl_io_cmpl_cb, child); 1599 } else { 1600 rc = spdk_bdev_writev_blocks(dev->base_bdev_desc, ioch->base_ioch, 1601 child->iov, child->iov_cnt, addr.offset, 1602 dev->xfer_size, ftl_io_cmpl_cb, child); 1603 } 1604 1605 if (rc) { 1606 wptr->num_outstanding--; 1607 ftl_io_fail(child, rc); 1608 ftl_io_complete(child); 1609 SPDK_ERRLOG("spdk_bdev_write_blocks_with_md failed with status:%d, addr:%lu\n", 1610 rc, addr.offset); 1611 return -EIO; 1612 } 1613 1614 ftl_io_inc_req(child); 1615 ftl_io_advance(child, dev->xfer_size); 1616 1617 return 0; 1618 } 1619 1620 static int 1621 ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io) 1622 { 1623 struct spdk_ftl_dev *dev = io->dev; 1624 int rc = 0; 1625 1626 assert(io->num_blocks % dev->xfer_size == 0); 1627 /* Only one child write make sense in case of user write */ 1628 assert((io->flags & FTL_IO_MD) || io->iov_cnt == 1); 1629 1630 while (io->iov_pos < io->iov_cnt) { 1631 /* There are no guarantees of the order of completion of NVMe IO submission queue */ 1632 /* so wait until zone is not busy before submitting another write */ 1633 if (!ftl_is_append_supported(dev) && wptr->zone->busy) { 1634 TAILQ_INSERT_TAIL(&wptr->pending_queue, io, ioch_entry); 1635 rc = -EAGAIN; 1636 break; 1637 } 1638 1639 rc = ftl_submit_child_write(wptr, io); 1640 if (spdk_unlikely(rc)) { 1641 if (rc == -EAGAIN) { 1642 TAILQ_INSERT_TAIL(&wptr->pending_queue, io, ioch_entry); 1643 } else { 1644 ftl_io_fail(io, rc); 1645 } 1646 break; 1647 } 1648 1649 ftl_trace_submission(dev, io, wptr->addr, dev->xfer_size); 1650 
		ftl_wptr_advance(wptr, dev->xfer_size);
	}

	if (ftl_io_done(io)) {
		/* Parent IO will complete after all children are completed */
		ftl_io_complete(io);
	}

	return rc;
}

/* Pad the RWB with internal entries so that partially filled batches become */
/* full and can be popped, allowing queued flushes to make progress. */
static void
ftl_flush_pad_batch(struct spdk_ftl_dev *dev)
{
	struct ftl_rwb *rwb = dev->rwb;
	size_t size, num_entries;

	size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) +
	       ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER);

	/* There must be something in the RWB, otherwise the flush */
	/* wouldn't be waiting for anything */
	assert(size > 0);

	/* Only add padding when there's less than xfer size */
	/* entries in the buffer. Otherwise we just have to wait */
	/* for the entries to become ready. */
	num_entries = ftl_rwb_get_active_batches(dev->rwb) * dev->xfer_size;
	if (size < num_entries) {
		ftl_rwb_pad(dev, num_entries - (size % num_entries));
	}
}

/* Drive one write pointer: retry its pending IO first, then pop a full RWB */
/* batch and submit it to the band. Returns the number of blocks submitted */
/* (0 when nothing could be done). */
static int
ftl_wptr_process_writes(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_rwb_batch *batch;
	struct ftl_rwb_entry *entry;
	struct ftl_io *io;

	if (spdk_unlikely(!TAILQ_EMPTY(&wptr->pending_queue))) {
		io = TAILQ_FIRST(&wptr->pending_queue);
		TAILQ_REMOVE(&wptr->pending_queue, io, ioch_entry);

		/* -EAGAIN re-queues the IO inside ftl_submit_write() */
		if (ftl_submit_write(wptr, io) == -EAGAIN) {
			return 0;
		}
	}

	/* Make sure the band is prepared for writing */
	if (!ftl_wptr_ready(wptr)) {
		return 0;
	}

	if (dev->halt) {
		ftl_wptr_process_shutdown(wptr);
	}

	if (spdk_unlikely(wptr->flush)) {
		ftl_wptr_pad_band(wptr);
	}

	batch = ftl_rwb_pop(dev->rwb);
	if (!batch) {
		/* If there are queued flush requests we need to pad the RWB to */
		/* force out remaining entries */
		if (!LIST_EMPTY(&dev->flush_list)) {
			ftl_flush_pad_batch(dev);
		}

		return 0;
	}

	io =
ftl_io_rwb_init(dev, wptr->addr, wptr->band, batch, ftl_write_cb); 1725 if (!io) { 1726 goto error; 1727 } 1728 1729 ftl_rwb_foreach(entry, batch) { 1730 /* Update band's relocation stats if the IO comes from reloc */ 1731 if (entry->flags & FTL_IO_WEAK) { 1732 if (!spdk_bit_array_get(wptr->band->reloc_bitmap, entry->band->id)) { 1733 spdk_bit_array_set(wptr->band->reloc_bitmap, entry->band->id); 1734 entry->band->num_reloc_bands++; 1735 } 1736 } 1737 1738 ftl_trace_rwb_pop(dev, entry); 1739 ftl_update_rwb_stats(dev, entry); 1740 } 1741 1742 SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write addr:%lx\n", wptr->addr.offset); 1743 1744 if (ftl_submit_write(wptr, io)) { 1745 /* TODO: we need some recovery here */ 1746 assert(0 && "Write submit failed"); 1747 if (ftl_io_done(io)) { 1748 ftl_io_free(io); 1749 } 1750 } 1751 1752 return dev->xfer_size; 1753 error: 1754 ftl_rwb_batch_revert(batch); 1755 return 0; 1756 } 1757 1758 static int 1759 ftl_process_writes(struct spdk_ftl_dev *dev) 1760 { 1761 struct ftl_wptr *wptr, *twptr; 1762 size_t num_active = 0; 1763 enum ftl_band_state state; 1764 1765 LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) { 1766 ftl_wptr_process_writes(wptr); 1767 state = wptr->band->state; 1768 1769 if (state != FTL_BAND_STATE_FULL && 1770 state != FTL_BAND_STATE_CLOSING && 1771 state != FTL_BAND_STATE_CLOSED) { 1772 num_active++; 1773 } 1774 } 1775 1776 if (num_active < 1) { 1777 ftl_add_wptr(dev); 1778 } 1779 1780 return 0; 1781 } 1782 1783 static void 1784 ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io) 1785 { 1786 memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE); 1787 1788 if (ftl_rwb_entry_weak(entry)) { 1789 entry->band = ftl_band_from_addr(io->dev, io->addr); 1790 entry->addr = ftl_band_next_addr(entry->band, io->addr, io->pos); 1791 entry->band->num_reloc_blocks++; 1792 } 1793 1794 entry->trace = io->trace; 1795 entry->lba = ftl_io_current_lba(io); 1796 1797 if (entry->md) { 1798 memcpy(entry->md, 
	       &entry->lba, sizeof(entry->lba));
	}
}

/* Copy a user write block-by-block into the RWB, updating the L2P to point */
/* at the cached copies. If no entry is available the IO is parked on the */
/* channel's retry queue. Always returns 0. */
static int
ftl_rwb_fill(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io_channel *ioch;
	struct ftl_rwb_entry *entry;
	struct ftl_addr addr = { .cached = 1 };
	int flags = ftl_rwb_flags_from_io(io);

	ioch = spdk_io_channel_get_ctx(io->ioch);

	while (io->pos < io->num_blocks) {
		/* Blocks without a mapped LBA (e.g. gaps) are simply skipped */
		if (ftl_io_current_lba(io) == FTL_LBA_INVALID) {
			ftl_io_advance(io, 1);
			continue;
		}

		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			TAILQ_INSERT_TAIL(&ioch->retry_queue, io, ioch_entry);
			return 0;
		}

		ftl_rwb_entry_fill(entry, io);

		addr.cache_offset = entry->pos;

		ftl_trace_rwb_fill(dev, io);
		ftl_update_l2p(dev, entry, addr);
		ftl_io_advance(io, 1);

		/* Needs to be done after L2P is updated to avoid race with */
		/* write completion callback when it's processed faster than */
		/* L2P is set in update_l2p().
 */
		ftl_rwb_push(entry);
	}

	if (ftl_io_done(io)) {
		if (ftl_dev_has_nv_cache(dev) && !(io->flags & FTL_IO_BYPASS_CACHE)) {
			/* Mirror the data onto the non-volatile cache before */
			/* acknowledging the write */
			ftl_write_nv_cache(io);
		} else {
			TAILQ_INSERT_TAIL(&ioch->write_cmpl_queue, io, ioch_entry);
		}
	}

	return 0;
}

/* Decide whether defragmentation should be started: only when relocation is */
/* running, no defrag is already active and the free band count has dropped */
/* to the configured start threshold. */
static bool
ftl_dev_needs_defrag(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START);

	if (ftl_reloc_is_halted(dev->reloc)) {
		return false;
	}

	if (ftl_reloc_is_defrag_active(dev->reloc)) {
		return false;
	}

	if (dev->num_free <= limit->thld) {
		return true;
	}

	return false;
}

/* Compute a band's defrag merit: the ratio of invalid to valid blocks scaled */
/* by the band's age. When threshold_valid is given, it is treated as the */
/* number of invalid blocks instead of the band's actual valid-count. */
static double
ftl_band_calc_merit(struct ftl_band *band, size_t *threshold_valid)
{
	size_t usable, valid, invalid;
	double vld_ratio;

	/* If the band doesn't have any usable blocks it's of no use */
	usable = ftl_band_num_usable_blocks(band);
	if (usable == 0) {
		return 0.0;
	}

	valid = threshold_valid ?
		(usable - *threshold_valid) : band->lba_map.num_vld;
	invalid = usable - valid;

	/* Add one to avoid division by 0 */
	vld_ratio = (double)invalid / (double)(valid + 1);
	return vld_ratio * ftl_band_age(band);
}

/* Check whether a band's merit exceeds what it would be at the configured */
/* invalid-block threshold; under critical free-band pressure any band will do. */
static bool
ftl_band_needs_defrag(struct ftl_band *band, struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t thld_vld;

	/* If we're in dire need of free bands, every band is worth defragging */
	if (ftl_current_limit(dev) == SPDK_FTL_LIMIT_CRIT) {
		return true;
	}

	thld_vld = (ftl_band_num_usable_blocks(band) * conf->invalid_thld) / 100;

	return band->merit > ftl_band_calc_merit(band, &thld_vld);
}

/* Pick the closed band with the highest merit as the defrag candidate, or */
/* NULL when no band is worth relocating yet. */
static struct ftl_band *
ftl_select_defrag_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *mband = NULL;
	double merit = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_CLOSED);
		band->merit = ftl_band_calc_merit(band, NULL);
		if (band->merit > merit) {
			merit = band->merit;
			mband = band;
		}
	}

	if (mband && !ftl_band_needs_defrag(mband, dev)) {
		mband = NULL;
	}

	return mband;
}

/* Kick off defragmentation when needed and drive the relocation engine. */
static void
ftl_process_relocs(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (ftl_dev_needs_defrag(dev)) {
		band = ftl_select_defrag_band(dev);
		if (band) {
			ftl_reloc_add(dev->reloc, band, 0, ftl_get_num_blocks_in_band(dev), 0, true);
			ftl_trace_defrag_band(dev, band);
		}
	}

	ftl_reloc(dev->reloc);
}

/* Return the currently active throttling limit level. */
int
ftl_current_limit(const struct spdk_ftl_dev *dev)
{
	return dev->limit;
}

/* Public API: fill in the device attributes structure. */
void
spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs)
{
	attrs->uuid = dev->uuid;
	attrs->num_blocks = dev->num_lbas;
	attrs->block_size = FTL_BLOCK_SIZE;
	attrs->num_zones = ftl_get_num_zones(dev);
	attrs->zone_size = ftl_get_num_blocks_in_zone(dev);
	attrs->conf = dev->conf;
	attrs->base_bdev = spdk_bdev_get_name(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));

	attrs->cache_bdev = NULL;
	if (dev->nv_cache.bdev_desc) {
		attrs->cache_bdev = spdk_bdev_get_name(
					    spdk_bdev_desc_get_bdev(dev->nv_cache.bdev_desc));
	}
}

/* Thread-message trampoline for ftl_io_write(). */
static void
_ftl_io_write(void *ctx)
{
	ftl_io_write((struct ftl_io *)ctx);
}

/* Submit a (metadata) write leaf directly to its band's write pointer. */
static int
ftl_submit_write_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_submit_write(ftl_wptr_from_band(io->band), io);
	if (rc == -EAGAIN) {
		/* EAGAIN means that the request was put on the pending queue */
		return 0;
	}

	return rc;
}

/* Dispatch a write IO: user data goes through the RWB, metadata writes are */
/* submitted directly on the core thread. */
void
ftl_io_write(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	/* For normal IOs we just need to copy the data onto the rwb */
	if (!(io->flags & FTL_IO_MD)) {
		ftl_io_call_foreach_child(io, ftl_rwb_fill);
	} else {
		/* Metadata has its own buffer, so it doesn't have to be copied, so just */
		/* send it to the core thread and schedule the write immediately */
		if (ftl_check_core_thread(dev)) {
			ftl_io_call_foreach_child(io, ftl_submit_write_leaf);
		} else {
			spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io);
		}
	}
}

/* Public API: validate arguments, build a user write IO and submit it. */
/* Returns 0 on success or a negative errno (-EINVAL on bad arguments, */
/* -EBUSY before initialization, -ENOMEM when the IO can't be allocated). */
int
spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	       struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_blocks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov,
			      iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_write(io);

	return 0;
}

/* Dispatch a read IO through the per-child read submission path. */
void
ftl_io_read(struct ftl_io *io)
{
	ftl_io_call_foreach_child(io, ftl_submit_read);
}

/* Public API: validate arguments, build a user read IO and submit it. */
/* Returns 0 on success or a negative errno (-EINVAL on bad arguments, */
/* -EBUSY before initialization, -ENOMEM when the IO can't be allocated). */
int
spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	      struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_blocks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_read(io);
	return 0;
}

/* Allocate a flush request with a bitmap large enough to track every RWB */
/* batch. Returns NULL on allocation failure. */
static struct ftl_flush *
ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;
	struct ftl_rwb *rwb = dev->rwb;

	flush = calloc(1, sizeof(*flush));
	if (!flush) {
		return NULL;
	}

	flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb));
	if (!flush->bmap) {
		goto error;
	}

	flush->dev = dev;
	flush->cb.fn = cb_fn;
	flush->cb.ctx = cb_arg;

	return flush;
error:
	free(flush);
	return NULL;
}

/* Core-thread handler: mark every currently non-empty batch in the flush's */
/* bitmap; the flush completes as those batches get written out. */
static void
_ftl_flush(void *ctx)
{
	struct ftl_flush *flush = ctx;
	struct spdk_ftl_dev *dev = flush->dev;
	struct ftl_rwb *rwb = dev->rwb;
	struct ftl_rwb_batch *batch;

	/* Attach flush object to all non-empty batches */
	ftl_rwb_foreach_batch(batch, rwb) {
		if (!ftl_rwb_batch_empty(batch)) {
			spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch));
			flush->num_req++;
		}
	}

	LIST_INSERT_HEAD(&dev->flush_list,
flush, list_entry); 2119 2120 /* If the RWB was already empty, the flush can be completed right away */ 2121 if (!flush->num_req) { 2122 ftl_complete_flush(flush); 2123 } 2124 } 2125 2126 int 2127 ftl_flush_rwb(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg) 2128 { 2129 struct ftl_flush *flush; 2130 2131 flush = ftl_flush_init(dev, cb_fn, cb_arg); 2132 if (!flush) { 2133 return -ENOMEM; 2134 } 2135 2136 spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush); 2137 return 0; 2138 } 2139 2140 int 2141 spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg) 2142 { 2143 if (!dev->initialized) { 2144 return -EBUSY; 2145 } 2146 2147 return ftl_flush_rwb(dev, cb_fn, cb_arg); 2148 } 2149 2150 bool 2151 ftl_addr_is_written(struct ftl_band *band, struct ftl_addr addr) 2152 { 2153 struct ftl_zone *zone = ftl_band_zone_from_addr(band, addr); 2154 2155 return addr.offset < zone->info.write_pointer; 2156 } 2157 2158 static void ftl_process_media_event(struct spdk_ftl_dev *dev, struct spdk_bdev_media_event event); 2159 2160 static void 2161 _ftl_process_media_event(void *ctx) 2162 { 2163 struct ftl_media_event *event = ctx; 2164 struct spdk_ftl_dev *dev = event->dev; 2165 2166 ftl_process_media_event(dev, event->event); 2167 spdk_mempool_put(dev->media_events_pool, event); 2168 } 2169 2170 static void 2171 ftl_process_media_event(struct spdk_ftl_dev *dev, struct spdk_bdev_media_event event) 2172 { 2173 struct ftl_band *band; 2174 struct ftl_addr addr = { .offset = event.offset }; 2175 size_t block_off; 2176 2177 if (!ftl_check_core_thread(dev)) { 2178 struct ftl_media_event *media_event; 2179 2180 media_event = spdk_mempool_get(dev->media_events_pool); 2181 if (!media_event) { 2182 SPDK_ERRLOG("Media event lost due to lack of memory"); 2183 return; 2184 } 2185 2186 media_event->dev = dev; 2187 media_event->event = event; 2188 spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_process_media_event, 2189 media_event); 2190 return; 2191 } 2192 
	band = ftl_band_from_addr(dev, addr);
	block_off = ftl_band_block_offset_from_addr(band, addr);

	ftl_reloc_add(dev->reloc, band, block_off, event.num_blocks, 0, false);
}

/* Drain and process all pending media events from the base bdev. */
void
ftl_get_media_events(struct spdk_ftl_dev *dev)
{
#define FTL_MAX_MEDIA_EVENTS 128
	struct spdk_bdev_media_event events[FTL_MAX_MEDIA_EVENTS];
	size_t num_events, i;

	if (!dev->initialized) {
		return;
	}

	do {
		num_events = spdk_bdev_get_media_events(dev->base_bdev_desc,
				events, FTL_MAX_MEDIA_EVENTS);

		for (i = 0; i < num_events; ++i) {
			ftl_process_media_event(dev, events[i]);
		}

	} while (num_events);
}

/* Per-channel poller: complete finished writes and resubmit retried IOs. */
/* Returns 1 when any work was done, 0 otherwise. */
int
ftl_io_channel_poll(void *arg)
{
	struct ftl_io_channel *ch = arg;
	struct ftl_io *io;
	TAILQ_HEAD(, ftl_io) retry_queue;

	if (TAILQ_EMPTY(&ch->write_cmpl_queue) && TAILQ_EMPTY(&ch->retry_queue)) {
		return 0;
	}

	while (!TAILQ_EMPTY(&ch->write_cmpl_queue)) {
		io = TAILQ_FIRST(&ch->write_cmpl_queue);
		TAILQ_REMOVE(&ch->write_cmpl_queue, io, ioch_entry);
		ftl_io_complete(io);
	}

	/*
	 * Create local copy of the retry queue to prevent from infinite retrying if IO will be
	 * inserted to the retry queue again
	 */
	TAILQ_INIT(&retry_queue);
	TAILQ_SWAP(&ch->retry_queue, &retry_queue, ftl_io, ioch_entry);

	while (!TAILQ_EMPTY(&retry_queue)) {
		io = TAILQ_FIRST(&retry_queue);
		TAILQ_REMOVE(&retry_queue, io, ioch_entry);
		if (io->type == FTL_IO_WRITE) {
			ftl_io_write(io);
		} else {
			ftl_io_read(io);
		}
	}

	return 1;
}

/* Core-thread poller: drive writes and relocation; unregisters itself once */
/* a halt has fully completed. */
int
ftl_task_core(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	ftl_process_writes(dev);
	ftl_process_relocs(dev);

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE)