/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/likely.h"
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk/string.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"
#include "spdk/crc32.h"

#include "ftl_core.h"
#include "ftl_band.h"
#include "ftl_io.h"
#include "ftl_anm.h"
#include "ftl_rwb.h"
#include "ftl_debug.h"
#include "ftl_reloc.h"

struct ftl_band_flush {
	struct spdk_ftl_dev *dev;
	/* Number of bands left to be flushed */
	size_t num_bands;
	/* User callback */
	spdk_ftl_fn cb_fn;
	/* Callback's argument */
	void *cb_arg;
	/* List link */
	LIST_ENTRY(ftl_band_flush) list_entry;
};

struct ftl_wptr {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Current PPA */
	struct ftl_ppa ppa;

	/* Band currently being written to */
	struct ftl_band *band;

	/* Current logical block's offset */
	uint64_t offset;

	/* Current erase block */
	struct ftl_chunk *chunk;

	/* Pending IO queue */
	TAILQ_HEAD(, ftl_io) pending_queue;

	/* List link */
	LIST_ENTRY(ftl_wptr) list_entry;

	/*
	 * If set up in direct mode, there will be no offset or band state update after IO.
	 * The PPA is not assigned by the wptr and is instead taken directly from the request.
	 */
	bool direct_mode;

	/* Number of outstanding write requests */
	uint32_t num_outstanding;

	/* Marks that the band related to this wptr needs to be closed as soon as possible */
	bool flush;
};

struct ftl_flush {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Number of batches to wait for */
	size_t num_req;

	/* Callback */
	struct {
		spdk_ftl_fn fn;
		void *ctx;
	} cb;

	/* Batch bitmap */
	struct spdk_bit_array *bmap;

	/* List link */
	LIST_ENTRY(ftl_flush) list_entry;
};

static int
ftl_rwb_flags_from_io(const struct ftl_io *io)
{
	int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD;
	return io->flags & valid_flags;
}

static int
ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry)
{
	return entry->flags & FTL_IO_WEAK;
}

static void
ftl_wptr_free(struct ftl_wptr *wptr)
{
	if (!wptr) {
		return;
	}

	free(wptr);
}

static void
ftl_remove_wptr(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_band_flush *flush, *tmp;

	if (spdk_unlikely(wptr->flush)) {
		LIST_FOREACH_SAFE(flush, &dev->band_flush_list, list_entry, tmp) {
			assert(flush->num_bands > 0);
			if (--flush->num_bands == 0) {
				flush->cb_fn(flush->cb_arg, 0);
				LIST_REMOVE(flush, list_entry);
				free(flush);
			}
		}
	}

	LIST_REMOVE(wptr, list_entry);
	ftl_wptr_free(wptr);
}

static void
ftl_io_cmpl_cb(void *arg, const struct spdk_nvme_cpl *status)
{
	struct ftl_io *io = arg;

	if (spdk_nvme_cpl_is_error(status)) {
		ftl_io_process_error(io, status);
	}

	ftl_trace_completion(io->dev, io, FTL_TRACE_COMPLETION_DISK);

	ftl_io_dec_req(io);
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}

static void
ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band)
{
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			break;
		}
	}

	/* If the band already has the high_prio flag set, other writes must */
	/* have failed earlier, so it's already taken care of. */
	if (band->high_prio) {
		assert(wptr == NULL);
		return;
	}

	ftl_band_write_failed(band);
	ftl_remove_wptr(wptr);
}

static struct ftl_wptr *
ftl_wptr_from_band(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			return wptr;
		}
	}

	return NULL;
}

static void
ftl_md_write_fail(struct ftl_io *io, int status)
{
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	char buf[128];

	wptr = ftl_wptr_from_band(band);
	assert(wptr);

	SPDK_ERRLOG("Metadata write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(wptr->ppa, buf, sizeof(buf)), status);

	ftl_halt_writes(io->dev, band);
}

static void
ftl_md_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	size_t id;

	wptr = ftl_wptr_from_band(band);
	assert(wptr);

	if (status) {
		ftl_md_write_fail(io, status);
		return;
	}

	ftl_band_set_next_state(band);
	if (band->state == FTL_BAND_STATE_CLOSED) {
		if (ftl_dev_has_nv_cache(dev)) {
			pthread_spin_lock(&nv_cache->lock);
			nv_cache->num_available += ftl_band_user_lbks(band);

			if (spdk_unlikely(nv_cache->num_available > nv_cache->num_data_blocks)) {
				nv_cache->num_available = nv_cache->num_data_blocks;
			}
			pthread_spin_unlock(&nv_cache->lock);
		}

		/*
		 * Go through the reloc_bitmap, checking for all the bands that had their data
		 * moved onto the current band, and update their counters to allow them to be
		 * used for writing (once they're closed and empty).
		 */
		for (id = 0; id < ftl_dev_num_bands(dev); ++id) {
			if (spdk_bit_array_get(band->reloc_bitmap, id)) {
				assert(dev->bands[id].num_reloc_bands > 0);
				dev->bands[id].num_reloc_bands--;

				spdk_bit_array_clear(band->reloc_bitmap, id);
			}
		}

		ftl_remove_wptr(wptr);
	}
}

static int
ftl_ppa_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	size_t lbk_cnt, max_lbks;

	assert(ftl_io_mode_ppa(io));
	assert(io->iov_pos < io->iov_cnt);

	if (io->pos == 0) {
		*ppa = io->ppa;
	} else {
		*ppa = ftl_band_next_xfer_ppa(io->band, io->ppa, io->pos);
	}

	assert(!ftl_ppa_invalid(*ppa));

	/* Metadata has to be read in the way it's written (jumping across */
	/* the chunks in xfer_size increments) */
	if (io->flags & FTL_IO_MD) {
		max_lbks = dev->xfer_size - (ppa->lbk % dev->xfer_size);
		lbk_cnt = spdk_min(ftl_io_iovec_len_left(io), max_lbks);
		assert(ppa->lbk / dev->xfer_size == (ppa->lbk + lbk_cnt - 1) / dev->xfer_size);
	} else {
		lbk_cnt = ftl_io_iovec_len_left(io);
	}

	return lbk_cnt;
}

static int
ftl_wptr_close_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	ftl_band_set_state(band, FTL_BAND_STATE_CLOSING);

	return ftl_band_write_tail_md(band, ftl_md_write_cb);
}

static int
ftl_wptr_open_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	assert(ftl_band_chunk_is_first(band, wptr->chunk));
	assert(band->lba_map.num_vld == 0);

	ftl_band_clear_lba_map(band);

	assert(band->state == FTL_BAND_STATE_PREP);
	ftl_band_set_state(band, FTL_BAND_STATE_OPENING);

	return ftl_band_write_head_md(band, ftl_md_write_cb);
}

static int
ftl_submit_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_band *band = io->band;
	struct ftl_ppa ppa = io->ppa;
	struct ftl_chunk *chunk;
	uint64_t ppa_packed;
	int rc = 0;
	size_t i;

	for (i = 0; i < io->lbk_cnt; ++i) {
		if (i != 0) {
			chunk = ftl_band_next_chunk(band, ftl_band_chunk_from_ppa(band, ppa));
			assert(chunk->state == FTL_CHUNK_STATE_CLOSED ||
			       chunk->state == FTL_CHUNK_STATE_VACANT);
			ppa = chunk->start_ppa;
		}

		assert(ppa.lbk == 0);
		ppa_packed = ftl_ppa_addr_pack(dev, ppa);

		ftl_trace_submission(dev, io, ppa, 1);
		rc = spdk_nvme_ocssd_ns_cmd_vector_reset(dev->ns, ftl_get_write_qpair(dev),
				&ppa_packed, 1, NULL, ftl_io_cmpl_cb, io);
		if (spdk_unlikely(rc)) {
			ftl_io_fail(io, rc);
			SPDK_ERRLOG("Vector reset failed with status: %d\n", rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, 1);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static void
_ftl_io_erase(void *ctx)
{
	ftl_io_erase((struct ftl_io *)ctx);
}

static bool
ftl_check_core_thread(const struct spdk_ftl_dev *dev)
{
	return dev->core_thread.thread == spdk_get_thread();
}

static bool
ftl_check_read_thread(const struct spdk_ftl_dev *dev)
{
	return dev->read_thread.thread == spdk_get_thread();
}

struct spdk_io_channel *
ftl_get_io_channel(const struct spdk_ftl_dev *dev)
{
	if (ftl_check_core_thread(dev)) {
		return dev->core_thread.ioch;
	}
	if (ftl_check_read_thread(dev)) {
		return dev->read_thread.ioch;
	}

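	/* Only the core and read threads own an IO channel, so getting here is a bug */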
	assert(0);
	return NULL;
}

int
ftl_io_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_core_thread(dev)) {
		return ftl_submit_erase(io);
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_erase, io);
	return 0;
}

static struct ftl_band *
ftl_next_write_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	/* Find a free band that has all of its data moved onto other closed bands */
	LIST_FOREACH(band, &dev->free_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_FREE);
		if (band->num_reloc_bands == 0 && band->num_reloc_blocks == 0) {
			break;
		}
	}

	if (spdk_unlikely(!band)) {
		return NULL;
	}

	if (ftl_band_erase(band)) {
		/* TODO: handle erase failure */
		return NULL;
	}

	return band;
}

static struct ftl_band *
ftl_next_wptr_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (!dev->next_band) {
		band = ftl_next_write_band(dev);
	} else {
		assert(dev->next_band->state == FTL_BAND_STATE_PREP);
		band = dev->next_band;
		dev->next_band = NULL;
	}

	return band;
}

static struct ftl_wptr *
ftl_wptr_init(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	wptr = calloc(1, sizeof(*wptr));
	if (!wptr) {
		return NULL;
	}

	wptr->dev = dev;
	wptr->band = band;
	wptr->chunk = CIRCLEQ_FIRST(&band->chunks);
	wptr->ppa = wptr->chunk->start_ppa;
	TAILQ_INIT(&wptr->pending_queue);

	return wptr;
}

static int
ftl_add_direct_wptr(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	assert(band->state == FTL_BAND_STATE_OPEN);

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	wptr->direct_mode = true;

	if (ftl_band_alloc_lba_map(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: direct band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}

static void
ftl_close_direct_wptr(struct ftl_band *band)
{
	struct ftl_wptr *wptr = ftl_wptr_from_band(band);

	assert(wptr);
	assert(wptr->direct_mode);
	assert(band->state == FTL_BAND_STATE_CLOSED);

	ftl_band_release_lba_map(band);

	ftl_remove_wptr(wptr);
}

int
ftl_band_set_direct_access(struct ftl_band *band, bool access)
{
	if (access) {
		return ftl_add_direct_wptr(band);
	} else {
		ftl_close_direct_wptr(band);
		return 0;
	}
}

static int
ftl_add_wptr(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	struct ftl_wptr *wptr;

	band = ftl_next_wptr_band(dev);
	if (!band) {
		return -1;
	}

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	if (ftl_band_write_prep(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}

static void
ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
{
	struct ftl_band *band = wptr->band;
	struct spdk_ftl_dev *dev = wptr->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t next_thld;

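	/* In direct mode the PPAs come straight from the request, so there is no offset to advance */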
	if (spdk_unlikely(wptr->direct_mode)) {
		return;
	}

	wptr->offset += xfer_size;
	next_thld = (ftl_band_num_usable_lbks(band) * conf->band_thld) / 100;

	if (ftl_band_full(band, wptr->offset)) {
		ftl_band_set_state(band, FTL_BAND_STATE_FULL);
	}

	wptr->chunk->busy = true;
	wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size);
	wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk);

	assert(!ftl_ppa_invalid(wptr->ppa));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: grp:%d, pu:%d chunk:%d, lbk:%u\n",
		      wptr->ppa.grp, wptr->ppa.pu, wptr->ppa.chk, wptr->ppa.lbk);

	if (wptr->offset >= next_thld && !dev->next_band) {
		dev->next_band = ftl_next_write_band(dev);
	}
}

static size_t
ftl_wptr_user_lbks_left(const struct ftl_wptr *wptr)
{
	return ftl_band_user_lbks_left(wptr->band, wptr->offset);
}

static int
ftl_wptr_ready(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	/* TODO: add handling of empty bands */

	if (spdk_unlikely(!ftl_chunk_is_writable(wptr->chunk))) {
		/* Erasing the band may fail after it has been assigned to the wptr. */
		if (spdk_unlikely(wptr->chunk->state == FTL_CHUNK_STATE_BAD)) {
			ftl_wptr_advance(wptr, wptr->dev->xfer_size);
		}
		return 0;
	}

	/* If we're in the process of writing metadata, wait until it is */
	/* completed. */
	/* TODO: we should probably change bands once we're writing tail md */
	if (ftl_band_state_changing(band)) {
		return 0;
	}

	if (band->state == FTL_BAND_STATE_FULL) {
		if (wptr->num_outstanding == 0) {
			if (ftl_wptr_close_band(wptr)) {
				/* TODO: need recovery here */
				assert(false);
			}
		}

		return 0;
	}

	if (band->state != FTL_BAND_STATE_OPEN) {
		if (ftl_wptr_open_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}

		return 0;
	}

	return 1;
}

int
ftl_flush_active_bands(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_wptr *wptr;
	struct ftl_band_flush *flush;

	assert(ftl_get_core_thread(dev) == spdk_get_thread());

	flush = calloc(1, sizeof(*flush));
	if (spdk_unlikely(!flush)) {
		return -ENOMEM;
	}

	LIST_INSERT_HEAD(&dev->band_flush_list, flush, list_entry);

	flush->cb_fn = cb_fn;
	flush->cb_arg = cb_arg;
	flush->dev = dev;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		wptr->flush = true;
		flush->num_bands++;
	}

	return 0;
}

static const struct spdk_ftl_limit *
ftl_get_limit(const struct spdk_ftl_dev *dev, int type)
{
	assert(type < SPDK_FTL_LIMIT_MAX);
	return &dev->conf.limits[type];
}

static bool
ftl_cache_lba_valid(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	struct ftl_ppa ppa;

	/* If the LBA is invalid don't bother checking the md and l2p */
	if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) {
		return false;
	}

	ppa = ftl_l2p_get(dev, entry->lba);
	if (!(ftl_ppa_cached(ppa) && ppa.offset == entry->pos)) {
		return false;
	}

	return true;
}

static void
ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	pthread_spin_lock(&entry->lock);

	if (!ftl_rwb_entry_valid(entry)) {
		goto unlock;
	}

	/* If the l2p wasn't updated and still points at the entry, fill it with the */
	/* on-disk PPA and clear the cache status bit. Otherwise, skip the l2p update */
	/* and just clear the cache status. */
	if (!ftl_cache_lba_valid(dev, entry)) {
		goto clear;
	}

	ftl_l2p_set(dev, entry->lba, entry->ppa);
clear:
	ftl_rwb_entry_invalidate(entry);
unlock:
	pthread_spin_unlock(&entry->lock);
}

static struct ftl_rwb_entry *
ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags)
{
	struct ftl_rwb_entry *entry;

	entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags));
	if (!entry) {
		return NULL;
	}

	ftl_evict_cache_entry(dev, entry);

	entry->flags = flags;
	return entry;
}

static void
ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size)
{
	struct ftl_rwb_entry *entry;
	int flags = FTL_IO_PAD | FTL_IO_INTERNAL;

	for (size_t i = 0; i < size; ++i) {
		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			break;
		}

		entry->lba = FTL_LBA_INVALID;
		entry->ppa = ftl_to_ppa(FTL_PPA_INVALID);
		memset(entry->data, 0, FTL_BLOCK_SIZE);
		ftl_rwb_push(entry);
	}
}

static void
ftl_remove_free_bands(struct spdk_ftl_dev *dev)
{
	while (!LIST_EMPTY(&dev->free_bands)) {
		LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry);
	}

	dev->next_band = NULL;
}

static void
ftl_wptr_pad_band(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) +
		      ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER);
	size_t blocks_left, rwb_size, pad_size;

	blocks_left = ftl_wptr_user_lbks_left(wptr);
	rwb_size = ftl_rwb_size(dev->rwb) - size;
	pad_size = spdk_min(blocks_left, rwb_size);

	/* Pad write buffer until band is full */
	ftl_rwb_pad(dev, pad_size);
}

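/*
 * Called while the device is halting: once the write buffer holds less than a full set of
 * active batches, drop the remaining free bands (when at most one batch is still active)
 * and pad the current band so the leftover entries can be flushed.
 */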
static void
ftl_wptr_process_shutdown(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) +
		      ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER);
	size_t num_active = dev->xfer_size * ftl_rwb_get_active_batches(dev->rwb);

	num_active = num_active ? num_active : dev->xfer_size;
	if (size >= num_active) {
		return;
	}

	/* If we reach this point we need to remove free bands */
	/* and pad the current wptr band to the end */
	if (ftl_rwb_get_active_batches(dev->rwb) <= 1) {
		ftl_remove_free_bands(dev);
	}

	ftl_wptr_pad_band(wptr);
}

static int
ftl_shutdown_complete(struct spdk_ftl_dev *dev)
{
	return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) &&
	       LIST_EMPTY(&dev->wptr_list) && TAILQ_EMPTY(&dev->retry_queue);
}

void
ftl_apply_limits(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit;
	struct ftl_stats *stats = &dev->stats;
	size_t rwb_limit[FTL_RWB_TYPE_MAX];
	int i;

	ftl_rwb_get_limits(dev->rwb, rwb_limit);

	/* Clear existing limit */
	dev->limit = SPDK_FTL_LIMIT_MAX;

	for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) {
		limit = ftl_get_limit(dev, i);

		if (dev->num_free <= limit->thld) {
			rwb_limit[FTL_RWB_TYPE_USER] =
				(limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100;
			stats->limits[i]++;
			dev->limit = i;
			goto apply;
		}
	}

	/* Clear the limits, since we don't need to apply them anymore */
	rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb);
apply:
	ftl_trace_limits(dev, rwb_limit, dev->num_free);
	ftl_rwb_set_limits(dev->rwb, rwb_limit);
}

static int
ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band = ftl_band_from_ppa(dev, ppa);
	struct ftl_lba_map *lba_map = &band->lba_map;
	uint64_t offset;

	offset = ftl_band_lbkoff_from_ppa(band, ppa);

	/* The bit might be already cleared if two writes are scheduled to the */
	/* same LBA at the same time */
	if (spdk_bit_array_get(lba_map->vld, offset)) {
		assert(lba_map->num_vld > 0);
		spdk_bit_array_clear(lba_map->vld, offset);
		lba_map->num_vld--;
		return 1;
	}

	return 0;
}

int
ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band;
	int rc;

	assert(!ftl_ppa_cached(ppa));
	band = ftl_band_from_ppa(dev, ppa);

	pthread_spin_lock(&band->lba_map.lock);
	rc = ftl_invalidate_addr_unlocked(dev, ppa);
	pthread_spin_unlock(&band->lba_map.lock);

	return rc;
}

static int
ftl_read_retry(int rc)
{
	return rc == -EAGAIN;
}

static int
ftl_read_canceled(int rc)
{
	return rc == -EFAULT || rc == 0;
}

static void
ftl_add_to_retry_queue(struct ftl_io *io)
{
	if (!(io->flags & FTL_IO_RETRY)) {
		io->flags |= FTL_IO_RETRY;
		TAILQ_INSERT_TAIL(&io->dev->retry_queue, io, retry_entry);
	}
}

static int
ftl_ppa_cache_read(struct ftl_io *io, uint64_t lba,
		   struct ftl_ppa ppa, void *buf)
{
	struct ftl_rwb *rwb = io->dev->rwb;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa nppa;
	int rc = 0;

	entry = ftl_rwb_entry_from_offset(rwb, ppa.offset);
	pthread_spin_lock(&entry->lock);

	nppa = ftl_l2p_get(io->dev, lba);
	if (ppa.ppa != nppa.ppa) {
		rc = -1;
		goto out;
	}

	memcpy(buf, entry->data, FTL_BLOCK_SIZE);
out:
	pthread_spin_unlock(&entry->lock);
	return rc;
}

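/*
 * Translate the IO's current LBA through the L2P and report how many of the following
 * LBAs map to physically contiguous PPAs, so they can be read with a single command.
 */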
static int
ftl_lba_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa next_ppa;
	size_t i;

	*ppa = ftl_l2p_get(dev, ftl_io_current_lba(io));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read ppa:%lx, lba:%lu\n",
		      ppa->ppa, ftl_io_current_lba(io));

	/* If the PPA is invalid, skip it (the buffer should already be zeroed) */
	if (ftl_ppa_invalid(*ppa)) {
		return -EFAULT;
	}

	if (ftl_ppa_cached(*ppa)) {
		if (!ftl_ppa_cache_read(io, ftl_io_current_lba(io), *ppa, ftl_io_iovec_addr(io))) {
			return 0;
		}

		/* If the state changed, we have to re-read the l2p */
		return -EAGAIN;
	}

	for (i = 1; i < ftl_io_iovec_len_left(io); ++i) {
		next_ppa = ftl_l2p_get(dev, ftl_io_get_lba(io, io->pos + i));

		if (ftl_ppa_invalid(next_ppa) || ftl_ppa_cached(next_ppa)) {
			break;
		}

		if (ftl_ppa_addr_pack(dev, *ppa) + i != ftl_ppa_addr_pack(dev, next_ppa)) {
			break;
		}
	}

	return i;
}

static int
ftl_submit_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa ppa;
	int rc = 0, lbk_cnt;

	assert(LIST_EMPTY(&io->children));

	while (io->pos < io->lbk_cnt) {
		if (ftl_io_mode_ppa(io)) {
			lbk_cnt = rc = ftl_ppa_read_next_ppa(io, &ppa);
		} else {
			lbk_cnt = rc = ftl_lba_read_next_ppa(io, &ppa);
		}

		/* We might need to retry the read from scratch (e.g. */
		/* because a write was under way and completed before */
		/* we could read it from the rwb) */
		if (ftl_read_retry(rc)) {
			continue;
		}

		/* We don't have to schedule the read, as it was read from cache */
		if (ftl_read_canceled(rc)) {
			ftl_io_advance(io, 1);
			ftl_trace_completion(io->dev, io, rc ? FTL_TRACE_COMPLETION_INVALID :
					     FTL_TRACE_COMPLETION_CACHE);
			rc = 0;
			continue;
		}

		assert(lbk_cnt > 0);

		ftl_trace_submission(dev, io, ppa, lbk_cnt);
		rc = spdk_nvme_ns_cmd_read(dev->ns, ftl_get_read_qpair(dev),
					   ftl_io_iovec_addr(io),
					   ftl_ppa_addr_pack(io->dev, ppa), lbk_cnt,
					   ftl_io_cmpl_cb, io, 0);
		if (spdk_unlikely(rc)) {
			if (rc == -ENOMEM) {
				ftl_add_to_retry_queue(io);
			} else {
				ftl_io_fail(io, rc);
			}
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, lbk_cnt);
	}

	/* If we didn't have to read anything from the device, */
	/* complete the request right away */
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static void
ftl_complete_flush(struct ftl_flush *flush)
{
	assert(flush->num_req == 0);
	LIST_REMOVE(flush, list_entry);

	flush->cb.fn(flush->cb.ctx, 0);

	spdk_bit_array_free(&flush->bmap);
	free(flush);
}

static void
ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch)
{
	struct ftl_flush *flush, *tflush;
	size_t offset;

	LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) {
		offset = ftl_rwb_batch_get_offset(batch);

		if (spdk_bit_array_get(flush->bmap, offset)) {
			spdk_bit_array_clear(flush->bmap, offset);
			if (!(--flush->num_req)) {
				ftl_complete_flush(flush);
			}
		}
	}
}

static void
ftl_nv_cache_wrap_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_nv_cache *nv_cache = cb_arg;

	if (!success) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
		/* TODO: go into read-only mode */
		assert(0);
	}

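	/* The new header is persisted, so the cache can accept writes for the next phase again */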
	pthread_spin_lock(&nv_cache->lock);
	nv_cache->ready = true;
	pthread_spin_unlock(&nv_cache->lock);

	spdk_bdev_free_io(bdev_io);
}

static void
ftl_nv_cache_wrap(void *ctx)
{
	struct ftl_nv_cache *nv_cache = ctx;
	int rc;

	rc = ftl_nv_cache_write_header(nv_cache, false, ftl_nv_cache_wrap_cb, nv_cache);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Unable to write non-volatile cache metadata header: %s\n",
			    spdk_strerror(-rc));
		/* TODO: go into read-only mode */
		assert(0);
	}
}

static uint64_t
ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks, unsigned int *phase)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
	uint64_t num_available, cache_size, cache_addr = FTL_LBA_INVALID;

	cache_size = spdk_bdev_get_num_blocks(bdev);

	pthread_spin_lock(&nv_cache->lock);
	if (spdk_unlikely(nv_cache->num_available == 0 || !nv_cache->ready)) {
		goto out;
	}

	num_available = spdk_min(nv_cache->num_available, *num_lbks);
	num_available = spdk_min(num_available, dev->conf.nv_cache.max_request_cnt);

	if (spdk_unlikely(nv_cache->current_addr + num_available > cache_size)) {
		*num_lbks = cache_size - nv_cache->current_addr;
	} else {
		*num_lbks = num_available;
	}

	cache_addr = nv_cache->current_addr;
	nv_cache->current_addr += *num_lbks;
	nv_cache->num_available -= *num_lbks;
	*phase = nv_cache->phase;

	if (nv_cache->current_addr == spdk_bdev_get_num_blocks(bdev)) {
		nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
		nv_cache->phase = ftl_nv_cache_next_phase(nv_cache->phase);
		nv_cache->ready = false;
		spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_nv_cache_wrap, nv_cache);
	}
out:
	pthread_spin_unlock(&nv_cache->lock);
	return cache_addr;
}

static struct ftl_io *
ftl_alloc_io_nv_cache(struct ftl_io *parent, size_t num_lbks)
{
	struct ftl_io_init_opts opts = {
		.dev = parent->dev,
		.parent = parent,
		.data = ftl_io_iovec_addr(parent),
		.lbk_cnt = num_lbks,
		.flags = parent->flags | FTL_IO_CACHE,
	};

	return ftl_io_init_internal(&opts);
}

static void
ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_io *io = cb_arg;
	struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->ppa.ppa);
		io->status = -EIO;
	}

	ftl_io_dec_req(io);
	if (ftl_io_done(io)) {
		spdk_mempool_put(nv_cache->md_pool, io->md);
		ftl_io_complete(io);
	}

	spdk_bdev_free_io(bdev_io);
}

static void
ftl_submit_nv_cache(void *ctx)
{
	struct ftl_io *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	struct ftl_io_channel *ioch;
	int rc;

	ioch = spdk_io_channel_get_ctx(io->ioch);
	thread = spdk_io_channel_get_thread(io->ioch);

	rc = spdk_bdev_write_blocks_with_md(nv_cache->bdev_desc, ioch->cache_ioch,
					    ftl_io_iovec_addr(io), io->md, io->ppa.ppa,
					    io->lbk_cnt, ftl_nv_cache_submit_cb, io);
	if (rc == -ENOMEM) {
		spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
		return;
	} else if (rc) {
		SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
			    spdk_strerror(-rc), io->ppa.ppa, io->lbk_cnt);
		spdk_mempool_put(nv_cache->md_pool, io->md);
		io->status = -EIO;
		ftl_io_complete(io);
		return;
	}

	ftl_io_advance(io, io->lbk_cnt);
	ftl_io_inc_req(io);
}

static void
ftl_nv_cache_fill_md(struct ftl_io *io, unsigned int phase)
{
	struct spdk_bdev *bdev;
	struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;
	uint64_t lbk_off, lba;
	void *md_buf = io->md;

	bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);

	for (lbk_off = 0; lbk_off < io->lbk_cnt; ++lbk_off) {
		lba = ftl_nv_cache_pack_lba(ftl_io_get_lba(io, lbk_off), phase);
		memcpy(md_buf, &lba, sizeof(lba));
		md_buf += spdk_bdev_get_md_size(bdev);
	}
}

static void
_ftl_write_nv_cache(void *ctx)
{
	struct ftl_io *child, *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	unsigned int phase;
	uint64_t num_lbks;

	thread = spdk_io_channel_get_thread(io->ioch);

	while (io->pos < io->lbk_cnt) {
		num_lbks = ftl_io_iovec_len_left(io);

		child = ftl_alloc_io_nv_cache(io, num_lbks);
		if (spdk_unlikely(!child)) {
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			return;
		}

		child->md = spdk_mempool_get(dev->nv_cache.md_pool);
		if (spdk_unlikely(!child->md)) {
			ftl_io_free(child);
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			break;
		}

		/* Reserve area on the write buffer cache */
		child->ppa.ppa = ftl_reserve_nv_cache(&dev->nv_cache, &num_lbks, &phase);
		if (child->ppa.ppa == FTL_LBA_INVALID) {
			spdk_mempool_put(dev->nv_cache.md_pool, child->md);
			ftl_io_free(child);
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			break;
		}

		/* Shrink the IO if there isn't enough room in the cache to fill the whole iovec */
		if (spdk_unlikely(num_lbks != ftl_io_iovec_len_left(io))) {
			ftl_io_shrink_iovec(child, num_lbks);
		}

		ftl_nv_cache_fill_md(child, phase);
		ftl_submit_nv_cache(child);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}

static void
ftl_write_nv_cache(struct ftl_io *parent)
{
	ftl_io_reset(parent);
	parent->flags |= FTL_IO_CACHE;
	_ftl_write_nv_cache(parent);
}

int
ftl_nv_cache_write_header(struct ftl_nv_cache *nv_cache, bool shutdown,
			  spdk_bdev_io_completion_cb cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
	struct ftl_nv_cache_header *hdr = nv_cache->dma_buf;
	struct spdk_bdev *bdev;
	struct ftl_io_channel *ioch;

	bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	ioch = spdk_io_channel_get_ctx(ftl_get_io_channel(dev));

	memset(hdr, 0, spdk_bdev_get_block_size(bdev));

	hdr->phase = (uint8_t)nv_cache->phase;
	hdr->size = spdk_bdev_get_num_blocks(bdev);
	hdr->uuid = dev->uuid;
	hdr->version = FTL_NV_CACHE_HEADER_VERSION;
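	/* A valid current address is only recorded on a clean shutdown */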
	hdr->current_addr = shutdown ? nv_cache->current_addr : FTL_LBA_INVALID;
	hdr->checksum = spdk_crc32c_update(hdr, offsetof(struct ftl_nv_cache_header, checksum), 0);

	return spdk_bdev_write_blocks(nv_cache->bdev_desc, ioch->cache_ioch, hdr, 0, 1,
				      cb_fn, cb_arg);
}

int
ftl_nv_cache_scrub(struct ftl_nv_cache *nv_cache, spdk_bdev_io_completion_cb cb_fn, void *cb_arg)
{
	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
	struct ftl_io_channel *ioch;
	struct spdk_bdev *bdev;

	ioch = spdk_io_channel_get_ctx(ftl_get_io_channel(dev));
	bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);

	return spdk_bdev_write_zeroes_blocks(nv_cache->bdev_desc, ioch->cache_ioch, 1,
					     spdk_bdev_get_num_blocks(bdev) - 1,
					     cb_fn, cb_arg);
}

static void
ftl_write_fail(struct ftl_io *io, int status)
{
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;
	char buf[128];

	entry = ftl_rwb_batch_first_entry(batch);

	band = ftl_band_from_ppa(io->dev, entry->ppa);
	SPDK_ERRLOG("Write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(entry->ppa, buf, sizeof(buf)), status);

	/* Close the band, halt the wptr and defrag */
	ftl_halt_writes(dev, band);

	ftl_rwb_foreach(entry, batch) {
		/* Invalidate meta set by process_writes() */
		ftl_invalidate_addr(dev, entry->ppa);
	}

	/* Reset the batch back to the RWB to resend it later */
	ftl_rwb_batch_revert(batch);
}

static void
ftl_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;

	if (status) {
		ftl_write_fail(io, status);
		return;
	}

	assert(io->lbk_cnt == dev->xfer_size);
	ftl_rwb_foreach(entry, batch) {
		band = entry->band;
		if (!(io->flags & FTL_IO_MD) && !(entry->flags & FTL_IO_PAD)) {
			/* Verify that the LBA is set for user lbks */
			assert(entry->lba != FTL_LBA_INVALID);
		}

		if (band != NULL) {
			assert(band->num_reloc_blocks > 0);
			band->num_reloc_blocks--;
		}

		SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lu, lba:%lu\n",
			      entry->ppa.ppa, entry->lba);
	}

	ftl_process_flush(dev, batch);
	ftl_rwb_batch_release(batch);
}

static void
ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry)
{
	if (!ftl_rwb_entry_internal(entry)) {
		dev->stats.write_user++;
	}
	dev->stats.write_total++;
}

static void
ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry,
	       struct ftl_ppa ppa)
{
	struct ftl_ppa prev_ppa;
	struct ftl_rwb_entry *prev;
	struct ftl_band *band;
	int valid;

	prev_ppa = ftl_l2p_get(dev, entry->lba);
	if (ftl_ppa_invalid(prev_ppa)) {
		ftl_l2p_set(dev, entry->lba, ppa);
		return;
	}

	/* If the L2P's PPA is different than what we expected we don't need to */
	/* do anything (someone's already overwritten our data). */
	if (ftl_rwb_entry_weak(entry) && !ftl_ppa_cmp(prev_ppa, entry->ppa)) {
		return;
	}

	if (ftl_ppa_cached(prev_ppa)) {
		assert(!ftl_rwb_entry_weak(entry));
		prev = ftl_rwb_entry_from_offset(dev->rwb, prev_ppa.offset);
		pthread_spin_lock(&prev->lock);

		/* Re-read the L2P under the lock to protect against updates */
		/* to this LBA from other threads */
		prev_ppa = ftl_l2p_get(dev, entry->lba);

		/* If the entry is no longer in cache, another write has been */
		/* scheduled in the meantime, so we have to invalidate its LBA */
		if (!ftl_ppa_cached(prev_ppa)) {
			ftl_invalidate_addr(dev, prev_ppa);
		}

		/* If the previous entry is part of the cache, remove and invalidate it */
		if (ftl_rwb_entry_valid(prev)) {
			ftl_invalidate_addr(dev, prev->ppa);
			ftl_rwb_entry_invalidate(prev);
		}

		ftl_l2p_set(dev, entry->lba, ppa);
		pthread_spin_unlock(&prev->lock);
		return;
	}

	/* Lock the band containing the previous PPA. This assures atomic changes to */
	/* the L2P as well as the metadata. The valid bits in the metadata are used to */
	/* check weak writes' validity. */
	band = ftl_band_from_ppa(dev, prev_ppa);
	pthread_spin_lock(&band->lba_map.lock);

	valid = ftl_invalidate_addr_unlocked(dev, prev_ppa);

	/* If the address has been invalidated already, we don't want to update */
	/* the L2P for weak writes, as it means the write is no longer valid. */
	if (!ftl_rwb_entry_weak(entry) || valid) {
		ftl_l2p_set(dev, entry->lba, ppa);
	}

	pthread_spin_unlock(&band->lba_map.lock);
}

static struct ftl_io *
ftl_io_init_child_write(struct ftl_io *parent, struct ftl_ppa ppa,
			void *data, void *md, ftl_io_fn cb)
{
	struct ftl_io *io;
	struct spdk_ftl_dev *dev = parent->dev;
	struct ftl_io_init_opts opts = {
		.dev = dev,
		.io = NULL,
		.parent = parent,
		.rwb_batch = NULL,
		.band = parent->band,
		.size = sizeof(struct ftl_io),
		.flags = 0,
		.type = FTL_IO_WRITE,
		.lbk_cnt = dev->xfer_size,
		.cb_fn = cb,
		.data = data,
		.md = md,
	};

	io = ftl_io_init_internal(&opts);
	if (!io) {
		return NULL;
	}

	io->ppa = ppa;

	return io;
}

static void
ftl_io_child_write_cb(struct ftl_io *io, void *ctx, int status)
{
	struct ftl_chunk *chunk;
	struct ftl_wptr *wptr;

	chunk = ftl_band_chunk_from_ppa(io->band, io->ppa);
	wptr = ftl_wptr_from_band(io->band);

	chunk->busy = false;
	chunk->write_offset += io->lbk_cnt;

	/* If some other write on the same band failed, the write pointer will already have been freed */
	if (spdk_likely(wptr)) {
		wptr->num_outstanding--;
	}
}

static int
ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io, int lbk_cnt)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io *child;
	int rc;
	struct ftl_ppa ppa;

	if (spdk_likely(!wptr->direct_mode)) {
		ppa = wptr->ppa;
	} else {
		assert(io->flags & FTL_IO_DIRECT_ACCESS);
		assert(io->ppa.chk == wptr->band->id);
		ppa = io->ppa;
	}

	/* Split the IO into child requests and release the chunk immediately after the child is completed */
	child = ftl_io_init_child_write(io, ppa, ftl_io_iovec_addr(io),
					ftl_io_get_md(io), ftl_io_child_write_cb);
	if (!child) {
		return -EAGAIN;
	}

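	/* The counter is dropped back down in ftl_io_child_write_cb once the child write completes */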
	wptr->num_outstanding++;
	rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
					    ftl_io_iovec_addr(child), child->md,
					    ftl_ppa_addr_pack(dev, ppa),
					    lbk_cnt, ftl_io_cmpl_cb, child, 0, 0, 0);
	if (rc) {
		wptr->num_outstanding--;
		ftl_io_fail(child, rc);
		ftl_io_complete(child);
		SPDK_ERRLOG("spdk_nvme_ns_cmd_write_with_md failed with status:%d, ppa:%lu\n",
			    rc, ppa.ppa);
		return -EIO;
	}

	ftl_io_inc_req(child);
	ftl_io_advance(child, lbk_cnt);

	return 0;
}

static int
ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	int rc = 0;

	assert(io->lbk_cnt % dev->xfer_size == 0);

	while (io->iov_pos < io->iov_cnt) {
		/* There are no ordering guarantees for completions on the NVMe IO submission queue, */
		/* so wait until the chunk is not busy before submitting another write */
		if (wptr->chunk->busy) {
			TAILQ_INSERT_TAIL(&wptr->pending_queue, io, retry_entry);
			rc = -EAGAIN;
			break;
		}

		rc = ftl_submit_child_write(wptr, io, dev->xfer_size);
		if (spdk_unlikely(rc)) {
			if (rc == -EAGAIN) {
				TAILQ_INSERT_TAIL(&wptr->pending_queue, io, retry_entry);
			} else {
				ftl_io_fail(io, rc);
			}
			break;
		}

		ftl_trace_submission(dev, io, wptr->ppa, dev->xfer_size);
		ftl_wptr_advance(wptr, dev->xfer_size);
	}

	if (ftl_io_done(io)) {
		/* Parent IO will complete after all children are completed */
		ftl_io_complete(io);
	}

	return rc;
}

static void
ftl_flush_pad_batch(struct spdk_ftl_dev *dev)
{
	struct ftl_rwb *rwb = dev->rwb;
	size_t size, num_entries;

	size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) +
	       ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER);

	/* There must be something in the RWB, otherwise the flush */
	/* wouldn't be waiting for anything */
	assert(size > 0);

	/* Only add padding when there are fewer than xfer_size */
	/* entries in the buffer. Otherwise we just have to wait */
	/* for the entries to become ready. */
	num_entries = ftl_rwb_get_active_batches(dev->rwb) * dev->xfer_size;
	if (size < num_entries) {
		ftl_rwb_pad(dev, num_entries - (size % num_entries));
	}
}

static int
ftl_wptr_process_writes(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_rwb_batch *batch;
	struct ftl_rwb_entry *entry;
	struct ftl_io *io;
	struct ftl_ppa ppa, prev_ppa;

	if (spdk_unlikely(!TAILQ_EMPTY(&wptr->pending_queue))) {
		io = TAILQ_FIRST(&wptr->pending_queue);
		TAILQ_REMOVE(&wptr->pending_queue, io, retry_entry);

		if (ftl_submit_write(wptr, io) == -EAGAIN) {
			return 0;
		}
	}

	/* Make sure the band is prepared for writing */
	if (!ftl_wptr_ready(wptr)) {
		return 0;
	}

	if (dev->halt) {
		ftl_wptr_process_shutdown(wptr);
	}

	if (spdk_unlikely(wptr->flush)) {
		ftl_wptr_pad_band(wptr);
	}

	batch = ftl_rwb_pop(dev->rwb);
	if (!batch) {
		/* If there are queued flush requests we need to pad the RWB to */
		/* force out remaining entries */
		if (!LIST_EMPTY(&dev->flush_list)) {
			ftl_flush_pad_batch(dev);
		}

		return 0;
	}

	io = ftl_io_rwb_init(dev, wptr->band, batch, ftl_write_cb);
	if (!io) {
		goto error;
	}

	ppa = wptr->ppa;
	ftl_rwb_foreach(entry, batch) {
		/* Update band's relocation stats if the IO comes from reloc */
		if (entry->flags & FTL_IO_WEAK) {
			if (!spdk_bit_array_get(wptr->band->reloc_bitmap, entry->band->id)) {
				spdk_bit_array_set(wptr->band->reloc_bitmap, entry->band->id);
				entry->band->num_reloc_bands++;
			}
		}

		entry->ppa = ppa;
		if (entry->lba != FTL_LBA_INVALID) {
			pthread_spin_lock(&entry->lock);
			prev_ppa = ftl_l2p_get(dev, entry->lba);

			/* If the l2p was updated in the meantime, don't update band's metadata */
			if (ftl_ppa_cached(prev_ppa) && prev_ppa.offset == entry->pos) {
				/* Setting entry's cache bit needs to be done after metadata */
				/* within the band is updated to make sure that writes */
				/* invalidating the entry clear the metadata as well */
				ftl_band_set_addr(wptr->band, entry->lba, entry->ppa);
				ftl_rwb_entry_set_valid(entry);
			}
			pthread_spin_unlock(&entry->lock);
		}

		ftl_trace_rwb_pop(dev, entry);
		ftl_update_rwb_stats(dev, entry);

		ppa = ftl_band_next_ppa(wptr->band, ppa, 1);
	}

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lx, %lx\n", wptr->ppa.ppa,
		      ftl_ppa_addr_pack(dev, wptr->ppa));

	if (ftl_submit_write(wptr, io)) {
		/* TODO: we need some recovery here */
		assert(0 && "Write submit failed");
		if (ftl_io_done(io)) {
			ftl_io_free(io);
		}
	}

	return dev->xfer_size;
error:
	ftl_rwb_batch_revert(batch);
	return 0;
}

static int
ftl_process_writes(struct spdk_ftl_dev *dev)
{
	struct ftl_wptr *wptr, *twptr;
	size_t num_active = 0;
	enum ftl_band_state state;

	LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) {
		ftl_wptr_process_writes(wptr);
		state = wptr->band->state;

		if (state != FTL_BAND_STATE_FULL &&
		    state != FTL_BAND_STATE_CLOSING &&
		    state != FTL_BAND_STATE_CLOSED) {
			num_active++;
		}
	}

	if (num_active < 1) {
		ftl_add_wptr(dev);
	}

	return 0;
}

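/*
 * Copy the IO's current block into the write buffer entry and record its LBA,
 * mirroring the LBA into the entry's metadata buffer when one is attached.
 */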
static void
ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io)
{
	memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE);

	if (ftl_rwb_entry_weak(entry)) {
		entry->band = ftl_band_from_ppa(io->dev, io->ppa);
		entry->ppa = ftl_band_next_ppa(entry->band, io->ppa, io->pos);
		entry->band->num_reloc_blocks++;
	}

	entry->trace = io->trace;
	entry->lba = ftl_io_current_lba(io);

	if (entry->md) {
		memcpy(entry->md, &entry->lba, sizeof(entry->lba));
	}
}

static int
ftl_rwb_fill(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa ppa = { .cached = 1 };
	int flags = ftl_rwb_flags_from_io(io);

	while (io->pos < io->lbk_cnt) {
		if (ftl_io_current_lba(io) == FTL_LBA_INVALID) {
			ftl_io_advance(io, 1);
			continue;
		}

		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			return -EAGAIN;
		}

		ftl_rwb_entry_fill(entry, io);

		ppa.offset = entry->pos;

		ftl_trace_rwb_fill(dev, io);
		ftl_update_l2p(dev, entry, ppa);
		ftl_io_advance(io, 1);

		/* Needs to be done after L2P is updated to avoid race with */
		/* write completion callback when it's processed faster than */
		/* L2P is set in update_l2p(). */
		ftl_rwb_push(entry);
	}

	if (ftl_io_done(io)) {
		if (ftl_dev_has_nv_cache(dev) && !(io->flags & FTL_IO_BYPASS_CACHE)) {
			ftl_write_nv_cache(io);
		} else {
			ftl_io_complete(io);
		}
	}

	return 0;
}

static bool
ftl_dev_needs_defrag(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START);

	if (ftl_reloc_is_halted(dev->reloc)) {
		return false;
	}

	if (dev->df_band) {
		return false;
	}

	if (dev->num_free <= limit->thld) {
		return true;
	}

	return false;
}

static double
ftl_band_calc_merit(struct ftl_band *band, size_t *threshold_valid)
{
	size_t usable, valid, invalid;
	double vld_ratio;

	/* If the band doesn't have any usable lbks it's of no use */
	usable = ftl_band_num_usable_lbks(band);
	if (usable == 0) {
		return 0.0;
	}

	valid = threshold_valid ? (usable - *threshold_valid) : band->lba_map.num_vld;
	invalid = usable - valid;

	/* Add one to avoid division by 0 */
	vld_ratio = (double)invalid / (double)(valid + 1);
	return vld_ratio * ftl_band_age(band);
}

static bool
ftl_band_needs_defrag(struct ftl_band *band, struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t thld_vld;

	/* If we're in dire need of free bands, every band is worth defragging */
	if (ftl_current_limit(dev) == SPDK_FTL_LIMIT_CRIT) {
		return true;
	}

	thld_vld = (ftl_band_num_usable_lbks(band) * conf->invalid_thld) / 100;

	return band->merit > ftl_band_calc_merit(band, &thld_vld);
}

static struct ftl_band *
ftl_select_defrag_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *mband = NULL;
	double merit = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_CLOSED);
		band->merit = ftl_band_calc_merit(band, NULL);
		if (band->merit > merit) {
			merit = band->merit;
			mband = band;
		}
	}

	if (mband && !ftl_band_needs_defrag(mband, dev)) {
		mband = NULL;
	}

	return mband;
}

static void
ftl_process_relocs(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (ftl_dev_needs_defrag(dev)) {
		band = dev->df_band = ftl_select_defrag_band(dev);

		if (band) {
			ftl_reloc_add(dev->reloc, band, 0, ftl_num_band_lbks(dev), 0);
			ftl_trace_defrag_band(dev, band);
		}
	}

	ftl_reloc(dev->reloc);
}

int
ftl_current_limit(const struct spdk_ftl_dev *dev)
{
	return dev->limit;
}

void
spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs)
{
	attrs->uuid = dev->uuid;
	attrs->lbk_cnt = dev->num_lbas;
	attrs->lbk_size = FTL_BLOCK_SIZE;
	attrs->range = dev->range;
	attrs->cache_bdev_desc = dev->nv_cache.bdev_desc;
	attrs->num_chunks = dev->geo.num_chk;
	attrs->chunk_size = dev->geo.clba;
	attrs->conf = dev->conf;
}

static void
_ftl_io_write(void *ctx)
{
	ftl_io_write((struct ftl_io *)ctx);
}

static int
ftl_rwb_fill_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_rwb_fill(io);
	if (rc == -EAGAIN) {
		spdk_thread_send_msg(spdk_io_channel_get_thread(io->ioch),
				     _ftl_io_write, io);
		return 0;
	}

	return rc;
}

static int
ftl_submit_write_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_submit_write(ftl_wptr_from_band(io->band), io);
	if (rc == -EAGAIN) {
		/* EAGAIN means that the request was put on the pending queue */
		return 0;
	}

	return rc;
}

void
ftl_io_write(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	/* For normal IOs we just need to copy the data onto the rwb */
	if (!(io->flags & FTL_IO_MD)) {
		ftl_io_call_foreach_child(io, ftl_rwb_fill_leaf);
	} else {
		/* Metadata has its own buffer, so it doesn't have to be copied; just */
		/* send it to the core thread and schedule the write immediately */
		if (ftl_check_core_thread(dev)) {
			ftl_io_call_foreach_child(io, ftl_submit_write_leaf);
		} else {
			spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io);
		}
	}
}

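/* Public write entry point: validate the arguments, set up a user IO and hand it off to ftl_io_write() */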
int
spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	       struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_write(io);

	return 0;
}

static int
ftl_io_read_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_submit_read(io);
	if (rc == -ENOMEM) {
		/* ENOMEM means that the request was put on a pending queue */
		return 0;
	}

	return rc;
}

static void
_ftl_io_read(void *arg)
{
	ftl_io_read((struct ftl_io *)arg);
}

void
ftl_io_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_read_thread(dev)) {
		ftl_io_call_foreach_child(io, ftl_io_read_leaf);
	} else {
		spdk_thread_send_msg(ftl_get_read_thread(dev), _ftl_io_read, io);
	}
}

int
spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	      struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_read(io);
	return 0;
}

static struct ftl_flush *
ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;
	struct ftl_rwb *rwb = dev->rwb;

	flush = calloc(1, sizeof(*flush));
	if (!flush) {
		return NULL;
	}

	flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb));
	if (!flush->bmap) {
		goto error;
	}

	flush->dev = dev;
	flush->cb.fn = cb_fn;
	flush->cb.ctx = cb_arg;

	return flush;
error:
	free(flush);
	return NULL;
}

static void
_ftl_flush(void *ctx)
{
	struct ftl_flush *flush = ctx;
	struct spdk_ftl_dev *dev = flush->dev;
	struct ftl_rwb *rwb = dev->rwb;
	struct ftl_rwb_batch *batch;

	/* Attach flush object to all non-empty batches */
	ftl_rwb_foreach_batch(batch, rwb) {
		if (!ftl_rwb_batch_empty(batch)) {
			spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch));
			flush->num_req++;
		}
	}

	LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry);

	/* If the RWB was already empty, the flush can be completed right away */
	if (!flush->num_req) {
		ftl_complete_flush(flush);
	}
}

int
spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;

	if (!dev->initialized) {
		return -EBUSY;
	}

	flush = ftl_flush_init(dev, cb_fn, cb_arg);
	if (!flush) {
		return -ENOMEM;
	}

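	/* The flush is attached to the write buffer batches on the core thread */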
	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush);
	return 0;
}

static void
_ftl_process_anm_event(void *ctx)
{
	ftl_process_anm_event((struct ftl_anm_event *)ctx);
}

void
ftl_process_anm_event(struct ftl_anm_event *event)
{
	struct spdk_ftl_dev *dev = event->dev;
	struct ftl_band *band;
	size_t lbkoff;

	if (!ftl_check_core_thread(dev)) {
		spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_process_anm_event, event);
		return;
	}

	band = ftl_band_from_ppa(dev, event->ppa);
	lbkoff = ftl_band_lbkoff_from_ppa(band, event->ppa);

	ftl_reloc_add(dev->reloc, band, lbkoff, event->num_lbks, 0);
	ftl_anm_event_complete(event);
}

bool
ftl_ppa_is_written(struct ftl_band *band, struct ftl_ppa ppa)
{
	struct ftl_chunk *chunk = ftl_band_chunk_from_ppa(band, ppa);

	return ppa.lbk < chunk->write_offset;
}

static void
ftl_process_retry_queue(struct spdk_ftl_dev *dev)
{
	struct ftl_io *io;
	int rc;

	while (!TAILQ_EMPTY(&dev->retry_queue)) {
		io = TAILQ_FIRST(&dev->retry_queue);

		/* Retry only if IO is still healthy */
		if (spdk_likely(io->status == 0)) {
			rc = ftl_submit_read(io);
			if (rc == -ENOMEM) {
				break;
			}
		}

		io->flags &= ~FTL_IO_RETRY;
		TAILQ_REMOVE(&dev->retry_queue, io, retry_entry);

		if (ftl_io_done(io)) {
			ftl_io_complete(io);
		}
	}
}

int
ftl_task_read(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_read_qpair(dev);
	size_t num_completed;

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	num_completed = spdk_nvme_qpair_process_completions(qpair, 0);

	if (num_completed && !TAILQ_EMPTY(&dev->retry_queue)) {
		ftl_process_retry_queue(dev);
	}

	return num_completed;
}

int
ftl_task_core(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_write_qpair(dev);

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	ftl_process_writes(dev);
	spdk_nvme_qpair_process_completions(qpair, 0);
	ftl_process_relocs(dev);

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE)