/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/likely.h"
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk/string.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"

#include "ftl_core.h"
#include "ftl_band.h"
#include "ftl_io.h"
#include "ftl_anm.h"
#include "ftl_rwb.h"
#include "ftl_debug.h"
#include "ftl_reloc.h"

struct ftl_wptr {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Current PPA */
	struct ftl_ppa ppa;

	/* Band currently being written to */
	struct ftl_band *band;

	/* Current logical block's offset */
	uint64_t offset;

	/* Current erase block */
	struct ftl_chunk *chunk;

	/* Pending IO queue */
	TAILQ_HEAD(, ftl_io) pending_queue;

	/* List link */
	LIST_ENTRY(ftl_wptr) list_entry;

	/*
	 * If set up in direct mode, there will be no offset or band state update after IO.
	 * The PPA is not assigned by the wptr; it is taken directly from the request.
	 */
	bool direct_mode;
};

struct ftl_flush {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Number of batches to wait for */
	size_t num_req;

	/* Callback */
	struct {
		spdk_ftl_fn fn;
		void *ctx;
	} cb;

	/* Batch bitmap */
	struct spdk_bit_array *bmap;

	/* List link */
	LIST_ENTRY(ftl_flush) list_entry;
};

static int
ftl_rwb_flags_from_io(const struct ftl_io *io)
{
	int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD;
	return io->flags & valid_flags;
}

static int
ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry)
{
	return entry->flags & FTL_IO_WEAK;
}

static void
ftl_wptr_free(struct ftl_wptr *wptr)
{
	if (!wptr) {
		return;
	}

	free(wptr);
}

static void
ftl_remove_wptr(struct ftl_wptr *wptr)
{
	LIST_REMOVE(wptr, list_entry);
	ftl_wptr_free(wptr);
}

static void
ftl_io_cmpl_cb(void *arg, const struct spdk_nvme_cpl *status)
{
	struct ftl_io *io = arg;

	if (spdk_nvme_cpl_is_error(status)) {
		ftl_io_process_error(io, status);
	}

	ftl_trace_completion(io->dev, io, FTL_TRACE_COMPLETION_DISK);

	ftl_io_dec_req(io);

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}

static void
ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band)
{
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			break;
		}
	}

	/* If the band already has the high_prio flag set, other writes must */
	/* have failed earlier, so it's already taken care of. */
	if (band->high_prio) {
		assert(wptr == NULL);
		return;
	}

	ftl_band_write_failed(band);
	ftl_remove_wptr(wptr);
}

static struct ftl_wptr *
ftl_wptr_from_band(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			return wptr;
		}
	}

	return NULL;
}

static void
ftl_md_write_fail(struct ftl_io *io, int status)
{
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	char buf[128];

	wptr = ftl_wptr_from_band(band);

	SPDK_ERRLOG("Metadata write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(wptr->ppa, buf, sizeof(buf)), status);

	ftl_halt_writes(io->dev, band);
}

static void
ftl_md_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	struct ftl_wptr *wptr;
	struct spdk_bdev *bdev;

	wptr = ftl_wptr_from_band(io->band);

	if (status) {
		ftl_md_write_fail(io, status);
		return;
	}

	ftl_band_set_next_state(io->band);
	if (io->band->state == FTL_BAND_STATE_CLOSED) {
		if (nv_cache->bdev_desc) {
			bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);

			pthread_spin_lock(&nv_cache->lock);
			nv_cache->num_available += ftl_band_user_lbks(io->band);

			if (spdk_unlikely(nv_cache->num_available > spdk_bdev_get_num_blocks(bdev))) {
				nv_cache->num_available = spdk_bdev_get_num_blocks(bdev);
			}
			pthread_spin_unlock(&nv_cache->lock);
		}

		ftl_remove_wptr(wptr);
	}
}

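/*
 * Descriptive comment (added): calculates the starting PPA and the number of
 * contiguous blocks for the next read of a PPA-mode IO. For metadata reads the
 * count is clamped so that a single request never crosses an xfer_size
 * boundary, matching the way metadata is written across chunks.
 */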
static int
ftl_ppa_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	size_t lbk_cnt, max_lbks;

	assert(ftl_io_mode_ppa(io));
	assert(io->iov_pos < io->iov_cnt);

	if (io->pos == 0) {
		*ppa = io->ppa;
	} else {
		*ppa = ftl_band_next_xfer_ppa(io->band, io->ppa, io->pos);
	}

	assert(!ftl_ppa_invalid(*ppa));

	/* Metadata has to be read in the way it's written (jumping across */
	/* the chunks in xfer_size increments) */
	if (io->flags & FTL_IO_MD) {
		max_lbks = dev->xfer_size - (ppa->lbk % dev->xfer_size);
		lbk_cnt = spdk_min(ftl_io_iovec_len_left(io), max_lbks);
		assert(ppa->lbk / dev->xfer_size == (ppa->lbk + lbk_cnt - 1) / dev->xfer_size);
	} else {
		lbk_cnt = ftl_io_iovec_len_left(io);
	}

	return lbk_cnt;
}

static int
ftl_wptr_close_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	ftl_band_set_state(band, FTL_BAND_STATE_CLOSING);
	band->tail_md_ppa = wptr->ppa;

	return ftl_band_write_tail_md(band, ftl_md_write_cb);
}

static int
ftl_wptr_open_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	assert(ftl_band_chunk_is_first(band, wptr->chunk));
	assert(band->lba_map.num_vld == 0);

	ftl_band_clear_lba_map(band);

	assert(band->state == FTL_BAND_STATE_PREP);
	ftl_band_set_state(band, FTL_BAND_STATE_OPENING);

	return ftl_band_write_head_md(band, ftl_md_write_cb);
}

static int
ftl_submit_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_band *band = io->band;
	struct ftl_ppa ppa = io->ppa;
	struct ftl_chunk *chunk;
	uint64_t ppa_packed;
	int rc = 0;
	size_t i;

	for (i = 0; i < io->lbk_cnt; ++i) {
		if (i != 0) {
			chunk = ftl_band_next_chunk(band, ftl_band_chunk_from_ppa(band, ppa));
			assert(chunk->state == FTL_CHUNK_STATE_CLOSED ||
			       chunk->state == FTL_CHUNK_STATE_VACANT);
			ppa = chunk->start_ppa;
		}

		assert(ppa.lbk == 0);
		ppa_packed = ftl_ppa_addr_pack(dev, ppa);

		ftl_trace_submission(dev, io, ppa, 1);
		rc = spdk_nvme_ocssd_ns_cmd_vector_reset(dev->ns, ftl_get_write_qpair(dev),
				&ppa_packed, 1, NULL, ftl_io_cmpl_cb, io);
		if (spdk_unlikely(rc)) {
			ftl_io_fail(io, rc);
			SPDK_ERRLOG("Vector reset failed with status: %d\n", rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, 1);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static void
_ftl_io_erase(void *ctx)
{
	ftl_io_erase((struct ftl_io *)ctx);
}

static bool
ftl_check_core_thread(const struct spdk_ftl_dev *dev)
{
	return dev->core_thread.thread == spdk_get_thread();
}

static bool
ftl_check_read_thread(const struct spdk_ftl_dev *dev)
{
	return dev->read_thread.thread == spdk_get_thread();
}

int
ftl_io_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_core_thread(dev)) {
		return ftl_submit_erase(io);
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_erase, io);
	return 0;
}

static struct ftl_band *
ftl_next_write_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	band = LIST_FIRST(&dev->free_bands);
	if (!band) {
		return NULL;
	}
	assert(band->state == FTL_BAND_STATE_FREE);

	if (ftl_band_erase(band)) {
		/* TODO: handle erase failure */
		return NULL;
	}

	return band;
}

static struct ftl_band *
ftl_next_wptr_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (!dev->next_band) {
		band = ftl_next_write_band(dev);
	} else {
		assert(dev->next_band->state == FTL_BAND_STATE_PREP);
		band = dev->next_band;
		dev->next_band = NULL;
	}

	return band;
}

static struct ftl_wptr *
ftl_wptr_init(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	wptr = calloc(1, sizeof(*wptr));
	if (!wptr) {
		return NULL;
	}

	wptr->dev = dev;
	wptr->band = band;
	wptr->chunk = CIRCLEQ_FIRST(&band->chunks);
	wptr->ppa = wptr->chunk->start_ppa;
	TAILQ_INIT(&wptr->pending_queue);

	return wptr;
}

static int
ftl_add_direct_wptr(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	assert(band->state == FTL_BAND_STATE_OPEN);

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	wptr->direct_mode = true;

	if (ftl_band_alloc_lba_map(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: direct band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}

static void
ftl_close_direct_wptr(struct ftl_band *band)
{
	struct ftl_wptr *wptr = ftl_wptr_from_band(band);

	assert(wptr->direct_mode);
	assert(band->state == FTL_BAND_STATE_CLOSED);

	ftl_band_release_lba_map(band);

	ftl_remove_wptr(wptr);
}

int
ftl_band_set_direct_access(struct ftl_band *band, bool access)
{
	if (access) {
		return ftl_add_direct_wptr(band);
	} else {
		ftl_close_direct_wptr(band);
		return 0;
	}
}

static int
ftl_add_wptr(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	struct ftl_wptr *wptr;

	band = ftl_next_wptr_band(dev);
	if (!band) {
		return -1;
	}

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	if (ftl_band_write_prep(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}

static void
ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
{
	struct ftl_band *band = wptr->band;
	struct spdk_ftl_dev *dev = wptr->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t next_thld;

	if (spdk_unlikely(wptr->direct_mode)) {
		return;
	}

	wptr->offset += xfer_size;
	next_thld = (ftl_band_num_usable_lbks(band) * conf->band_thld) / 100;

	if (ftl_band_full(band, wptr->offset)) {
		ftl_band_set_state(band, FTL_BAND_STATE_FULL);
	}

	wptr->chunk->busy = true;
	wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size);
	wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk);

	assert(!ftl_ppa_invalid(wptr->ppa));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: grp:%d, pu:%d chunk:%d, lbk:%u\n",
		      wptr->ppa.grp, wptr->ppa.pu, wptr->ppa.chk, wptr->ppa.lbk);

	if (wptr->offset >= next_thld && !dev->next_band) {
		dev->next_band = ftl_next_write_band(dev);
	}
}

static size_t
ftl_wptr_user_lbks_left(const struct ftl_wptr *wptr)
{
	return ftl_band_user_lbks_left(wptr->band, wptr->offset);
}

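/*
 * Descriptive comment (added): returns 1 when the write pointer's band is open
 * and its current chunk is writable. Returns 0 otherwise, driving the band
 * through the required state transitions (opening a PREP band, closing a FULL
 * one) along the way.
 */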
static int
ftl_wptr_ready(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	/* TODO: add handling of empty bands */

	if (spdk_unlikely(!ftl_chunk_is_writable(wptr->chunk))) {
		/* Erasing band may fail after it was assigned to wptr. */
		if (spdk_unlikely(wptr->chunk->state == FTL_CHUNK_STATE_BAD)) {
			ftl_wptr_advance(wptr, wptr->dev->xfer_size);
		}
		return 0;
	}

	/* If we're in the process of writing metadata, wait till it is */
	/* completed. */
	/* TODO: we should probably change bands once we're writing tail md */
	if (ftl_band_state_changing(band)) {
		return 0;
	}

	if (band->state == FTL_BAND_STATE_FULL) {
		if (ftl_wptr_close_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	if (band->state != FTL_BAND_STATE_OPEN) {
		if (ftl_wptr_open_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	return 1;
}

static const struct spdk_ftl_limit *
ftl_get_limit(const struct spdk_ftl_dev *dev, int type)
{
	assert(type < SPDK_FTL_LIMIT_MAX);
	return &dev->conf.defrag.limits[type];
}

static bool
ftl_cache_lba_valid(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	struct ftl_ppa ppa;

	/* If the LBA is invalid don't bother checking the md and l2p */
	if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) {
		return false;
	}

	ppa = ftl_l2p_get(dev, entry->lba);
	if (!(ftl_ppa_cached(ppa) && ppa.offset == entry->pos)) {
		return false;
	}

	return true;
}

static void
ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	pthread_spin_lock(&entry->lock);

	if (!ftl_rwb_entry_valid(entry)) {
		goto unlock;
	}

	/* If the l2p wasn't updated and still points at the entry, fill it with the */
	/* on-disk PPA and clear the cache status bit. Otherwise, skip the l2p update */
	/* and just clear the cache status. */
	if (!ftl_cache_lba_valid(dev, entry)) {
		goto clear;
	}

	ftl_l2p_set(dev, entry->lba, entry->ppa);
clear:
	ftl_rwb_entry_invalidate(entry);
unlock:
	pthread_spin_unlock(&entry->lock);
}

static struct ftl_rwb_entry *
ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags)
{
	struct ftl_rwb_entry *entry;

	entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags));
	if (!entry) {
		return NULL;
	}

	ftl_evict_cache_entry(dev, entry);

	entry->flags = flags;
	return entry;
}

static void
ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size)
{
	struct ftl_rwb_entry *entry;
	int flags = FTL_IO_PAD | FTL_IO_INTERNAL;

	for (size_t i = 0; i < size; ++i) {
		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			break;
		}

		entry->lba = FTL_LBA_INVALID;
		entry->ppa = ftl_to_ppa(FTL_PPA_INVALID);
		memset(entry->data, 0, FTL_BLOCK_SIZE);
		ftl_rwb_push(entry);
	}
}

static void
ftl_remove_free_bands(struct spdk_ftl_dev *dev)
{
	while (!LIST_EMPTY(&dev->free_bands)) {
		LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry);
	}

	dev->next_band = NULL;
}

static void
ftl_wptr_process_shutdown(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) +
		      ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER);
	size_t num_active = dev->xfer_size * ftl_rwb_get_active_batches(dev->rwb);
	size_t band_length, rwb_free_space, pad_length;

	num_active = num_active ? num_active : dev->xfer_size;
	if (size >= num_active) {
		return;
	}

	/* If we reach this point we need to remove free bands */
	/* and pad current wptr band to the end */
	if (ftl_rwb_get_active_batches(dev->rwb) <= 1) {
		ftl_remove_free_bands(dev);
	}

	band_length = ftl_wptr_user_lbks_left(wptr);
	rwb_free_space = ftl_rwb_size(dev->rwb) - size;
	pad_length = spdk_min(band_length, rwb_free_space);

	/* Pad write buffer until band is full */
	ftl_rwb_pad(dev, pad_length);
}

static int
ftl_shutdown_complete(struct spdk_ftl_dev *dev)
{
	return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) &&
	       LIST_EMPTY(&dev->wptr_list);
}

void
ftl_apply_limits(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit;
	struct ftl_stats *stats = &dev->stats;
	size_t rwb_limit[FTL_RWB_TYPE_MAX];
	int i;

	ftl_rwb_get_limits(dev->rwb, rwb_limit);

	/* Clear existing limit */
	dev->limit = SPDK_FTL_LIMIT_MAX;

	for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) {
		limit = ftl_get_limit(dev, i);

		if (dev->num_free <= limit->thld) {
			rwb_limit[FTL_RWB_TYPE_USER] =
				(limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100;
			stats->limits[i]++;
			dev->limit = i;
			goto apply;
		}
	}

	/* Clear the limits, since we don't need to apply them anymore */
	rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb);
apply:
	ftl_trace_limits(dev, rwb_limit, dev->num_free);
	ftl_rwb_set_limits(dev->rwb, rwb_limit);
}

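/*
 * Descriptive comment (added): clears the valid bit for the block at the given
 * PPA in its band's LBA map and returns 1 if the block was still marked valid,
 * 0 otherwise. The caller must hold the band's lba_map.lock; see
 * ftl_invalidate_addr() for the locking wrapper.
 */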
static int
ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band = ftl_band_from_ppa(dev, ppa);
	struct ftl_lba_map *lba_map = &band->lba_map;
	uint64_t offset;

	offset = ftl_band_lbkoff_from_ppa(band, ppa);

	/* The bit might be already cleared if two writes are scheduled to the */
	/* same LBA at the same time */
	if (spdk_bit_array_get(lba_map->vld, offset)) {
		assert(lba_map->num_vld > 0);
		spdk_bit_array_clear(lba_map->vld, offset);
		lba_map->num_vld--;
		return 1;
	}

	return 0;
}

int
ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band;
	int rc;

	assert(!ftl_ppa_cached(ppa));
	band = ftl_band_from_ppa(dev, ppa);

	pthread_spin_lock(&band->lba_map.lock);
	rc = ftl_invalidate_addr_unlocked(dev, ppa);
	pthread_spin_unlock(&band->lba_map.lock);

	return rc;
}

static int
ftl_read_retry(int rc)
{
	return rc == -EAGAIN;
}

static int
ftl_read_canceled(int rc)
{
	return rc == -EFAULT || rc == 0;
}

static void
ftl_add_to_retry_queue(struct ftl_io *io)
{
	if (!(io->flags & FTL_IO_RETRY)) {
		io->flags |= FTL_IO_RETRY;
		TAILQ_INSERT_TAIL(&io->dev->retry_queue, io, retry_entry);
	}
}

static int
ftl_ppa_cache_read(struct ftl_io *io, uint64_t lba,
		   struct ftl_ppa ppa, void *buf)
{
	struct ftl_rwb *rwb = io->dev->rwb;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa nppa;
	int rc = 0;

	entry = ftl_rwb_entry_from_offset(rwb, ppa.offset);
	pthread_spin_lock(&entry->lock);

	nppa = ftl_l2p_get(io->dev, lba);
	if (ppa.ppa != nppa.ppa) {
		rc = -1;
		goto out;
	}

	memcpy(buf, entry->data, FTL_BLOCK_SIZE);
out:
	pthread_spin_unlock(&entry->lock);
	return rc;
}

static int
ftl_lba_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa next_ppa;
	size_t i;

	*ppa = ftl_l2p_get(dev, ftl_io_current_lba(io));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read ppa:%lx, lba:%lu\n",
		      ppa->ppa, ftl_io_current_lba(io));

	/* If the PPA is invalid, skip it (the buffer should already be zero'ed) */
	if (ftl_ppa_invalid(*ppa)) {
		return -EFAULT;
	}

	if (ftl_ppa_cached(*ppa)) {
		if (!ftl_ppa_cache_read(io, ftl_io_current_lba(io), *ppa, ftl_io_iovec_addr(io))) {
			return 0;
		}

		/* If the state changed, we have to re-read the l2p */
		return -EAGAIN;
	}

	for (i = 1; i < ftl_io_iovec_len_left(io); ++i) {
		next_ppa = ftl_l2p_get(dev, ftl_io_get_lba(io, io->pos + i));

		if (ftl_ppa_invalid(next_ppa) || ftl_ppa_cached(next_ppa)) {
			break;
		}

		if (ftl_ppa_addr_pack(dev, *ppa) + i != ftl_ppa_addr_pack(dev, next_ppa)) {
			break;
		}
	}

	return i;
}

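/*
 * Descriptive comment (added): translates and submits the IO one physically
 * contiguous range at a time. ftl_lba_read_next_ppa()/ftl_ppa_read_next_ppa()
 * return how many blocks can be covered by a single NVMe read; reads satisfied
 * from the write buffer are completed immediately and -EAGAIN causes the L2P
 * lookup to be retried.
 */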
static int
ftl_submit_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa ppa;
	int rc = 0, lbk_cnt;

	assert(LIST_EMPTY(&io->children));

	while (io->pos < io->lbk_cnt) {
		if (ftl_io_mode_ppa(io)) {
			lbk_cnt = rc = ftl_ppa_read_next_ppa(io, &ppa);
		} else {
			lbk_cnt = rc = ftl_lba_read_next_ppa(io, &ppa);
		}

		/* We might need to retry the read from scratch (e.g. */
		/* because a write was under way and completed before */
		/* we could read it from the rwb) */
		if (ftl_read_retry(rc)) {
			continue;
		}

		/* We don't have to schedule the read, as it was read from cache */
		if (ftl_read_canceled(rc)) {
			ftl_io_advance(io, 1);
			ftl_trace_completion(io->dev, io, rc ? FTL_TRACE_COMPLETION_INVALID :
					     FTL_TRACE_COMPLETION_CACHE);
			rc = 0;
			continue;
		}

		assert(lbk_cnt > 0);

		ftl_trace_submission(dev, io, ppa, lbk_cnt);
		rc = spdk_nvme_ns_cmd_read(dev->ns, ftl_get_read_qpair(dev),
					   ftl_io_iovec_addr(io),
					   ftl_ppa_addr_pack(io->dev, ppa), lbk_cnt,
					   ftl_io_cmpl_cb, io, 0);
		if (spdk_unlikely(rc)) {
			if (rc == -ENOMEM) {
				ftl_add_to_retry_queue(io);
			} else {
				ftl_io_fail(io, rc);
			}
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, lbk_cnt);
	}

	/* If we didn't have to read anything from the device, */
	/* complete the request right away */
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static void
ftl_complete_flush(struct ftl_flush *flush)
{
	assert(flush->num_req == 0);
	LIST_REMOVE(flush, list_entry);

	flush->cb.fn(flush->cb.ctx, 0);

	spdk_bit_array_free(&flush->bmap);
	free(flush);
}

static void
ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch)
{
	struct ftl_flush *flush, *tflush;
	size_t offset;

	LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) {
		offset = ftl_rwb_batch_get_offset(batch);

		if (spdk_bit_array_get(flush->bmap, offset)) {
			spdk_bit_array_clear(flush->bmap, offset);
			if (!(--flush->num_req)) {
				ftl_complete_flush(flush);
			}
		}
	}
}

static uint64_t
ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_lbks)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
	uint64_t num_available, cache_size, cache_addr = FTL_LBA_INVALID;

	cache_size = spdk_bdev_get_num_blocks(bdev);

	pthread_spin_lock(&nv_cache->lock);
	if (spdk_unlikely(nv_cache->num_available == 0)) {
		goto out;
	}

	num_available = spdk_min(nv_cache->num_available, *num_lbks);
	num_available = spdk_min(num_available, dev->conf.nv_cache.max_request_cnt);

	if (spdk_unlikely(nv_cache->current_addr + num_available > cache_size)) {
		*num_lbks = cache_size - nv_cache->current_addr;
	} else {
		*num_lbks = num_available;
	}

	cache_addr = nv_cache->current_addr;
	nv_cache->current_addr += *num_lbks;
	nv_cache->num_available -= *num_lbks;

	if (nv_cache->current_addr == spdk_bdev_get_num_blocks(bdev)) {
		nv_cache->current_addr = 0;
	}
out:
	pthread_spin_unlock(&nv_cache->lock);
	return cache_addr;
}

static struct ftl_io *
ftl_alloc_io_nv_cache(struct ftl_io *parent, size_t num_lbks)
{
	struct ftl_io_init_opts opts = {
		.dev = parent->dev,
		.parent = parent,
		.data = ftl_io_iovec_addr(parent),
		.lbk_cnt = num_lbks,
		.flags = FTL_IO_CACHE,
	};

	return ftl_io_init_internal(&opts);
}

static void
ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct ftl_io *io = cb_arg;
	struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;

	if (spdk_unlikely(!success)) {
		SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->ppa.ppa);
		io->status = -EIO;
	}

	ftl_io_dec_req(io);
	if (ftl_io_done(io)) {
		spdk_mempool_put(nv_cache->md_pool, io->md);
		ftl_io_complete(io);
	}

	spdk_bdev_free_io(bdev_io);
}

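/*
 * Descriptive comment (added): writes the child IO's payload (plus per-block
 * LBA metadata) to the non-volatile cache bdev. -ENOMEM from the bdev layer is
 * handled by rescheduling the submission on the IO channel's thread.
 */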
static void
ftl_submit_nv_cache(void *ctx)
{
	struct ftl_io *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	struct ftl_nv_cache *nv_cache = &dev->nv_cache;
	struct ftl_io_channel *ioch;
	int rc;

	ioch = spdk_io_channel_get_ctx(io->ioch);
	thread = spdk_io_channel_get_thread(io->ioch);

	rc = spdk_bdev_write_blocks_with_md(nv_cache->bdev_desc, ioch->cache_ioch,
					    ftl_io_iovec_addr(io), io->md, io->ppa.ppa,
					    io->lbk_cnt, ftl_nv_cache_submit_cb, io);
	if (rc == -ENOMEM) {
		spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
		return;
	} else if (rc) {
		SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
			    spdk_strerror(-rc), io->ppa.ppa, io->lbk_cnt);
		spdk_mempool_put(nv_cache->md_pool, io->md);
		io->status = -EIO;
		ftl_io_complete(io);
		return;
	}

	ftl_io_advance(io, io->lbk_cnt);
	ftl_io_inc_req(io);
}

static void
ftl_nv_cache_fill_md(struct ftl_nv_cache *nv_cache, struct ftl_io *io)
{
	struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
	void *md_buf = io->md;
	size_t lbk_off;

	for (lbk_off = 0; lbk_off < io->lbk_cnt; ++lbk_off) {
		*(uint64_t *)md_buf = ftl_io_get_lba(io, lbk_off);
		md_buf = (char *)md_buf + spdk_bdev_get_md_size(bdev);
	}
}

static void
_ftl_write_nv_cache(void *ctx)
{
	struct ftl_io *child, *io = ctx;
	struct spdk_ftl_dev *dev = io->dev;
	struct spdk_thread *thread;
	uint64_t num_lbks;

	thread = spdk_io_channel_get_thread(io->ioch);

	while (io->pos < io->lbk_cnt) {
		num_lbks = ftl_io_iovec_len_left(io);

		child = ftl_alloc_io_nv_cache(io, num_lbks);
		if (spdk_unlikely(!child)) {
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			return;
		}

		child->md = spdk_mempool_get(dev->nv_cache.md_pool);
		if (spdk_unlikely(!child->md)) {
			ftl_io_free(child);
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			break;
		}

		/* Reserve area on the write buffer cache */
		child->ppa.ppa = ftl_reserve_nv_cache(&dev->nv_cache, &num_lbks);
		if (child->ppa.ppa == FTL_LBA_INVALID) {
			spdk_mempool_put(dev->nv_cache.md_pool, child->md);
			ftl_io_free(child);
			spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
			break;
		}

		/* Shrink the IO if there isn't enough room in the cache to fill the whole iovec */
		if (spdk_unlikely(num_lbks != ftl_io_iovec_len_left(io))) {
			ftl_io_shrink_iovec(child, num_lbks);
		}

		ftl_nv_cache_fill_md(&dev->nv_cache, child);
		ftl_submit_nv_cache(child);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}

static void
ftl_write_nv_cache(struct ftl_io *parent)
{
	ftl_io_reset(parent);
	parent->flags |= FTL_IO_CACHE;
	_ftl_write_nv_cache(parent);
}

static void
ftl_write_fail(struct ftl_io *io, int status)
{
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;
	char buf[128];

	entry = ftl_rwb_batch_first_entry(batch);

	band = ftl_band_from_ppa(io->dev, entry->ppa);
	SPDK_ERRLOG("Write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(entry->ppa, buf, sizeof(buf)), status);

	/* Close the band, halt the wptr and defrag */
	ftl_halt_writes(dev, band);

	ftl_rwb_foreach(entry, batch) {
		/* Invalidate meta set by process_writes() */
		ftl_invalidate_addr(dev, entry->ppa);
	}

	/* Reset the batch back to the RWB to resend it later */
	ftl_rwb_batch_revert(batch);
}

static void
ftl_write_cb(struct ftl_io *io, void *arg, int status)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct ftl_rwb_entry *entry;

	if (status) {
		ftl_write_fail(io, status);
		return;
	}

	assert(io->lbk_cnt == dev->xfer_size);
	ftl_rwb_foreach(entry, batch) {
		if (!(io->flags & FTL_IO_MD) && !(entry->flags & FTL_IO_PAD)) {
			/* Verify that the LBA is set for user lbks */
			assert(entry->lba != FTL_LBA_INVALID);
		}

		SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lu, lba:%lu\n",
			      entry->ppa.ppa, entry->lba);
	}

	ftl_process_flush(dev, batch);
	ftl_rwb_batch_release(batch);
}

static void
ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry)
{
	if (!ftl_rwb_entry_internal(entry)) {
		dev->stats.write_user++;
	}
	dev->stats.write_total++;
}

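/*
 * Descriptive comment (added): updates the L2P entry for a freshly filled RWB
 * entry. Three cases are handled: an invalid previous PPA is simply
 * overwritten, a cached previous PPA is re-checked and invalidated under the
 * owning RWB entry's lock, and an on-media previous PPA is invalidated under
 * the band's LBA map lock (where weak writes only update the L2P if the
 * address was still valid).
 */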
static void
ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry,
	       struct ftl_ppa ppa)
{
	struct ftl_ppa prev_ppa;
	struct ftl_rwb_entry *prev;
	struct ftl_band *band;
	int valid;

	prev_ppa = ftl_l2p_get(dev, entry->lba);
	if (ftl_ppa_invalid(prev_ppa)) {
		ftl_l2p_set(dev, entry->lba, ppa);
		return;
	}

	/* If the L2P's PPA is different than what we expected we don't need to */
	/* do anything (someone's already overwritten our data). */
	if (ftl_rwb_entry_weak(entry) && !ftl_ppa_cmp(prev_ppa, entry->ppa)) {
		return;
	}

	if (ftl_ppa_cached(prev_ppa)) {
		assert(!ftl_rwb_entry_weak(entry));
		prev = ftl_rwb_entry_from_offset(dev->rwb, prev_ppa.offset);
		pthread_spin_lock(&prev->lock);

		/* Re-read the L2P under the lock to protect against updates */
		/* to this LBA from other threads */
		prev_ppa = ftl_l2p_get(dev, entry->lba);

		/* If the entry is no longer in cache, another write has been */
		/* scheduled in the meantime, so we have to invalidate its LBA */
		if (!ftl_ppa_cached(prev_ppa)) {
			ftl_invalidate_addr(dev, prev_ppa);
		}

		/* If previous entry is part of cache, remove and invalidate it */
		if (ftl_rwb_entry_valid(prev)) {
			ftl_invalidate_addr(dev, prev->ppa);
			ftl_rwb_entry_invalidate(prev);
		}

		ftl_l2p_set(dev, entry->lba, ppa);
		pthread_spin_unlock(&prev->lock);
		return;
	}

	/* Lock the band containing the previous PPA. This assures atomic changes to */
	/* the L2P as well as the metadata. The valid bits in the metadata are used to */
	/* check the validity of weak writes. */
	band = ftl_band_from_ppa(dev, prev_ppa);
	pthread_spin_lock(&band->lba_map.lock);

	valid = ftl_invalidate_addr_unlocked(dev, prev_ppa);

	/* If the address has been invalidated already, we don't want to update */
	/* the L2P for weak writes, as it means the write is no longer valid. */
	if (!ftl_rwb_entry_weak(entry) || valid) {
		ftl_l2p_set(dev, entry->lba, ppa);
	}

	pthread_spin_unlock(&band->lba_map.lock);
}

static struct ftl_io *
ftl_io_init_child_write(struct ftl_io *parent, struct ftl_ppa ppa,
			void *data, void *md, ftl_io_fn cb)
{
	struct ftl_io *io;
	struct spdk_ftl_dev *dev = parent->dev;
	struct ftl_io_init_opts opts = {
		.dev = dev,
		.io = NULL,
		.parent = parent,
		.rwb_batch = NULL,
		.band = parent->band,
		.size = sizeof(struct ftl_io),
		.flags = 0,
		.type = FTL_IO_WRITE,
		.lbk_cnt = dev->xfer_size,
		.cb_fn = cb,
		.data = data,
		.md = md,
	};

	io = ftl_io_init_internal(&opts);
	if (!io) {
		return NULL;
	}

	io->ppa = ppa;

	return io;
}

static void
ftl_io_child_write_cb(struct ftl_io *io, void *ctx, int status)
{
	struct ftl_chunk *chunk;

	chunk = ftl_band_chunk_from_ppa(io->band, io->ppa);
	chunk->busy = false;
}

static int
ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io, int lbk_cnt)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io *child;
	int rc;
	struct ftl_ppa ppa;

	if (spdk_likely(!wptr->direct_mode)) {
		ppa = wptr->ppa;
	} else {
		assert(io->flags & FTL_IO_DIRECT_ACCESS);
		assert(io->ppa.chk == wptr->band->id);
		ppa = io->ppa;
	}

	/* Split IO to child requests and release chunk immediately after child is completed */
	child = ftl_io_init_child_write(io, ppa, ftl_io_iovec_addr(io),
					ftl_io_get_md(io), ftl_io_child_write_cb);
	if (!child) {
		return -EAGAIN;
	}

	rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
					    ftl_io_iovec_addr(child), child->md,
					    ftl_ppa_addr_pack(dev, ppa),
					    lbk_cnt, ftl_io_cmpl_cb, child, 0, 0, 0);
	if (rc) {
		ftl_io_fail(child, rc);
		ftl_io_complete(child);
		SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n",
			    rc, ppa.ppa);

		return -EIO;
	}

	ftl_io_inc_req(child);
	ftl_io_advance(child, lbk_cnt);

	return 0;
}

static int
ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	int rc = 0;

	assert(io->lbk_cnt % dev->xfer_size == 0);

	while (io->iov_pos < io->iov_cnt) {
		/* There are no ordering guarantees for completions on the NVMe submission queue, */
		/* so wait until the chunk is not busy before submitting another write */
		if (wptr->chunk->busy) {
			TAILQ_INSERT_TAIL(&wptr->pending_queue, io, retry_entry);
			rc = -EAGAIN;
			break;
		}

		rc = ftl_submit_child_write(wptr, io, dev->xfer_size);
		if (spdk_unlikely(rc)) {
			if (rc == -EAGAIN) {
				TAILQ_INSERT_TAIL(&wptr->pending_queue, io, retry_entry);
			} else {
				ftl_io_fail(io, rc);
			}
			break;
		}

		ftl_trace_submission(dev, io, wptr->ppa, dev->xfer_size);
		ftl_wptr_advance(wptr, dev->xfer_size);
	}

	if (ftl_io_done(io)) {
		/* Parent IO will complete after all children are completed */
		ftl_io_complete(io);
	}

	return rc;
}

static void
ftl_flush_pad_batch(struct spdk_ftl_dev *dev)
{
	struct ftl_rwb *rwb = dev->rwb;
	size_t size, num_entries;

	size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) +
	       ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER);

	/* There must be something in the RWB, otherwise the flush */
	/* wouldn't be waiting for anything */
	assert(size > 0);

	/* Only add padding when there are fewer entries in the buffer than */
	/* the active batches can hold. Otherwise we just have to wait */
	/* for the entries to become ready. */
	num_entries = ftl_rwb_get_active_batches(dev->rwb) * dev->xfer_size;
	if (size < num_entries) {
		ftl_rwb_pad(dev, num_entries - (size % num_entries));
	}
}

static int
ftl_wptr_process_writes(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_rwb_batch *batch;
	struct ftl_rwb_entry *entry;
	struct ftl_io *io;
	struct ftl_ppa ppa, prev_ppa;

	if (spdk_unlikely(!TAILQ_EMPTY(&wptr->pending_queue))) {
		io = TAILQ_FIRST(&wptr->pending_queue);
		TAILQ_REMOVE(&wptr->pending_queue, io, retry_entry);

		if (ftl_submit_write(wptr, io) == -EAGAIN) {
			return 0;
		}
	}

	/* Make sure the band is prepared for writing */
	if (!ftl_wptr_ready(wptr)) {
		return 0;
	}

	if (dev->halt) {
		ftl_wptr_process_shutdown(wptr);
	}

	batch = ftl_rwb_pop(dev->rwb);
	if (!batch) {
		/* If there are queued flush requests we need to pad the RWB to */
		/* force out remaining entries */
		if (!LIST_EMPTY(&dev->flush_list)) {
			ftl_flush_pad_batch(dev);
		}

		return 0;
	}

	io = ftl_io_rwb_init(dev, wptr->band, batch, ftl_write_cb);
	if (!io) {
		goto error;
	}

	ppa = wptr->ppa;
	ftl_rwb_foreach(entry, batch) {
		entry->ppa = ppa;

		if (entry->lba != FTL_LBA_INVALID) {
			pthread_spin_lock(&entry->lock);
			prev_ppa = ftl_l2p_get(dev, entry->lba);

			/* If the l2p was updated in the meantime, don't update band's metadata */
			if (ftl_ppa_cached(prev_ppa) && prev_ppa.offset == entry->pos) {
				/* Setting entry's cache bit needs to be done after metadata */
				/* within the band is updated to make sure that writes */
				/* invalidating the entry clear the metadata as well */
				ftl_band_set_addr(wptr->band, entry->lba, entry->ppa);
				ftl_rwb_entry_set_valid(entry);
			}
			pthread_spin_unlock(&entry->lock);
		}

		ftl_trace_rwb_pop(dev, entry);
		ftl_update_rwb_stats(dev, entry);

		ppa = ftl_band_next_ppa(wptr->band, ppa, 1);
	}

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lx, %lx\n", wptr->ppa.ppa,
		      ftl_ppa_addr_pack(dev, wptr->ppa));

	if (ftl_submit_write(wptr, io)) {
		/* TODO: we need some recovery here */
		assert(0 && "Write submit failed");
		if (ftl_io_done(io)) {
			ftl_io_free(io);
		}
	}

	return dev->xfer_size;
error:
	ftl_rwb_batch_revert(batch);
	return 0;
}

static int
ftl_process_writes(struct spdk_ftl_dev *dev)
{
	struct ftl_wptr *wptr, *twptr;
	size_t num_active = 0;
	enum ftl_band_state state;

	LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) {
		ftl_wptr_process_writes(wptr);
		state = wptr->band->state;

		if (state != FTL_BAND_STATE_FULL &&
		    state != FTL_BAND_STATE_CLOSING &&
		    state != FTL_BAND_STATE_CLOSED) {
			num_active++;
		}
	}

	if (num_active < 1) {
		ftl_add_wptr(dev);
	}

	return 0;
}

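/*
 * Descriptive comment (added): copies one logical block of the IO's payload
 * into the RWB entry and records its LBA (mirrored into the entry's metadata
 * buffer when present). For weak writes the target PPA is derived from the
 * IO's band and offset.
 */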
static void
ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io)
{
	struct ftl_band *band;

	memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE);

	if (ftl_rwb_entry_weak(entry)) {
		band = ftl_band_from_ppa(io->dev, io->ppa);
		entry->ppa = ftl_band_next_ppa(band, io->ppa, io->pos);
	}

	entry->trace = io->trace;
	entry->lba = ftl_io_current_lba(io);

	if (entry->md) {
		memcpy(entry->md, &entry->lba, sizeof(entry->lba));
	}
}

static int
ftl_rwb_fill(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa ppa = { .cached = 1 };
	int flags = ftl_rwb_flags_from_io(io);

	while (io->pos < io->lbk_cnt) {
		if (ftl_io_current_lba(io) == FTL_LBA_INVALID) {
			ftl_io_advance(io, 1);
			continue;
		}

		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			return -EAGAIN;
		}

		ftl_rwb_entry_fill(entry, io);

		ppa.offset = entry->pos;

		ftl_trace_rwb_fill(dev, io);
		ftl_update_l2p(dev, entry, ppa);
		ftl_io_advance(io, 1);

		/* Needs to be done after L2P is updated to avoid race with */
		/* write completion callback when it's processed faster than */
		/* L2P is set in update_l2p(). */
		ftl_rwb_push(entry);
	}

	if (ftl_io_done(io)) {
		if (dev->nv_cache.bdev_desc) {
			ftl_write_nv_cache(io);
		} else {
			ftl_io_complete(io);
		}
	}

	return 0;
}

static bool
ftl_dev_needs_defrag(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START);

	if (ftl_reloc_is_halted(dev->reloc)) {
		return false;
	}

	if (dev->df_band) {
		return false;
	}

	if (dev->num_free <= limit->thld) {
		return true;
	}

	return false;
}

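/*
 * Descriptive comment (added): the defrag merit of a band is
 * merit = (invalid / (valid + 1)) * band_age, so bands with more invalid
 * blocks and older data are better relocation candidates. Illustrative
 * numbers (not taken from any device): with 1000 usable lbks of which 200 are
 * valid, the ratio is 800 / 201 ~= 3.98, scaled by the band's age.
 */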
static double
ftl_band_calc_merit(struct ftl_band *band, size_t *threshold_valid)
{
	size_t usable, valid, invalid;
	double vld_ratio;

	/* If the band doesn't have any usable lbks it's of no use */
	usable = ftl_band_num_usable_lbks(band);
	if (usable == 0) {
		return 0.0;
	}

	valid = threshold_valid ? (usable - *threshold_valid) : band->lba_map.num_vld;
	invalid = usable - valid;

	/* Add one to avoid division by 0 */
	vld_ratio = (double)invalid / (double)(valid + 1);
	return vld_ratio * ftl_band_age(band);
}

static bool
ftl_band_needs_defrag(struct ftl_band *band, struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t thld_vld;

	/* If we're in dire need of free bands, every band is worth defragging */
	if (ftl_current_limit(dev) == SPDK_FTL_LIMIT_CRIT) {
		return true;
	}

	thld_vld = (ftl_band_num_usable_lbks(band) * conf->defrag.invalid_thld) / 100;

	return band->merit > ftl_band_calc_merit(band, &thld_vld);
}

static struct ftl_band *
ftl_select_defrag_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *mband = NULL;
	double merit = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_CLOSED);
		band->merit = ftl_band_calc_merit(band, NULL);
		if (band->merit > merit) {
			merit = band->merit;
			mband = band;
		}
	}

	if (mband && !ftl_band_needs_defrag(mband, dev)) {
		mband = NULL;
	}

	return mband;
}

static void
ftl_process_relocs(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (ftl_dev_needs_defrag(dev)) {
		band = dev->df_band = ftl_select_defrag_band(dev);

		if (band) {
			ftl_reloc_add(dev->reloc, band, 0, ftl_num_band_lbks(dev), 0);
			ftl_trace_defrag_band(dev, band);
		}
	}

	ftl_reloc(dev->reloc);
}

int
ftl_current_limit(const struct spdk_ftl_dev *dev)
{
	return dev->limit;
}

void
spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs)
{
	attrs->uuid = dev->uuid;
	attrs->lbk_cnt = dev->num_lbas;
	attrs->lbk_size = FTL_BLOCK_SIZE;
	attrs->range = dev->range;
	attrs->cache_bdev_desc = dev->nv_cache.bdev_desc;
	attrs->allow_open_bands = dev->conf.allow_open_bands;
	attrs->num_chunks = dev->geo.num_chk;
	attrs->chunk_size = dev->geo.clba;
}

static void
_ftl_io_write(void *ctx)
{
	ftl_io_write((struct ftl_io *)ctx);
}

static int
ftl_rwb_fill_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_rwb_fill(io);
	if (rc == -EAGAIN) {
		spdk_thread_send_msg(spdk_io_channel_get_thread(io->ioch),
				     _ftl_io_write, io);
		return 0;
	}

	return rc;
}

static int
ftl_submit_write_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_submit_write(ftl_wptr_from_band(io->band), io);
	if (rc == -EAGAIN) {
		/* EAGAIN means that the request was put on the pending queue */
		return 0;
	}

	return rc;
}

void
ftl_io_write(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	/* For normal IOs we just need to copy the data onto the rwb */
	if (!(io->flags & FTL_IO_MD)) {
		ftl_io_call_foreach_child(io, ftl_rwb_fill_leaf);
	} else {
		/* Metadata has its own buffer, so it doesn't have to be copied; just */
		/* send it to the core thread and schedule the write immediately */
		if (ftl_check_core_thread(dev)) {
			ftl_io_call_foreach_child(io, ftl_submit_write_leaf);
		} else {
			spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io);
		}
	}
}

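/*
 * Usage sketch (added; write_done_cb and cb_ctx are hypothetical names, not
 * part of this file): a single iovec covering lba_cnt blocks of
 * FTL_BLOCK_SIZE, e.g.
 *
 *	struct iovec iov = { .iov_base = buf, .iov_len = 8 * FTL_BLOCK_SIZE };
 *	spdk_ftl_write(dev, ch, lba, 8, &iov, 1, write_done_cb, cb_ctx);
 *
 * The iovec must describe exactly lba_cnt blocks, otherwise -EINVAL is
 * returned.
 */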
int
spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	       struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_write(io);

	return 0;
}

static int
ftl_io_read_leaf(struct ftl_io *io)
{
	int rc;

	rc = ftl_submit_read(io);
	if (rc == -ENOMEM) {
		/* ENOMEM means that the request was put on a pending queue */
		return 0;
	}

	return rc;
}

static void
_ftl_io_read(void *arg)
{
	ftl_io_read((struct ftl_io *)arg);
}

void
ftl_io_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_read_thread(dev)) {
		ftl_io_call_foreach_child(io, ftl_io_read_leaf);
	} else {
		spdk_thread_send_msg(ftl_get_read_thread(dev), _ftl_io_read, io);
	}
}

int
spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	      struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_read(io);
	return 0;
}

static struct ftl_flush *
ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;
	struct ftl_rwb *rwb = dev->rwb;

	flush = calloc(1, sizeof(*flush));
	if (!flush) {
		return NULL;
	}

	flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb));
	if (!flush->bmap) {
		goto error;
	}

	flush->dev = dev;
	flush->cb.fn = cb_fn;
	flush->cb.ctx = cb_arg;

	return flush;
error:
	free(flush);
	return NULL;
}

static void
_ftl_flush(void *ctx)
{
	struct ftl_flush *flush = ctx;
	struct spdk_ftl_dev *dev = flush->dev;
	struct ftl_rwb *rwb = dev->rwb;
	struct ftl_rwb_batch *batch;

	/* Attach flush object to all non-empty batches */
	ftl_rwb_foreach_batch(batch, rwb) {
		if (!ftl_rwb_batch_empty(batch)) {
			spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch));
			flush->num_req++;
		}
	}

	LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry);

	/* If the RWB was already empty, the flush can be completed right away */
	if (!flush->num_req) {
		ftl_complete_flush(flush);
	}
}

int
spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;

	if (!dev->initialized) {
		return -EBUSY;
	}

	flush = ftl_flush_init(dev, cb_fn, cb_arg);
	if (!flush) {
		return -ENOMEM;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush);
	return 0;
}

void
ftl_process_anm_event(struct ftl_anm_event *event)
{
	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Unconsumed ANM received for dev: %p...\n", event->dev);
	ftl_anm_event_complete(event);
}

static void
ftl_process_retry_queue(struct spdk_ftl_dev *dev)
{
	struct ftl_io *io;
	int rc;

	while (!TAILQ_EMPTY(&dev->retry_queue)) {
		io = TAILQ_FIRST(&dev->retry_queue);

		/* Retry only if IO is still healthy */
		if (spdk_likely(io->status == 0)) {
			rc = ftl_submit_read(io);
			if (rc == -ENOMEM) {
				break;
			}
		}

		io->flags &= ~FTL_IO_RETRY;
		TAILQ_REMOVE(&dev->retry_queue, io, retry_entry);

		if (ftl_io_done(io)) {
			ftl_io_complete(io);
		}
	}
}

int
ftl_task_read(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_read_qpair(dev);
	size_t num_completed;

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	num_completed = spdk_nvme_qpair_process_completions(qpair, 0);

	if (num_completed && !TAILQ_EMPTY(&dev->retry_queue)) {
		ftl_process_retry_queue(dev);
	}

	return num_completed;
}

int
ftl_task_core(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_write_qpair(dev);

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	ftl_process_writes(dev);
	spdk_nvme_qpair_process_completions(qpair, 0);
	ftl_process_relocs(dev);

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE)