/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/likely.h"
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"

#include "ftl_core.h"
#include "ftl_band.h"
#include "ftl_io.h"
#include "ftl_anm.h"
#include "ftl_rwb.h"
#include "ftl_debug.h"
#include "ftl_reloc.h"

/* Max number of iovecs */
#define FTL_MAX_IOV 1024

struct ftl_wptr {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Current PPA */
	struct ftl_ppa ppa;

	/* Band currently being written to */
	struct ftl_band *band;

	/* Current logical block's offset */
	uint64_t offset;

	/* Current erase block */
	struct ftl_chunk *chunk;

	/* IO that is currently processed */
	struct ftl_io *current_io;

	/* List link */
	LIST_ENTRY(ftl_wptr) list_entry;
};

struct ftl_flush {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Number of batches to wait for */
	size_t num_req;

	/* Callback */
	struct ftl_cb cb;

	/* Batch bitmap */
	struct spdk_bit_array *bmap;

	/* List link */
	LIST_ENTRY(ftl_flush) list_entry;
};

typedef int (*ftl_next_ppa_fn)(struct ftl_io *, struct ftl_ppa *);
static void _ftl_read(void *);
static void _ftl_write(void *);

static int
ftl_rwb_flags_from_io(const struct ftl_io *io)
{
	int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD;
	return io->flags & valid_flags;
}

static int
ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry)
{
	return entry->flags & FTL_IO_WEAK;
}

static void
ftl_wptr_free(struct ftl_wptr *wptr)
{
	if (!wptr) {
		return;
	}

	free(wptr);
}

static void
ftl_remove_wptr(struct ftl_wptr *wptr)
{
	LIST_REMOVE(wptr, list_entry);
	ftl_wptr_free(wptr);
}

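/* Completion callback for NVMe commands issued on behalf of an ftl_io: records any */
/* error, traces the disk completion, drops the outstanding-request count and */
/* completes the parent IO once all of its requests have finished. */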
static void
ftl_io_cmpl_cb(void *arg, const struct spdk_nvme_cpl *status)
{
	struct ftl_io *io = arg;

	if (spdk_nvme_cpl_is_error(status)) {
		ftl_io_process_error(io, status);
	}

	ftl_trace_completion(io->dev, io, FTL_TRACE_COMPLETION_DISK);

	ftl_io_dec_req(io);

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}

static void
ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band)
{
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			break;
		}
	}

	/* If the band already has the high_prio flag set, other writes must */
	/* have failed earlier, so it's already taken care of. */
	if (band->high_prio) {
		assert(wptr == NULL);
		return;
	}

	ftl_band_write_failed(band);
	ftl_remove_wptr(wptr);
}

static struct ftl_wptr *
ftl_wptr_from_band(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			return wptr;
		}
	}

	return NULL;
}

static void
ftl_md_write_fail(struct ftl_io *io, int status)
{
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	char buf[128];

	wptr = ftl_wptr_from_band(band);

	SPDK_ERRLOG("Metadata write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(wptr->ppa, buf, sizeof(buf)), status);

	ftl_halt_writes(io->dev, band);
}

static void
ftl_md_write_cb(void *arg, int status)
{
	struct ftl_io *io = arg;
	struct ftl_wptr *wptr;

	wptr = ftl_wptr_from_band(io->band);

	if (status) {
		ftl_md_write_fail(io, status);
		return;
	}

	ftl_band_set_next_state(io->band);
	if (io->band->state == FTL_BAND_STATE_CLOSED) {
		ftl_remove_wptr(wptr);
	}
}

static int
ftl_ppa_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	size_t lbk_cnt, max_lbks;

	assert(ftl_io_mode_ppa(io));
	assert(io->iov_pos < io->iov_cnt);

	if (io->pos == 0) {
		*ppa = io->ppa;
	} else {
		*ppa = ftl_band_next_xfer_ppa(io->band, io->ppa, io->pos);
	}

	assert(!ftl_ppa_invalid(*ppa));

	/* Metadata has to be read in the way it's written (jumping across */
	/* the chunks in xfer_size increments) */
	if (io->flags & FTL_IO_MD) {
		max_lbks = dev->xfer_size - (ppa->lbk % dev->xfer_size);
		lbk_cnt = spdk_min(ftl_io_iovec_len_left(io), max_lbks);
		assert(ppa->lbk / dev->xfer_size == (ppa->lbk + lbk_cnt - 1) / dev->xfer_size);
	} else {
		lbk_cnt = ftl_io_iovec_len_left(io);
	}

	return lbk_cnt;
}

static int
ftl_wptr_close_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	ftl_band_set_state(band, FTL_BAND_STATE_CLOSING);
	band->tail_md_ppa = wptr->ppa;

	return ftl_band_write_tail_md(band, band->md.dma_buf, ftl_md_write_cb);
}

static int
ftl_wptr_open_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	assert(ftl_band_chunk_is_first(band, wptr->chunk));
	assert(band->md.num_vld == 0);

	ftl_band_clear_md(band);

	assert(band->state == FTL_BAND_STATE_PREP);
	ftl_band_set_state(band, FTL_BAND_STATE_OPENING);

	return ftl_band_write_head_md(band, band->md.dma_buf, ftl_md_write_cb);
}

static int
ftl_submit_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_band *band = io->band;
	struct ftl_ppa ppa = io->ppa;
	struct ftl_chunk *chunk;
	uint64_t ppa_packed;
	int rc = 0;
	size_t i;

	for (i = 0; i < io->lbk_cnt; ++i) {
		if (i != 0) {
			chunk = ftl_band_next_chunk(band, ftl_band_chunk_from_ppa(band, ppa));
			assert(chunk->state == FTL_CHUNK_STATE_CLOSED ||
			       chunk->state == FTL_CHUNK_STATE_VACANT);
			ppa = chunk->start_ppa;
		}

		assert(ppa.lbk == 0);
		ppa_packed = ftl_ppa_addr_pack(dev, ppa);

		ftl_trace_submission(dev, io, ppa, 1);
		rc = spdk_nvme_ocssd_ns_cmd_vector_reset(dev->ns, ftl_get_write_qpair(dev),
				&ppa_packed, 1, NULL, ftl_io_cmpl_cb, io);
		if (rc) {
			ftl_io_fail(io, rc);
			SPDK_ERRLOG("Vector reset failed with status: %d\n", rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, 1);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static void
_ftl_io_erase(void *ctx)
{
	ftl_io_erase((struct ftl_io *)ctx);
}

static bool
ftl_check_core_thread(const struct spdk_ftl_dev *dev)
{
	return dev->core_thread.thread == spdk_get_thread();
}

static bool
ftl_check_read_thread(const struct spdk_ftl_dev *dev)
{
	return dev->read_thread.thread == spdk_get_thread();
}

int
ftl_io_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_core_thread(dev)) {
		return ftl_submit_erase(io);
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_erase, io);
	return 0;
}

static struct ftl_band *
ftl_next_write_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	band = LIST_FIRST(&dev->free_bands);
	if (!band) {
		return NULL;
	}
	assert(band->state == FTL_BAND_STATE_FREE);

	if (ftl_band_erase(band)) {
		/* TODO: handle erase failure */
		return NULL;
	}

	return band;
}

static struct ftl_band *
ftl_next_wptr_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (!dev->next_band) {
		band = ftl_next_write_band(dev);
	} else {
		assert(dev->next_band->state == FTL_BAND_STATE_PREP);
		band = dev->next_band;
		dev->next_band = NULL;
	}

	return band;
}

static struct ftl_wptr *
ftl_wptr_init(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	wptr = calloc(1, sizeof(*wptr));
	if (!wptr) {
		return NULL;
	}

	wptr->dev = dev;
	wptr->band = band;
	wptr->chunk = CIRCLEQ_FIRST(&band->chunks);
	wptr->ppa = wptr->chunk->start_ppa;

	return wptr;
}

static int
ftl_add_wptr(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	struct ftl_wptr *wptr;

	band = ftl_next_wptr_band(dev);
	if (!band) {
		return -1;
	}

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	if (ftl_band_write_prep(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}

static void
ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
{
	struct ftl_band *band = wptr->band;
	struct spdk_ftl_dev *dev = wptr->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t next_thld;

	wptr->offset += xfer_size;
	next_thld = (ftl_band_num_usable_lbks(band) * conf->band_thld) / 100;

	if (ftl_band_full(band, wptr->offset)) {
		ftl_band_set_state(band, FTL_BAND_STATE_FULL);
	}

	wptr->chunk->busy = true;
	wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size);
	wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk);

	assert(!ftl_ppa_invalid(wptr->ppa));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: grp:%d, pu:%d chunk:%d, lbk:%u\n",
		      wptr->ppa.grp, wptr->ppa.pu, wptr->ppa.chk, wptr->ppa.lbk);

	if (wptr->offset >= next_thld && !dev->next_band) {
		dev->next_band = ftl_next_write_band(dev);
	}
}

static int
ftl_wptr_ready(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	/* TODO: add handling of empty bands */

	if (spdk_unlikely(!ftl_chunk_is_writable(wptr->chunk))) {
		/* Erasing band may fail after it was assigned to wptr. */
		if (spdk_unlikely(wptr->chunk->state == FTL_CHUNK_STATE_BAD)) {
			ftl_wptr_advance(wptr, wptr->dev->xfer_size);
		}
		return 0;
	}

	/* If we're in the process of writing metadata, wait till it is */
	/* completed. */
	/* TODO: we should probably change bands once we're writing tail md */
	if (ftl_band_state_changing(band)) {
		return 0;
	}

	if (band->state == FTL_BAND_STATE_FULL) {
		if (ftl_wptr_close_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	if (band->state != FTL_BAND_STATE_OPEN) {
		if (ftl_wptr_open_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	return 1;
}

static const struct spdk_ftl_limit *
ftl_get_limit(const struct spdk_ftl_dev *dev, int type)
{
	assert(type < SPDK_FTL_LIMIT_MAX);
	return &dev->conf.defrag.limits[type];
}

static bool
ftl_cache_lba_valid(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	struct ftl_ppa ppa;

	/* If the LBA is invalid don't bother checking the md and l2p */
	if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) {
		return false;
	}

	ppa = ftl_l2p_get(dev, entry->lba);
	if (!(ftl_ppa_cached(ppa) && ppa.offset == entry->pos)) {
		return false;
	}

	return true;
}

static void
ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	pthread_spin_lock(&entry->lock);

	if (!ftl_rwb_entry_valid(entry)) {
		goto unlock;
	}

	/* If the l2p wasn't updated and still points at the entry, fill it with the */
	/* on-disk PPA and clear the cache status bit. Otherwise, skip the l2p update */
	/* and just clear the cache status. */
	if (!ftl_cache_lba_valid(dev, entry)) {
		goto clear;
	}

	ftl_l2p_set(dev, entry->lba, entry->ppa);
clear:
	ftl_rwb_entry_invalidate(entry);
unlock:
	pthread_spin_unlock(&entry->lock);
}

static struct ftl_rwb_entry *
ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags)
{
	struct ftl_rwb_entry *entry;

	entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags));
	if (!entry) {
		return NULL;
	}

	ftl_evict_cache_entry(dev, entry);

	entry->flags = flags;
	return entry;
}

static void
ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size)
{
	struct ftl_rwb_entry *entry;
	int flags = FTL_IO_PAD | FTL_IO_INTERNAL;

	for (size_t i = 0; i < size; ++i) {
		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			break;
		}

		entry->lba = FTL_LBA_INVALID;
		entry->ppa = ftl_to_ppa(FTL_PPA_INVALID);
		memset(entry->data, 0, FTL_BLOCK_SIZE);
		ftl_rwb_push(entry);
	}
}

static void
ftl_remove_free_bands(struct spdk_ftl_dev *dev)
{
	while (!LIST_EMPTY(&dev->free_bands)) {
		LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry);
	}

	dev->next_band = NULL;
}

static void
ftl_process_shutdown(struct spdk_ftl_dev *dev)
{
	size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) +
		      ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER);

	if (size >= dev->xfer_size) {
		return;
	}

	/* If we reach this point we need to remove free bands */
	/* and pad current wptr band to the end */
	ftl_remove_free_bands(dev);

	/* Pad write buffer until band is full */
	ftl_rwb_pad(dev, dev->xfer_size - size);
}

static int
ftl_shutdown_complete(struct spdk_ftl_dev *dev)
{
	return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) &&
	       LIST_EMPTY(&dev->wptr_list);
}

void
ftl_apply_limits(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit;
	struct ftl_stats *stats = &dev->stats;
	size_t rwb_limit[FTL_RWB_TYPE_MAX];
	int i;

	ftl_rwb_get_limits(dev->rwb, rwb_limit);

	/* Clear existing limit */
	dev->limit = SPDK_FTL_LIMIT_MAX;

	for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) {
		limit = ftl_get_limit(dev, i);

		if (dev->num_free <= limit->thld) {
			rwb_limit[FTL_RWB_TYPE_USER] =
				(limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100;
			stats->limits[i]++;
			dev->limit = i;
			goto apply;
		}
	}

	/* Clear the limits, since we don't need to apply them anymore */
	rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb);
apply:
	ftl_trace_limits(dev, rwb_limit, dev->num_free);
	ftl_rwb_set_limits(dev->rwb, rwb_limit);
}

static int
ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band = ftl_band_from_ppa(dev, ppa);
	struct ftl_md *md = &band->md;
	uint64_t offset;

	offset = ftl_band_lbkoff_from_ppa(band, ppa);

	/* The bit might be already cleared if two writes are scheduled to the */
	/* same LBA at the same time */
	if (spdk_bit_array_get(md->vld_map, offset)) {
		assert(md->num_vld > 0);
		spdk_bit_array_clear(md->vld_map, offset);
		md->num_vld--;
		return 1;
	}

	return 0;
}

int
ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band;
	int rc;

	assert(!ftl_ppa_cached(ppa));
	band = ftl_band_from_ppa(dev, ppa);

	pthread_spin_lock(&band->md.lock);
	rc = ftl_invalidate_addr_unlocked(dev, ppa);
	pthread_spin_unlock(&band->md.lock);

	return rc;
}

static int
ftl_read_retry(int rc)
{
	return rc == -EAGAIN;
}

static int
ftl_read_canceled(int rc)
{
	return rc == -EFAULT || rc == 0;
}

static void
ftl_add_to_retry_queue(struct ftl_io *io)
{
	if (!(io->flags & FTL_IO_RETRY)) {
		io->flags |= FTL_IO_RETRY;
		TAILQ_INSERT_TAIL(&io->dev->retry_queue, io, retry_entry);
	}
}

static int
ftl_submit_read(struct ftl_io *io, ftl_next_ppa_fn next_ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa ppa;
	int rc = 0, lbk_cnt;

	while (io->pos < io->lbk_cnt) {
		/* We might hit the cache here, if so, skip the read */
		lbk_cnt = rc = next_ppa(io, &ppa);

		/* We might need to retry the read from scratch (e.g. */
		/* because a write was under way and completed before */
		/* we could read it from the rwb) */
		if (ftl_read_retry(rc)) {
			continue;
		}

		/* We don't have to schedule the read, as it was read from cache */
		if (ftl_read_canceled(rc)) {
			ftl_io_advance(io, 1);
			ftl_trace_completion(io->dev, io, rc ? FTL_TRACE_COMPLETION_INVALID :
					     FTL_TRACE_COMPLETION_CACHE);
			rc = 0;
			continue;
		}

		assert(lbk_cnt > 0);

		ftl_trace_submission(dev, io, ppa, lbk_cnt);
		rc = spdk_nvme_ns_cmd_read(dev->ns, ftl_get_read_qpair(dev),
					   ftl_io_iovec_addr(io),
					   ftl_ppa_addr_pack(io->dev, ppa), lbk_cnt,
					   ftl_io_cmpl_cb, io, 0);
		if (rc == -ENOMEM) {
			ftl_add_to_retry_queue(io);
			break;
		} else if (rc) {
			ftl_io_fail(io, rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, lbk_cnt);
	}

	/* If we didn't have to read anything from the device, */
	/* complete the request right away */
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static int
ftl_ppa_cache_read(struct ftl_io *io, uint64_t lba,
		   struct ftl_ppa ppa, void *buf)
{
	struct ftl_rwb *rwb = io->dev->rwb;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa nppa;
	int rc = 0;

	entry = ftl_rwb_entry_from_offset(rwb, ppa.offset);
	pthread_spin_lock(&entry->lock);

	nppa = ftl_l2p_get(io->dev, lba);
	if (ppa.ppa != nppa.ppa) {
		rc = -1;
		goto out;
	}

	memcpy(buf, entry->data, FTL_BLOCK_SIZE);
out:
	pthread_spin_unlock(&entry->lock);
	return rc;
}

static int
ftl_lba_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa next_ppa;
	size_t i;

	*ppa = ftl_l2p_get(dev, ftl_io_current_lba(io));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read ppa:%lx, lba:%lu\n",
		      ppa->ppa, ftl_io_current_lba(io));

	/* If the PPA is invalid, skip it (the buffer should already be zero'ed) */
	if (ftl_ppa_invalid(*ppa)) {
		return -EFAULT;
	}

	if (ftl_ppa_cached(*ppa)) {
		if (!ftl_ppa_cache_read(io, ftl_io_current_lba(io), *ppa, ftl_io_iovec_addr(io))) {
			return 0;
		}

		/* If the state changed, we have to re-read the l2p */
		return -EAGAIN;
	}

	for (i = 1; i < ftl_io_iovec_len_left(io); ++i) {
		next_ppa = ftl_l2p_get(dev, ftl_io_get_lba(io, io->pos + i));

		if (ftl_ppa_invalid(next_ppa) || ftl_ppa_cached(next_ppa)) {
			break;
		}

		if (ftl_ppa_addr_pack(dev, *ppa) + i != ftl_ppa_addr_pack(dev, next_ppa)) {
			break;
		}
	}

	return i;
}

static void
ftl_complete_flush(struct ftl_flush *flush)
{
	assert(flush->num_req == 0);
	LIST_REMOVE(flush, list_entry);

	flush->cb.fn(flush->cb.ctx, 0);

	spdk_bit_array_free(&flush->bmap);
	free(flush);
}

static void
ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch)
{
	struct ftl_flush *flush, *tflush;
	size_t offset;

	LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) {
		offset = ftl_rwb_batch_get_offset(batch);

		if (spdk_bit_array_get(flush->bmap, offset)) {
			spdk_bit_array_clear(flush->bmap, offset);
			if (!(--flush->num_req)) {
				ftl_complete_flush(flush);
			}
		}
	}
}

static void
ftl_write_fail(struct ftl_io *io, int status)
{
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;
	char buf[128];

	entry = ftl_rwb_batch_first_entry(batch);

	band = ftl_band_from_ppa(io->dev, entry->ppa);
	SPDK_ERRLOG("Write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(entry->ppa, buf, sizeof(buf)), status);

	/* Close the band, halt wptr and defrag */
	ftl_halt_writes(dev, band);

	ftl_rwb_foreach(entry, batch) {
		/* Invalidate meta set by process_writes() */
		ftl_invalidate_addr(dev, entry->ppa);
	}

	/* Reset the batch back to the RWB to resend it later */
	ftl_rwb_batch_revert(batch);
}

static void
ftl_write_cb(void *arg, int status)
{
	struct ftl_io *io = arg;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct ftl_rwb_entry *entry;

	if (status) {
		ftl_write_fail(io, status);
		return;
	}

	assert(io->lbk_cnt == dev->xfer_size);
	ftl_rwb_foreach(entry, batch) {
		if (!(io->flags & FTL_IO_MD) && !(entry->flags & FTL_IO_PAD)) {
			/* Verify that the LBA is set for user lbks */
			assert(entry->lba != FTL_LBA_INVALID);
		}

		SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lu, lba:%lu\n",
			      entry->ppa.ppa, entry->lba);
	}

	ftl_process_flush(dev, batch);
	ftl_rwb_batch_release(batch);
}

static void
ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry)
{
	if (!ftl_rwb_entry_internal(entry)) {
		dev->stats.write_user++;
	}
	dev->stats.write_total++;
}

static void
ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry,
	       struct ftl_ppa ppa)
{
	struct ftl_ppa prev_ppa;
	struct ftl_rwb_entry *prev;
	struct ftl_band *band;
	int valid;

	prev_ppa = ftl_l2p_get(dev, entry->lba);
	if (ftl_ppa_invalid(prev_ppa)) {
		ftl_l2p_set(dev, entry->lba, ppa);
		return;
	}

	/* If the L2P's PPA is different than what we expected we don't need to */
	/* do anything (someone's already overwritten our data). */
	if (ftl_rwb_entry_weak(entry) && !ftl_ppa_cmp(prev_ppa, entry->ppa)) {
		return;
	}

	if (ftl_ppa_cached(prev_ppa)) {
		assert(!ftl_rwb_entry_weak(entry));
		prev = ftl_rwb_entry_from_offset(dev->rwb, prev_ppa.offset);
		pthread_spin_lock(&prev->lock);

		/* Re-read the L2P under the lock to protect against updates */
		/* to this LBA from other threads */
		prev_ppa = ftl_l2p_get(dev, entry->lba);

		/* If the entry is no longer in cache, another write has been */
		/* scheduled in the meantime, so we have to invalidate its LBA */
		if (!ftl_ppa_cached(prev_ppa)) {
			ftl_invalidate_addr(dev, prev_ppa);
		}

		/* If previous entry is part of cache, remove and invalidate it */
		if (ftl_rwb_entry_valid(prev)) {
			ftl_invalidate_addr(dev, prev->ppa);
			ftl_rwb_entry_invalidate(prev);
		}

		ftl_l2p_set(dev, entry->lba, ppa);
		pthread_spin_unlock(&prev->lock);
		return;
	}

	/* Lock the band containing previous PPA. This assures atomic changes to */
	/* the L2P as well as metadata. The valid bits in metadata are used to */
	/* check weak writes validity. */
	band = ftl_band_from_ppa(dev, prev_ppa);
	pthread_spin_lock(&band->md.lock);

	valid = ftl_invalidate_addr_unlocked(dev, prev_ppa);

	/* If the address has been invalidated already, we don't want to update */
	/* the L2P for weak writes, as it means the write is no longer valid. */
	if (!ftl_rwb_entry_weak(entry) || valid) {
		ftl_l2p_set(dev, entry->lba, ppa);
	}

	pthread_spin_unlock(&band->md.lock);
}

static struct ftl_io *
ftl_io_init_child_write(struct ftl_io *parent, struct ftl_ppa ppa,
			void *data, void *md, spdk_ftl_fn cb)
{
	struct ftl_io *io;
	struct spdk_ftl_dev *dev = parent->dev;
	struct ftl_io_init_opts opts = {
		.dev = dev,
		.io = NULL,
		.parent = parent,
		.rwb_batch = NULL,
		.band = parent->band,
		.size = sizeof(struct ftl_io),
		.flags = 0,
		.type = FTL_IO_WRITE,
		.iov_cnt = 1,
		.req_size = dev->xfer_size,
		.fn = cb,
		.data = data,
		.md = md,
	};

	io = ftl_io_init_internal(&opts);
	if (!io) {
		return NULL;
	}

	io->ppa = ppa;

	return io;
}

static void
ftl_io_child_write_cb(void *ctx, int status)
{
	struct ftl_chunk *chunk;
	struct ftl_io *io = ctx;

	chunk = ftl_band_chunk_from_ppa(io->band, io->ppa);
	chunk->busy = false;
}

static int
ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io, int lbk_cnt)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io *child;
	struct iovec *iov = ftl_io_iovec(io);
	int rc;

	/* Split IO to child requests and release chunk immediately after child is completed */
	child = ftl_io_init_child_write(io, wptr->ppa, iov[io->iov_pos].iov_base,
					ftl_io_get_md(io), ftl_io_child_write_cb);
	if (!child) {
		return -EAGAIN;
	}

	rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
					    ftl_io_iovec_addr(child), child->md,
					    ftl_ppa_addr_pack(dev, wptr->ppa),
					    lbk_cnt, ftl_io_cmpl_cb, child, 0, 0, 0);
	if (rc) {
		ftl_io_fail(child, rc);
		ftl_io_complete(child);
		SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n",
			    rc, wptr->ppa.ppa);

		return -EIO;
	}

	ftl_io_inc_req(child);
	ftl_io_advance(child, lbk_cnt);

	return 0;
}

static int
ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct iovec *iov = ftl_io_iovec(io);
	int rc = 0;
	size_t lbk_cnt;

	while (io->iov_pos < io->iov_cnt) {
		lbk_cnt = iov[io->iov_pos].iov_len / PAGE_SIZE;
		assert(iov[io->iov_pos].iov_len > 0);
		assert(lbk_cnt == dev->xfer_size);

		/* There are no guarantees on the order of completion of commands in the */
		/* NVMe submission queue, so wait until the chunk is not busy before */
		/* submitting another write */
		if (wptr->chunk->busy) {
			wptr->current_io = io;
			rc = -EAGAIN;
			break;
		}

		rc = ftl_submit_child_write(wptr, io, lbk_cnt);

		if (rc == -EAGAIN) {
			wptr->current_io = io;
			break;
		} else if (rc) {
			ftl_io_fail(io, rc);
			break;
		}

		ftl_trace_submission(dev, io, wptr->ppa, lbk_cnt);

		/* Update parent iovec */
		ftl_io_advance(io, lbk_cnt);

		ftl_wptr_advance(wptr, lbk_cnt);
	}

	if (ftl_io_done(io)) {
		/* Parent IO will complete after all children are completed */
		ftl_io_complete(io);
	}

	return rc;
}

static void
ftl_flush_pad_batch(struct spdk_ftl_dev *dev)
{
	struct ftl_rwb *rwb = dev->rwb;
	size_t size;

	size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) +
	       ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER);

	/* There must be something in the RWB, otherwise the flush */
	/* wouldn't be waiting for anything */
	assert(size > 0);

	/* Only add padding when there's less than xfer size */
	/* entries in the buffer. Otherwise we just have to wait */
	/* for the entries to become ready. */
	if (size < dev->xfer_size) {
		ftl_rwb_pad(dev, dev->xfer_size - (size % dev->xfer_size));
	}
}

static int
ftl_wptr_process_writes(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_rwb_batch *batch;
	struct ftl_rwb_entry *entry;
	struct ftl_io *io;
	struct ftl_ppa ppa, prev_ppa;

	if (wptr->current_io) {
		if (ftl_submit_write(wptr, wptr->current_io) == -EAGAIN) {
			return 0;
		}
		wptr->current_io = NULL;
	}

	/* Make sure the band is prepared for writing */
	if (!ftl_wptr_ready(wptr)) {
		return 0;
	}

	if (dev->halt) {
		ftl_process_shutdown(dev);
	}

	batch = ftl_rwb_pop(dev->rwb);
	if (!batch) {
		/* If there are queued flush requests we need to pad the RWB to */
		/* force out remaining entries */
		if (!LIST_EMPTY(&dev->flush_list)) {
			ftl_flush_pad_batch(dev);
		}

		return 0;
	}

	io = ftl_io_rwb_init(dev, wptr->band, batch, ftl_write_cb);
	if (!io) {
		goto error;
	}

	ppa = wptr->ppa;
	ftl_rwb_foreach(entry, batch) {
		entry->ppa = ppa;

		if (entry->lba != FTL_LBA_INVALID) {
			pthread_spin_lock(&entry->lock);
			prev_ppa = ftl_l2p_get(dev, entry->lba);

			/* If the l2p was updated in the meantime, don't update band's metadata */
			if (ftl_ppa_cached(prev_ppa) && prev_ppa.offset == entry->pos) {
				/* Setting entry's cache bit needs to be done after metadata */
				/* within the band is updated to make sure that writes */
				/* invalidating the entry clear the metadata as well */
				ftl_band_set_addr(wptr->band, entry->lba, entry->ppa);
				ftl_rwb_entry_set_valid(entry);
			}
			pthread_spin_unlock(&entry->lock);
		}

		ftl_trace_rwb_pop(dev, entry);
		ftl_update_rwb_stats(dev, entry);

		ppa = ftl_band_next_ppa(wptr->band, ppa, 1);
	}

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lx, %lx\n", wptr->ppa.ppa,
		      ftl_ppa_addr_pack(dev, wptr->ppa));

	if (ftl_submit_write(wptr, io)) {
		/* TODO: we need some recovery here */
		assert(0 && "Write submit failed");
		if (ftl_io_done(io)) {
			ftl_io_free(io);
		}
	}

	return dev->xfer_size;
error:
	ftl_rwb_batch_revert(batch);
	return 0;
}

static int
ftl_process_writes(struct spdk_ftl_dev *dev)
{
	struct ftl_wptr *wptr, *twptr;
	size_t num_active = 0;
	enum ftl_band_state state;

	LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) {
		ftl_wptr_process_writes(wptr);
		state = wptr->band->state;

		if (state != FTL_BAND_STATE_FULL &&
		    state != FTL_BAND_STATE_CLOSING &&
		    state != FTL_BAND_STATE_CLOSED) {
			num_active++;
		}
	}

	if (num_active < 1) {
		ftl_add_wptr(dev);
	}

	return 0;
}

static void
ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io)
{
	struct ftl_band *band;

	memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE);

	if (ftl_rwb_entry_weak(entry)) {
		band = ftl_band_from_ppa(io->dev, io->ppa);
		entry->ppa = ftl_band_next_ppa(band, io->ppa, io->pos);
	}

	entry->trace = io->trace;

	if (entry->md) {
		memcpy(entry->md, &entry->lba, sizeof(entry->lba));
	}
}

static int
ftl_rwb_fill(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa ppa = { .cached = 1 };
	int flags = ftl_rwb_flags_from_io(io);
	uint64_t lba;

	while (io->pos < io->lbk_cnt) {
		lba = ftl_io_current_lba(io);
		if (lba == FTL_LBA_INVALID) {
			ftl_io_advance(io, 1);
			continue;
		}

		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			return -EAGAIN;
		}

		entry->lba = lba;
		ftl_rwb_entry_fill(entry, io);

		ppa.offset = entry->pos;

		ftl_trace_rwb_fill(dev, io);
		ftl_io_advance(io, 1);
		ftl_update_l2p(dev, entry, ppa);

		/* Needs to be done after L2P is updated to avoid race with */
		/* write completion callback when it's processed faster than */
		/* L2P is set in update_l2p(). */
		ftl_rwb_push(entry);
	}

	ftl_io_complete(io);
	return 0;
}

static bool
ftl_dev_needs_defrag(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START);

	if (ftl_reloc_is_halted(dev->reloc)) {
		return false;
	}

	if (dev->df_band) {
		return false;
	}

	if (dev->num_free <= limit->thld) {
		return true;
	}

	return false;
}

static double
ftl_band_calc_merit(struct ftl_band *band, size_t *threshold_valid)
{
	size_t usable, valid, invalid;
	double vld_ratio;

	/* If the band doesn't have any usable lbks it's of no use */
	usable = ftl_band_num_usable_lbks(band);
	if (usable == 0) {
		return 0.0;
	}

	valid = threshold_valid ? (usable - *threshold_valid) : band->md.num_vld;
	invalid = usable - valid;

	/* Add one to avoid division by 0 */
	vld_ratio = (double)invalid / (double)(valid + 1);
	return vld_ratio * ftl_band_age(band);
}

static bool
ftl_band_needs_defrag(struct ftl_band *band, struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t thld_vld;

	/* If we're in dire need of free bands, every band is worth defragging */
	if (ftl_current_limit(dev) == SPDK_FTL_LIMIT_CRIT) {
		return true;
	}

	thld_vld = (ftl_band_num_usable_lbks(band) * conf->defrag.invalid_thld) / 100;

	return band->merit > ftl_band_calc_merit(band, &thld_vld);
}

static struct ftl_band *
ftl_select_defrag_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *mband = NULL;
	double merit = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_CLOSED);
		band->merit = ftl_band_calc_merit(band, NULL);
		if (band->merit > merit) {
			merit = band->merit;
			mband = band;
		}
	}

	if (mband && !ftl_band_needs_defrag(mband, dev)) {
		mband = NULL;
	}

	return mband;
}

static void
ftl_process_relocs(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (ftl_dev_needs_defrag(dev)) {
		band = dev->df_band = ftl_select_defrag_band(dev);

		if (band) {
			ftl_reloc_add(dev->reloc, band, 0, ftl_num_band_lbks(dev), 0);
			ftl_trace_defrag_band(dev, band);
		}
	}

	ftl_reloc(dev->reloc);
}

int
ftl_current_limit(const struct spdk_ftl_dev *dev)
{
	return dev->limit;
}

void
spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs)
{
	attrs->uuid = dev->uuid;
	attrs->lbk_cnt = dev->num_lbas;
	attrs->lbk_size = FTL_BLOCK_SIZE;
	attrs->range = dev->range;
	attrs->cache_bdev_desc = dev->cache_bdev_desc;
}

static void
_ftl_io_write(void *ctx)
{
	ftl_io_write((struct ftl_io *)ctx);
}

int
ftl_io_write(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	/* For normal IOs we just need to copy the data onto the rwb */
	if (!(io->flags & FTL_IO_MD)) {
		return ftl_rwb_fill(io);
	}

	/* Metadata has its own buffer, so it doesn't have to be copied; just send it */
	/* to the core thread and schedule the write immediately */
	if (ftl_check_core_thread(dev)) {
		return ftl_submit_write(ftl_wptr_from_band(io->band), io);
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io);

	return 0;
}

static int
_spdk_ftl_write(struct ftl_io *io)
{
	int rc;

	rc = ftl_io_write(io);
	if (rc == -EAGAIN) {
		spdk_thread_send_msg(spdk_io_channel_get_thread(io->ioch),
				     _ftl_write, io);
		return 0;
	}

	if (rc) {
		ftl_io_free(io);
	}

	return rc;
}

static void
_ftl_write(void *ctx)
{
	_spdk_ftl_write(ctx);
}

int
spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	       struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_alloc(ch);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE);
	return _spdk_ftl_write(io);
}

int
ftl_io_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	ftl_next_ppa_fn next_ppa;

	if (ftl_check_read_thread(dev)) {
		if (ftl_io_mode_ppa(io)) {
			next_ppa = ftl_ppa_read_next_ppa;
		} else {
			next_ppa = ftl_lba_read_next_ppa;
		}

		return ftl_submit_read(io, next_ppa);
	}

	spdk_thread_send_msg(ftl_get_read_thread(dev), _ftl_read, io);
	return 0;
}

static void
_ftl_read(void *arg)
{
	ftl_io_read((struct ftl_io *)arg);
}

int
spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	      struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_alloc(ch);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ);
	ftl_io_read(io);
	return 0;
}

static struct ftl_flush *
ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;
	struct ftl_rwb *rwb = dev->rwb;

	flush = calloc(1, sizeof(*flush));
	if (!flush) {
		return NULL;
	}

	flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb));
	if (!flush->bmap) {
		goto error;
	}

	flush->dev = dev;
	flush->cb.fn = cb_fn;
	flush->cb.ctx = cb_arg;

	return flush;
error:
	free(flush);
	return NULL;
}

static void
_ftl_flush(void *ctx)
{
	struct ftl_flush *flush = ctx;
	struct spdk_ftl_dev *dev = flush->dev;
	struct ftl_rwb *rwb = dev->rwb;
	struct ftl_rwb_batch *batch;

	/* Attach flush object to all non-empty batches */
	ftl_rwb_foreach_batch(batch, rwb) {
		if (!ftl_rwb_batch_empty(batch)) {
			spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch));
			flush->num_req++;
		}
	}

	LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry);

	/* If the RWB was already empty, the flush can be completed right away */
	if (!flush->num_req) {
		ftl_complete_flush(flush);
	}
}

int
spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;

	if (!dev->initialized) {
		return -EBUSY;
	}

	flush = ftl_flush_init(dev, cb_fn, cb_arg);
	if (!flush) {
		return -ENOMEM;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush);
	return 0;
}

void
ftl_process_anm_event(struct ftl_anm_event *event)
{
	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Unconsumed ANM received for dev: %p...\n", event->dev);
	ftl_anm_event_complete(event);
}

static void
ftl_process_retry_queue(struct spdk_ftl_dev *dev)
{
	struct ftl_io *io;
	int rc;

	while (!TAILQ_EMPTY(&dev->retry_queue)) {
		io = TAILQ_FIRST(&dev->retry_queue);

		/* Retry only if IO is still healthy */
		if (spdk_likely(io->status == 0)) {
			rc = ftl_io_read(io);
			if (rc == -ENOMEM) {
				break;
			}
		}

		io->flags &= ~FTL_IO_RETRY;
		TAILQ_REMOVE(&dev->retry_queue, io, retry_entry);

		if (ftl_io_done(io)) {
			ftl_io_complete(io);
		}
	}
}

int
ftl_task_read(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_read_qpair(dev);
	size_t num_completed;

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	num_completed = spdk_nvme_qpair_process_completions(qpair, 0);

	if (num_completed && !TAILQ_EMPTY(&dev->retry_queue)) {
		ftl_process_retry_queue(dev);
	}

	return num_completed;
}

int
ftl_task_core(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_write_qpair(dev);

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	ftl_process_writes(dev);
	spdk_nvme_qpair_process_completions(qpair, 0);
	ftl_process_relocs(dev);

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE)