/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "spdk/likely.h"
#include "spdk/stdinc.h"
#include "spdk/nvme.h"
#include "spdk/io_channel.h"
#include "spdk/bdev_module.h"
#include "spdk_internal/log.h"
#include "spdk/ftl.h"

#include "ftl_core.h"
#include "ftl_band.h"
#include "ftl_io.h"
#include "ftl_anm.h"
#include "ftl_rwb.h"
#include "ftl_debug.h"
#include "ftl_reloc.h"

/* Max number of iovecs */
#define FTL_MAX_IOV 1024

struct ftl_wptr {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Current PPA */
	struct ftl_ppa ppa;

	/* Band currently being written to */
	struct ftl_band *band;

	/* Current logical block's offset */
	uint64_t offset;

	/* Current erase block */
	struct ftl_chunk *chunk;

	/* IO that is currently processed */
	struct ftl_io *current_io;

	/* List link */
	LIST_ENTRY(ftl_wptr) list_entry;
};

struct ftl_flush {
	/* Owner device */
	struct spdk_ftl_dev *dev;

	/* Number of batches to wait for */
	size_t num_req;

	/* Callback */
	struct ftl_cb cb;

	/* Batch bitmap */
	struct spdk_bit_array *bmap;

	/* List link */
	LIST_ENTRY(ftl_flush) list_entry;
};

typedef int (*ftl_next_ppa_fn)(struct ftl_io *, struct ftl_ppa *, size_t, void *);
static void _ftl_read(void *);
static void _ftl_write(void *);

static int
ftl_rwb_flags_from_io(const struct ftl_io *io)
{
	int valid_flags = FTL_IO_INTERNAL | FTL_IO_WEAK | FTL_IO_PAD;

	return io->flags & valid_flags;
}

static int
ftl_rwb_entry_weak(const struct ftl_rwb_entry *entry)
{
	return entry->flags & FTL_IO_WEAK;
}

static void
ftl_wptr_free(struct ftl_wptr *wptr)
{
	if (!wptr) {
		return;
	}

	free(wptr);
}

static void
ftl_remove_wptr(struct ftl_wptr *wptr)
{
	LIST_REMOVE(wptr, list_entry);
	ftl_wptr_free(wptr);
}

static void
ftl_io_cmpl_cb(void *arg, const struct spdk_nvme_cpl *status)
{
	struct ftl_io *io = arg;

	if (spdk_nvme_cpl_is_error(status)) {
		ftl_io_process_error(io, status);
	}

	ftl_trace_completion(io->dev, io, FTL_TRACE_COMPLETION_DISK);

	ftl_io_dec_req(io);

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}
}

static void
ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band)
{
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			break;
		}
	}

	/* If the band already has the high_prio flag set, other writes must */
	/* have failed earlier, so it's already taken care of. */
	if (band->high_prio) {
		assert(wptr == NULL);
		return;
	}

	ftl_band_write_failed(band);
	ftl_remove_wptr(wptr);
}

static struct ftl_wptr *
ftl_wptr_from_band(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr = NULL;

	LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
		if (wptr->band == band) {
			return wptr;
		}
	}

	return NULL;
}

static void
ftl_md_write_fail(struct ftl_io *io, int status)
{
	struct ftl_band *band = io->band;
	struct ftl_wptr *wptr;
	char buf[128];

	wptr = ftl_wptr_from_band(band);

	SPDK_ERRLOG("Metadata write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(wptr->ppa, buf, sizeof(buf)), status);

	ftl_halt_writes(io->dev, band);
}

static void
ftl_md_write_cb(void *arg, int status)
{
	struct ftl_io *io = arg;
	struct ftl_wptr *wptr;

	wptr = ftl_wptr_from_band(io->band);

	if (status) {
		ftl_md_write_fail(io, status);
		return;
	}

	ftl_band_set_next_state(io->band);
	if (io->band->state == FTL_BAND_STATE_CLOSED) {
		ftl_remove_wptr(wptr);
	}
}

static int
ftl_ppa_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa,
		      size_t lbk, void *ctx)
{
	struct spdk_ftl_dev *dev = io->dev;
	size_t lbk_cnt, max_lbks;

	assert(ftl_io_mode_ppa(io));
	assert(io->iov_pos < io->iov_cnt);

	if (lbk == 0) {
		*ppa = io->ppa;
	} else {
		*ppa = ftl_band_next_xfer_ppa(io->band, io->ppa, lbk);
	}

	assert(!ftl_ppa_invalid(*ppa));

	/* Metadata has to be read in the way it's written (jumping across */
	/* the chunks in xfer_size increments) */
	if (io->flags & FTL_IO_MD) {
		max_lbks = dev->xfer_size - (ppa->lbk % dev->xfer_size);
		lbk_cnt = spdk_min(ftl_io_iovec_len_left(io), max_lbks);
		assert(ppa->lbk / dev->xfer_size == (ppa->lbk + lbk_cnt - 1) / dev->xfer_size);
	} else {
		lbk_cnt = ftl_io_iovec_len_left(io);
	}

	return lbk_cnt;
}

static int
ftl_wptr_close_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	ftl_band_set_state(band, FTL_BAND_STATE_CLOSING);
	band->tail_md_ppa = wptr->ppa;

	return ftl_band_write_tail_md(band, band->md.dma_buf, ftl_md_write_cb);
}

static int
ftl_wptr_open_band(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	assert(ftl_band_chunk_is_first(band, wptr->chunk));
	assert(band->md.num_vld == 0);

	ftl_band_clear_md(band);

	assert(band->state == FTL_BAND_STATE_PREP);
	ftl_band_set_state(band, FTL_BAND_STATE_OPENING);

	return ftl_band_write_head_md(band, band->md.dma_buf, ftl_md_write_cb);
}

static int
ftl_submit_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_band *band = io->band;
	struct ftl_ppa ppa = io->ppa;
	struct ftl_chunk *chunk;
	uint64_t ppa_packed;
	int rc = 0;
	size_t i;

	for (i = 0; i < io->lbk_cnt; ++i) {
		if (i != 0) {
			chunk = ftl_band_next_chunk(band, ftl_band_chunk_from_ppa(band, ppa));
			assert(chunk->state == FTL_CHUNK_STATE_CLOSED ||
			       chunk->state == FTL_CHUNK_STATE_VACANT);
			ppa = chunk->start_ppa;
		}

		assert(ppa.lbk == 0);
		ppa_packed = ftl_ppa_addr_pack(dev, ppa);

		ftl_trace_submission(dev, io, ppa, 1);
		rc = spdk_nvme_ocssd_ns_cmd_vector_reset(dev->ns, ftl_get_write_qpair(dev),
				&ppa_packed, 1, NULL, ftl_io_cmpl_cb, io);
		if (rc) {
			ftl_io_fail(io, rc);
			SPDK_ERRLOG("Vector reset failed with status: %d\n", rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, 1);
	}

	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static void
_ftl_io_erase(void *ctx)
{
	ftl_io_erase((struct ftl_io *)ctx);
}

static bool
ftl_check_core_thread(const struct spdk_ftl_dev *dev)
{
	return dev->core_thread.thread == spdk_get_thread();
}

static bool
ftl_check_read_thread(const struct spdk_ftl_dev *dev)
{
	return dev->read_thread.thread == spdk_get_thread();
}

int
ftl_io_erase(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	if (ftl_check_core_thread(dev)) {
		return ftl_submit_erase(io);
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_erase, io);
	return 0;
}

static struct ftl_band *
ftl_next_write_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	band = LIST_FIRST(&dev->free_bands);
	if (!band) {
		return NULL;
	}
	assert(band->state == FTL_BAND_STATE_FREE);

	if (ftl_band_erase(band)) {
		/* TODO: handle erase failure */
		return NULL;
	}

	return band;
}

static struct ftl_band *
ftl_next_wptr_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (!dev->next_band) {
		band = ftl_next_write_band(dev);
	} else {
		assert(dev->next_band->state == FTL_BAND_STATE_PREP);
		band = dev->next_band;
		dev->next_band = NULL;
	}

	return band;
}

static struct ftl_wptr *
ftl_wptr_init(struct ftl_band *band)
{
	struct spdk_ftl_dev *dev = band->dev;
	struct ftl_wptr *wptr;

	wptr = calloc(1, sizeof(*wptr));
	if (!wptr) {
		return NULL;
	}

	wptr->dev = dev;
	wptr->band = band;
	wptr->chunk = CIRCLEQ_FIRST(&band->chunks);
	wptr->ppa = wptr->chunk->start_ppa;

	return wptr;
}

static int
ftl_add_wptr(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;
	struct ftl_wptr *wptr;

	band = ftl_next_wptr_band(dev);
	if (!band) {
		return -1;
	}

	wptr = ftl_wptr_init(band);
	if (!wptr) {
		return -1;
	}

	if (ftl_band_write_prep(band)) {
		ftl_wptr_free(wptr);
		return -1;
	}

	LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id);
	ftl_trace_write_band(dev, band);
	return 0;
}

static void
ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
{
	struct ftl_band *band = wptr->band;
	struct spdk_ftl_dev *dev = wptr->dev;
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t next_thld;

	wptr->offset += xfer_size;
	next_thld = (ftl_band_num_usable_lbks(band) * conf->band_thld) / 100;

	if (ftl_band_full(band, wptr->offset)) {
		ftl_band_set_state(band, FTL_BAND_STATE_FULL);
	}

	wptr->chunk->busy = true;
	wptr->ppa = ftl_band_next_xfer_ppa(band, wptr->ppa, xfer_size);
	wptr->chunk = ftl_band_next_operational_chunk(band, wptr->chunk);

	assert(!ftl_ppa_invalid(wptr->ppa));

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: grp:%d, pu:%d chunk:%d, lbk:%u\n",
		      wptr->ppa.grp, wptr->ppa.pu, wptr->ppa.chk, wptr->ppa.lbk);

	if (wptr->offset >= next_thld && !dev->next_band) {
		dev->next_band = ftl_next_write_band(dev);
	}
}

static int
ftl_wptr_ready(struct ftl_wptr *wptr)
{
	struct ftl_band *band = wptr->band;

	/* TODO: add handling of empty bands */

	if (spdk_unlikely(!ftl_chunk_is_writable(wptr->chunk))) {
		/* Erasing band may fail after it was assigned to wptr. */
		if (spdk_unlikely(wptr->chunk->state == FTL_CHUNK_STATE_BAD)) {
			ftl_wptr_advance(wptr, wptr->dev->xfer_size);
		}
		return 0;
	}

	/* If we're in the process of writing metadata, wait till it is */
	/* completed. */
	/* TODO: we should probably change bands once we're writing tail md */
	if (ftl_band_state_changing(band)) {
		return 0;
	}

	if (band->state == FTL_BAND_STATE_FULL) {
		if (ftl_wptr_close_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	if (band->state != FTL_BAND_STATE_OPEN) {
		if (ftl_wptr_open_band(wptr)) {
			/* TODO: need recovery here */
			assert(false);
		}
		return 0;
	}

	return 1;
}

static const struct spdk_ftl_limit *
ftl_get_limit(const struct spdk_ftl_dev *dev, int type)
{
	assert(type < SPDK_FTL_LIMIT_MAX);
	return &dev->conf.defrag.limits[type];
}

static bool
ftl_cache_lba_valid(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	struct ftl_ppa ppa;

	/* If the LBA is invalid don't bother checking the md and l2p */
	if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) {
		return false;
	}

	ppa = ftl_l2p_get(dev, entry->lba);
	if (!(ftl_ppa_cached(ppa) && ppa.offset == entry->pos)) {
		return false;
	}

	return true;
}

static void
ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_rwb_entry *entry)
{
	pthread_spin_lock(&entry->lock);

	if (!ftl_rwb_entry_valid(entry)) {
		goto unlock;
	}

	/* If the l2p wasn't updated and still points at the entry, fill it with the */
	/* on-disk PPA and clear the cache status bit. Otherwise, skip the l2p update */
	/* and just clear the cache status. */
	if (!ftl_cache_lba_valid(dev, entry)) {
		goto clear;
	}

	ftl_l2p_set(dev, entry->lba, entry->ppa);
clear:
	ftl_rwb_entry_invalidate(entry);
unlock:
	pthread_spin_unlock(&entry->lock);
}

static struct ftl_rwb_entry *
ftl_acquire_entry(struct spdk_ftl_dev *dev, int flags)
{
	struct ftl_rwb_entry *entry;

	entry = ftl_rwb_acquire(dev->rwb, ftl_rwb_type_from_flags(flags));
	if (!entry) {
		return NULL;
	}

	ftl_evict_cache_entry(dev, entry);

	entry->flags = flags;
	return entry;
}

static void
ftl_rwb_pad(struct spdk_ftl_dev *dev, size_t size)
{
	struct ftl_rwb_entry *entry;
	int flags = FTL_IO_PAD | FTL_IO_INTERNAL;

	for (size_t i = 0; i < size; ++i) {
		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			break;
		}

		entry->lba = FTL_LBA_INVALID;
		entry->ppa = ftl_to_ppa(FTL_PPA_INVALID);
		memset(entry->data, 0, FTL_BLOCK_SIZE);
		ftl_rwb_push(entry);
	}
}

static void
ftl_remove_free_bands(struct spdk_ftl_dev *dev)
{
	while (!LIST_EMPTY(&dev->free_bands)) {
		LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry);
	}

	dev->next_band = NULL;
}

static void
ftl_process_shutdown(struct spdk_ftl_dev *dev)
{
	size_t size = ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_INTERNAL) +
		      ftl_rwb_num_acquired(dev->rwb, FTL_RWB_TYPE_USER);

	if (size >= dev->xfer_size) {
		return;
	}

	/* If we reach this point we need to remove free bands */
	/* and pad current wptr band to the end */
	ftl_remove_free_bands(dev);

	/* Pad write buffer until band is full */
	ftl_rwb_pad(dev, dev->xfer_size - size);
}

static int
ftl_shutdown_complete(struct spdk_ftl_dev *dev)
{
	return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) &&
	       LIST_EMPTY(&dev->wptr_list);
}

void
ftl_apply_limits(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit;
	struct ftl_stats *stats = &dev->stats;
	size_t rwb_limit[FTL_RWB_TYPE_MAX];
	int i;

	ftl_rwb_get_limits(dev->rwb, rwb_limit);

	/* Clear existing limit */
	dev->limit = SPDK_FTL_LIMIT_MAX;

	for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) {
		limit = ftl_get_limit(dev, i);

		if (dev->num_free <= limit->thld) {
			rwb_limit[FTL_RWB_TYPE_USER] =
				(limit->limit * ftl_rwb_entry_cnt(dev->rwb)) / 100;
			stats->limits[i]++;
			dev->limit = i;
			goto apply;
		}
	}

	/* Clear the limits, since we don't need to apply them anymore */
	rwb_limit[FTL_RWB_TYPE_USER] = ftl_rwb_entry_cnt(dev->rwb);
apply:
	ftl_trace_limits(dev, rwb_limit, dev->num_free);
	ftl_rwb_set_limits(dev->rwb, rwb_limit);
}

static int
ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band = ftl_band_from_ppa(dev, ppa);
	struct ftl_md *md = &band->md;
	uint64_t offset;

	offset = ftl_band_lbkoff_from_ppa(band, ppa);

	/* The bit might be already cleared if two writes are scheduled to the */
	/* same LBA at the same time */
	if (spdk_bit_array_get(md->vld_map, offset)) {
		assert(md->num_vld > 0);
		spdk_bit_array_clear(md->vld_map, offset);
		md->num_vld--;
		return 1;
	}

	return 0;
}

int
ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_ppa ppa)
{
	struct ftl_band *band;
	int rc;

	assert(!ftl_ppa_cached(ppa));
	band = ftl_band_from_ppa(dev, ppa);

	pthread_spin_lock(&band->md.lock);
	rc = ftl_invalidate_addr_unlocked(dev, ppa);
	pthread_spin_unlock(&band->md.lock);

	return rc;
}

static int
ftl_read_retry(int rc)
{
	return rc == -EAGAIN;
}

static int
ftl_read_canceled(int rc)
{
	return rc == 0;
}

static void
ftl_add_to_retry_queue(struct ftl_io *io)
{
	if (!(io->flags & FTL_IO_RETRY)) {
		io->flags |= FTL_IO_RETRY;
		TAILQ_INSERT_TAIL(&io->dev->retry_queue, io, retry_entry);
	}
}

static int
ftl_submit_read(struct ftl_io *io, ftl_next_ppa_fn next_ppa,
		void *ctx)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa ppa;
	int rc = 0, lbk_cnt;

	while (io->pos < io->lbk_cnt) {
		/* We might hit the cache here, if so, skip the read */
		lbk_cnt = rc = next_ppa(io, &ppa, io->pos, ctx);

		/* We might need to retry the read from scratch (e.g. */
		/* because a write was under way and completed before */
		/* we could read it from the rwb) */
		if (ftl_read_retry(rc)) {
			continue;
		}

		/* We don't have to schedule the read, as it was read from cache */
		if (ftl_read_canceled(rc)) {
			ftl_io_advance(io, 1);
			continue;
		}

		assert(lbk_cnt > 0);

		ftl_trace_submission(dev, io, ppa, lbk_cnt);
		rc = spdk_nvme_ns_cmd_read(dev->ns, ftl_get_read_qpair(dev),
					   ftl_io_iovec_addr(io),
					   ftl_ppa_addr_pack(io->dev, ppa), lbk_cnt,
					   ftl_io_cmpl_cb, io, 0);
		if (rc == -ENOMEM) {
			ftl_add_to_retry_queue(io);
			break;
		} else if (rc) {
			ftl_io_fail(io, rc);
			break;
		}

		ftl_io_inc_req(io);
		ftl_io_advance(io, lbk_cnt);
	}

	/* If we didn't have to read anything from the device, */
	/* complete the request right away */
	if (ftl_io_done(io)) {
		ftl_io_complete(io);
	}

	return rc;
}

static int
ftl_ppa_cache_read(struct ftl_io *io, uint64_t lba,
		   struct ftl_ppa ppa, void *buf)
{
	struct ftl_rwb *rwb = io->dev->rwb;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa nppa;
	int rc = 0;

	entry = ftl_rwb_entry_from_offset(rwb, ppa.offset);
	pthread_spin_lock(&entry->lock);

	nppa = ftl_l2p_get(io->dev, lba);
	if (ppa.ppa != nppa.ppa) {
		rc = -1;
		goto out;
	}

	memcpy(buf, entry->data, FTL_BLOCK_SIZE);
out:
	pthread_spin_unlock(&entry->lock);
	return rc;
}

static int
ftl_lba_read_next_ppa(struct ftl_io *io, struct ftl_ppa *ppa,
		      size_t lbk, void *ctx)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_ppa next_ppa;
	size_t i;

	*ppa = ftl_l2p_get(dev, io->lba + lbk);

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read ppa:%lx, lba:%lu\n", ppa->ppa, io->lba);

	/* If the PPA is invalid, skip it (the buffer should already be zeroed) */
	if (ftl_ppa_invalid(*ppa)) {
		ftl_trace_completion(io->dev, io, FTL_TRACE_COMPLETION_INVALID);
		return 0;
	}

	if (ftl_ppa_cached(*ppa)) {
		if (!ftl_ppa_cache_read(io, io->lba + lbk, *ppa, ftl_io_iovec_addr(io))) {
			ftl_trace_completion(io->dev, io, FTL_TRACE_COMPLETION_CACHE);
			return 0;
		}

		/* If the state changed, we have to re-read the l2p */
		return -EAGAIN;
	}

	for (i = 1; i < ftl_io_iovec_len_left(io); ++i) {
		next_ppa = ftl_l2p_get(dev, io->lba + lbk + i);

		if (ftl_ppa_invalid(next_ppa) || ftl_ppa_cached(next_ppa)) {
			break;
		}

		if (ftl_ppa_addr_pack(dev, *ppa) + i != ftl_ppa_addr_pack(dev, next_ppa)) {
			break;
		}
	}

	return i;
}

static void
ftl_complete_flush(struct ftl_flush *flush)
{
	assert(flush->num_req == 0);
	LIST_REMOVE(flush, list_entry);

	flush->cb.fn(flush->cb.ctx, 0);

	spdk_bit_array_free(&flush->bmap);
	free(flush);
}

static void
ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_rwb_batch *batch)
{
	struct ftl_flush *flush, *tflush;
	size_t offset;

	LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) {
		offset = ftl_rwb_batch_get_offset(batch);

		if (spdk_bit_array_get(flush->bmap, offset)) {
			spdk_bit_array_clear(flush->bmap, offset);
			if (!(--flush->num_req)) {
				ftl_complete_flush(flush);
			}
		}
	}
}

static void
ftl_write_fail(struct ftl_io *io, int status)
{
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_band *band;
	char buf[128];

	entry = ftl_rwb_batch_first_entry(batch);

	band = ftl_band_from_ppa(io->dev, entry->ppa);
	SPDK_ERRLOG("Write failed @ppa: %s, status: %d\n",
		    ftl_ppa2str(entry->ppa, buf, sizeof(buf)), status);

	/* Close the band, halt the wptr and defrag */
	ftl_halt_writes(dev, band);

	ftl_rwb_foreach(entry, batch) {
		/* Invalidate meta set by process_writes() */
		ftl_invalidate_addr(dev, entry->ppa);
	}

	/* Reset the batch back to the RWB to resend it later */
	ftl_rwb_batch_revert(batch);
}

static void
ftl_write_cb(void *arg, int status)
{
	struct ftl_io *io = arg;
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_batch *batch = io->rwb_batch;
	struct ftl_rwb_entry *entry;

	if (status) {
		ftl_write_fail(io, status);
		return;
	}

	assert(io->lbk_cnt == dev->xfer_size);
	ftl_rwb_foreach(entry, batch) {
		if (!(io->flags & FTL_IO_MD) && !(entry->flags & FTL_IO_PAD)) {
			/* Verify that the LBA is set for user lbks */
			assert(entry->lba != FTL_LBA_INVALID);
		}

		SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lu, lba:%lu\n",
			      entry->ppa.ppa, entry->lba);
	}

	ftl_process_flush(dev, batch);
	ftl_rwb_batch_release(batch);
}

static void
ftl_update_rwb_stats(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry)
{
	if (!ftl_rwb_entry_internal(entry)) {
		dev->stats.write_user++;
	}
	dev->stats.write_total++;
}

static void
ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_rwb_entry *entry,
	       struct ftl_ppa ppa)
{
	struct ftl_ppa prev_ppa;
	struct ftl_rwb_entry *prev;
	struct ftl_band *band;
	int valid;

	prev_ppa = ftl_l2p_get(dev, entry->lba);
	if (ftl_ppa_invalid(prev_ppa)) {
		ftl_l2p_set(dev, entry->lba, ppa);
		return;
	}

	/* If the L2P's PPA is different than what we expected we don't need to */
	/* do anything (someone's already overwritten our data). */
	if (ftl_rwb_entry_weak(entry) && !ftl_ppa_cmp(prev_ppa, entry->ppa)) {
		return;
	}

	if (ftl_ppa_cached(prev_ppa)) {
		assert(!ftl_rwb_entry_weak(entry));
		prev = ftl_rwb_entry_from_offset(dev->rwb, prev_ppa.offset);
		pthread_spin_lock(&prev->lock);

		/* Re-read the L2P under the lock to protect against updates */
		/* to this LBA from other threads */
		prev_ppa = ftl_l2p_get(dev, entry->lba);

		/* If the entry is no longer in cache, another write has been */
		/* scheduled in the meantime, so we have to invalidate its LBA */
		if (!ftl_ppa_cached(prev_ppa)) {
			ftl_invalidate_addr(dev, prev_ppa);
		}

		/* If previous entry is part of cache, remove and invalidate it */
		if (ftl_rwb_entry_valid(prev)) {
			ftl_invalidate_addr(dev, prev->ppa);
			ftl_rwb_entry_invalidate(prev);
		}

		ftl_l2p_set(dev, entry->lba, ppa);
		pthread_spin_unlock(&prev->lock);
		return;
	}

	/* Lock the band containing previous PPA. This assures atomic changes to */
	/* the L2P as well as metadata. The valid bits in metadata are used to */
	/* check weak writes validity. */
	band = ftl_band_from_ppa(dev, prev_ppa);
	pthread_spin_lock(&band->md.lock);

	valid = ftl_invalidate_addr_unlocked(dev, prev_ppa);

	/* If the address has been invalidated already, we don't want to update */
	/* the L2P for weak writes, as it means the write is no longer valid. */
	if (!ftl_rwb_entry_weak(entry) || valid) {
		ftl_l2p_set(dev, entry->lba, ppa);
	}

	pthread_spin_unlock(&band->md.lock);
}

static struct ftl_io *
ftl_io_init_child_write(struct ftl_io *parent, struct ftl_ppa ppa,
			void *data, void *md, spdk_ftl_fn cb)
{
	struct ftl_io *io;
	struct spdk_ftl_dev *dev = parent->dev;
	struct ftl_io_init_opts opts = {
		.dev = dev,
		.io = NULL,
		.parent = parent,
		.rwb_batch = NULL,
		.band = parent->band,
		.size = sizeof(struct ftl_io),
		.flags = 0,
		.type = FTL_IO_WRITE,
		.iov_cnt = 1,
		.req_size = dev->xfer_size,
		.fn = cb,
		.data = data,
		.md = md,
	};

	io = ftl_io_init_internal(&opts);
	if (!io) {
		return NULL;
	}

	io->ppa = ppa;

	return io;
}

static void
ftl_io_child_write_cb(void *ctx, int status)
{
	struct ftl_chunk *chunk;
	struct ftl_io *io = ctx;

	chunk = ftl_band_chunk_from_ppa(io->band, io->ppa);
	chunk->busy = false;
}

static int
ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io, int lbk_cnt)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_io *child;
	struct iovec *iov = ftl_io_iovec(io);
	int rc;

	/* Split IO to child requests and release chunk immediately after child is completed */
	child = ftl_io_init_child_write(io, wptr->ppa, iov[io->iov_pos].iov_base,
					ftl_io_get_md(io), ftl_io_child_write_cb);
	if (!child) {
		return -EAGAIN;
	}

	rc = spdk_nvme_ns_cmd_write_with_md(dev->ns, ftl_get_write_qpair(dev),
					    child->iov.iov_base, child->md,
					    ftl_ppa_addr_pack(dev, wptr->ppa),
					    lbk_cnt, ftl_io_cmpl_cb, child, 0, 0, 0);
	if (rc) {
		ftl_io_fail(child, rc);
		ftl_io_complete(child);
		SPDK_ERRLOG("spdk_nvme_ns_cmd_write failed with status:%d, ppa:%lu\n",
			    rc, wptr->ppa.ppa);

		return -EIO;
	}

	ftl_io_inc_req(child);
	ftl_io_advance(child, lbk_cnt);

	return 0;
}

static int
ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct iovec *iov = ftl_io_iovec(io);
	int rc = 0;
	size_t lbk_cnt;

	while (io->iov_pos < io->iov_cnt) {
		lbk_cnt = iov[io->iov_pos].iov_len / PAGE_SIZE;
		assert(iov[io->iov_pos].iov_len > 0);
		assert(lbk_cnt == dev->xfer_size);

		/* There are no guarantees about the order of completion on the NVMe IO */
		/* submission queue, so wait until the chunk is not busy before submitting */
		/* another write */
		if (wptr->chunk->busy) {
			wptr->current_io = io;
			rc = -EAGAIN;
			break;
		}

		rc = ftl_submit_child_write(wptr, io, lbk_cnt);

		if (rc == -EAGAIN) {
			wptr->current_io = io;
			break;
		} else if (rc) {
			ftl_io_fail(io, rc);
			break;
		}

		ftl_trace_submission(dev, io, wptr->ppa, lbk_cnt);

		/* Update parent iovec */
		ftl_io_advance(io, lbk_cnt);

		ftl_wptr_advance(wptr, lbk_cnt);
	}

	if (ftl_io_done(io)) {
		/* Parent IO will complete after all children are completed */
		ftl_io_complete(io);
	}

	return rc;
}

static void
ftl_flush_pad_batch(struct spdk_ftl_dev *dev)
{
	struct ftl_rwb *rwb = dev->rwb;
	size_t size;

	size = ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_INTERNAL) +
	       ftl_rwb_num_acquired(rwb, FTL_RWB_TYPE_USER);

	/* There must be something in the RWB, otherwise the flush */
	/* wouldn't be waiting for anything */
	assert(size > 0);

	/* Only add padding when there's less than xfer size */
	/* entries in the buffer. Otherwise we just have to wait */
	/* for the entries to become ready. */
	if (size < dev->xfer_size) {
		ftl_rwb_pad(dev, dev->xfer_size - (size % dev->xfer_size));
	}
}

static int
ftl_wptr_process_writes(struct ftl_wptr *wptr)
{
	struct spdk_ftl_dev *dev = wptr->dev;
	struct ftl_rwb_batch *batch;
	struct ftl_rwb_entry *entry;
	struct ftl_io *io;
	struct ftl_ppa ppa, prev_ppa;

	if (wptr->current_io) {
		if (ftl_submit_write(wptr, wptr->current_io) == -EAGAIN) {
			return 0;
		}
		wptr->current_io = NULL;
	}

	/* Make sure the band is prepared for writing */
	if (!ftl_wptr_ready(wptr)) {
		return 0;
	}

	if (dev->halt) {
		ftl_process_shutdown(dev);
	}

	batch = ftl_rwb_pop(dev->rwb);
	if (!batch) {
		/* If there are queued flush requests we need to pad the RWB to */
		/* force out remaining entries */
		if (!LIST_EMPTY(&dev->flush_list)) {
			ftl_flush_pad_batch(dev);
		}

		return 0;
	}

	io = ftl_io_rwb_init(dev, wptr->band, batch, ftl_write_cb);
	if (!io) {
		goto error;
	}

	ppa = wptr->ppa;
	ftl_rwb_foreach(entry, batch) {
		entry->ppa = ppa;

		if (entry->lba != FTL_LBA_INVALID) {
			pthread_spin_lock(&entry->lock);
			prev_ppa = ftl_l2p_get(dev, entry->lba);

			/* If the l2p was updated in the meantime, don't update band's metadata */
			if (ftl_ppa_cached(prev_ppa) && prev_ppa.offset == entry->pos) {
				/* Setting entry's cache bit needs to be done after metadata */
				/* within the band is updated to make sure that writes */
				/* invalidating the entry clear the metadata as well */
				ftl_band_set_addr(wptr->band, entry->lba, entry->ppa);
				ftl_rwb_entry_set_valid(entry);
			}
			pthread_spin_unlock(&entry->lock);
		}

		ftl_trace_rwb_pop(dev, entry);
		ftl_update_rwb_stats(dev, entry);

		ppa = ftl_band_next_ppa(wptr->band, ppa, 1);
	}

	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write ppa:%lx, %lx\n", wptr->ppa.ppa,
		      ftl_ppa_addr_pack(dev, wptr->ppa));

	if (ftl_submit_write(wptr, io)) {
		/* TODO: we need some recovery here */
		assert(0 && "Write submit failed");
		if (ftl_io_done(io)) {
			ftl_io_free(io);
		}
	}

	return dev->xfer_size;
error:
	ftl_rwb_batch_revert(batch);
	return 0;
}

static int
ftl_process_writes(struct spdk_ftl_dev *dev)
{
	struct ftl_wptr *wptr, *twptr;
	size_t num_active = 0;
	enum ftl_band_state state;

	LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) {
		ftl_wptr_process_writes(wptr);
		state = wptr->band->state;

		if (state != FTL_BAND_STATE_FULL &&
		    state != FTL_BAND_STATE_CLOSING &&
		    state != FTL_BAND_STATE_CLOSED) {
			num_active++;
		}
	}

	if (num_active < 1) {
		ftl_add_wptr(dev);
	}

	return 0;
}

static void
ftl_rwb_entry_fill(struct ftl_rwb_entry *entry, struct ftl_io *io)
{
	struct ftl_band *band;

	memcpy(entry->data, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE);

	if (ftl_rwb_entry_weak(entry)) {
		band = ftl_band_from_ppa(io->dev, io->ppa);
		entry->ppa = ftl_band_next_ppa(band, io->ppa, io->pos);
	}

	entry->trace = io->trace;

	if (entry->md) {
		memcpy(entry->md, &entry->lba, sizeof(io->lba));
	}
}

static int
ftl_rwb_fill(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	struct ftl_rwb_entry *entry;
	struct ftl_ppa ppa = { .cached = 1 };
	int flags = ftl_rwb_flags_from_io(io);
	uint64_t lba;

	while (io->pos < io->lbk_cnt) {
		lba = ftl_io_current_lba(io);
		if (lba == FTL_LBA_INVALID) {
			ftl_io_advance(io, 1);
			continue;
		}

		entry = ftl_acquire_entry(dev, flags);
		if (!entry) {
			return -EAGAIN;
		}

		entry->lba = lba;
		ftl_rwb_entry_fill(entry, io);

		ppa.offset = entry->pos;

		ftl_io_advance(io, 1);
		ftl_update_l2p(dev, entry, ppa);

		/* Needs to be done after L2P is updated to avoid race with */
		/* write completion callback when it's processed faster than */
		/* L2P is set in update_l2p(). */
		ftl_rwb_push(entry);
		ftl_trace_rwb_fill(dev, io);
	}

	ftl_io_complete(io);
	return 0;
}

static bool
ftl_dev_needs_defrag(struct spdk_ftl_dev *dev)
{
	const struct spdk_ftl_limit *limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START);

	if (ftl_reloc_is_halted(dev->reloc)) {
		return false;
	}

	if (dev->df_band) {
		return false;
	}

	if (dev->num_free <= limit->thld) {
		return true;
	}

	return false;
}

static double
ftl_band_calc_merit(struct ftl_band *band, size_t *threshold_valid)
{
	size_t usable, valid, invalid;
	double vld_ratio;

	/* If the band doesn't have any usable lbks it's of no use */
	usable = ftl_band_num_usable_lbks(band);
	if (usable == 0) {
		return 0.0;
	}
	valid = threshold_valid ? (usable - *threshold_valid) : band->md.num_vld;
	invalid = usable - valid;

	/* Add one to avoid division by 0 */
	vld_ratio = (double)invalid / (double)(valid + 1);
	return vld_ratio * ftl_band_age(band);
}

static bool
ftl_band_needs_defrag(struct ftl_band *band, struct spdk_ftl_dev *dev)
{
	struct spdk_ftl_conf *conf = &dev->conf;
	size_t thld_vld;

	/* If we're in dire need of free bands, every band is worth defragging */
	if (ftl_current_limit(dev) == SPDK_FTL_LIMIT_CRIT) {
		return true;
	}

	thld_vld = (ftl_band_num_usable_lbks(band) * conf->defrag.invalid_thld) / 100;

	return band->merit > ftl_band_calc_merit(band, &thld_vld);
}

static struct ftl_band *
ftl_select_defrag_band(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band, *mband = NULL;
	double merit = 0;

	LIST_FOREACH(band, &dev->shut_bands, list_entry) {
		assert(band->state == FTL_BAND_STATE_CLOSED);
		band->merit = ftl_band_calc_merit(band, NULL);
		if (band->merit > merit) {
			merit = band->merit;
			mband = band;
		}
	}

	if (mband && !ftl_band_needs_defrag(mband, dev)) {
		mband = NULL;
	}

	return mband;
}

static void
ftl_process_relocs(struct spdk_ftl_dev *dev)
{
	struct ftl_band *band;

	if (ftl_dev_needs_defrag(dev)) {
		band = dev->df_band = ftl_select_defrag_band(dev);

		if (band) {
			ftl_reloc_add(dev->reloc, band, 0, ftl_num_band_lbks(dev), 0);
			ftl_trace_defrag_band(dev, band);
		}
	}

	ftl_reloc(dev->reloc);
}

int
ftl_current_limit(const struct spdk_ftl_dev *dev)
{
	return dev->limit;
}

void
spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs)
{
	attrs->uuid = dev->uuid;
	attrs->lbk_cnt = dev->num_lbas;
	attrs->lbk_size = FTL_BLOCK_SIZE;
	attrs->range = dev->range;
	attrs->cache_bdev_desc = dev->cache_bdev_desc;
}

static void
_ftl_io_write(void *ctx)
{
	ftl_io_write((struct ftl_io *)ctx);
}

int
ftl_io_write(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;

	/* For normal IOs we just need to copy the data onto the rwb */
	if (!(io->flags & FTL_IO_MD)) {
		return ftl_rwb_fill(io);
	}

	/* Metadata has its own buffer, so it doesn't have to be copied; just */
	/* send it to the core thread and schedule the write immediately */
	if (ftl_check_core_thread(dev)) {
		return ftl_submit_write(ftl_wptr_from_band(io->band), io);
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io);

	return 0;
}

static int
_spdk_ftl_write(struct ftl_io *io)
{
	int rc;

	rc = ftl_io_write(io);
	if (rc == -EAGAIN) {
		spdk_thread_send_msg(spdk_io_channel_get_thread(io->ioch),
				     _ftl_write, io);
		return 0;
	}

	if (rc) {
		ftl_io_free(io);
	}

	return rc;
}

static void
_ftl_write(void *ctx)
{
	_spdk_ftl_write(ctx);
}

int
spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	       struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_alloc(ch);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE);
	return _spdk_ftl_write(io);
}

int
ftl_io_read(struct ftl_io *io)
{
	struct spdk_ftl_dev *dev = io->dev;
	ftl_next_ppa_fn next_ppa;

	if (ftl_check_read_thread(dev)) {
		if (ftl_io_mode_ppa(io)) {
			next_ppa = ftl_ppa_read_next_ppa;
		} else {
			next_ppa = ftl_lba_read_next_ppa;
		}

		return ftl_submit_read(io, next_ppa, NULL);
	}

	spdk_thread_send_msg(ftl_get_read_thread(dev), _ftl_read, io);
	return 0;
}

static void
_ftl_read(void *arg)
{
	ftl_io_read((struct ftl_io *)arg);
}

int
spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
	      struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_io *io;

	if (iov_cnt == 0 || iov_cnt > FTL_MAX_IOV) {
		return -EINVAL;
	}

	if (lba_cnt == 0) {
		return -EINVAL;
	}

	if (lba_cnt != ftl_iovec_num_lbks(iov, iov_cnt)) {
		return -EINVAL;
	}

	if (!dev->initialized) {
		return -EBUSY;
	}

	io = ftl_io_alloc(ch);
	if (!io) {
		return -ENOMEM;
	}

	ftl_io_user_init(dev, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ);
	ftl_io_read(io);
	return 0;
}

static struct ftl_flush *
ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;
	struct ftl_rwb *rwb = dev->rwb;

	flush = calloc(1, sizeof(*flush));
	if (!flush) {
		return NULL;
	}

	flush->bmap = spdk_bit_array_create(ftl_rwb_num_batches(rwb));
	if (!flush->bmap) {
		goto error;
	}

	flush->dev = dev;
	flush->cb.fn = cb_fn;
	flush->cb.ctx = cb_arg;

	return flush;
error:
	free(flush);
	return NULL;
}

static void
_ftl_flush(void *ctx)
{
	struct ftl_flush *flush = ctx;
	struct spdk_ftl_dev *dev = flush->dev;
	struct ftl_rwb *rwb = dev->rwb;
	struct ftl_rwb_batch *batch;

	/* Attach flush object to all non-empty batches */
	ftl_rwb_foreach_batch(batch, rwb) {
		if (!ftl_rwb_batch_empty(batch)) {
			spdk_bit_array_set(flush->bmap, ftl_rwb_batch_get_offset(batch));
			flush->num_req++;
		}
	}

	LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry);

	/* If the RWB was already empty, the flush can be completed right away */
	if (!flush->num_req) {
		ftl_complete_flush(flush);
	}
}

int
spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
{
	struct ftl_flush *flush;

	if (!dev->initialized) {
		return -EBUSY;
	}

	flush = ftl_flush_init(dev, cb_fn, cb_arg);
	if (!flush) {
		return -ENOMEM;
	}

	spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush);
	return 0;
}

void
ftl_process_anm_event(struct ftl_anm_event *event)
{
	SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Unconsumed ANM received for dev: %p...\n", event->dev);
	ftl_anm_event_complete(event);
}

static void
ftl_process_retry_queue(struct spdk_ftl_dev *dev)
{
	struct ftl_io *io;
	int rc;

	while (!TAILQ_EMPTY(&dev->retry_queue)) {
		io = TAILQ_FIRST(&dev->retry_queue);

		/* Retry only if IO is still healthy */
		if (spdk_likely(io->status == 0)) {
			rc = ftl_io_read(io);
			if (rc == -ENOMEM) {
				break;
			}
		}

		io->flags &= ~FTL_IO_RETRY;
		TAILQ_REMOVE(&dev->retry_queue, io, retry_entry);

		if (ftl_io_done(io)) {
			ftl_io_complete(io);
		}
	}
}

int
ftl_task_read(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_read_qpair(dev);
	size_t num_completed;

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	num_completed = spdk_nvme_qpair_process_completions(qpair, 0);

	if (num_completed && !TAILQ_EMPTY(&dev->retry_queue)) {
		ftl_process_retry_queue(dev);
	}

	return num_completed;
}

int
ftl_task_core(void *ctx)
{
	struct ftl_thread *thread = ctx;
	struct spdk_ftl_dev *dev = thread->dev;
	struct spdk_nvme_qpair *qpair = ftl_get_write_qpair(dev);

	if (dev->halt) {
		if (ftl_shutdown_complete(dev)) {
			spdk_poller_unregister(&thread->poller);
			return 0;
		}
	}

	ftl_process_writes(dev);
	spdk_nvme_qpair_process_completions(qpair, 0);
	ftl_process_relocs(dev);

	return 0;
}

SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE)
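
/*
 * Usage note (illustrative sketch only, kept inside a comment so it is not
 * compiled as part of this file): the public entry points above
 * (spdk_ftl_write(), spdk_ftl_read(), spdk_ftl_flush()) take an LBA range
 * described by an iovec array plus a completion callback. A minimal write of
 * a single logical block might look roughly like the snippet below. Here
 * `dev`, `ch` and `buf` are assumed to come from the application (device
 * init, an I/O channel and an FTL_BLOCK_SIZE-sized buffer), and `write_done`
 * is a hypothetical spdk_ftl_fn callback that receives the completion status.
 *
 *	static void
 *	write_done(void *cb_arg, int status)
 *	{
 *		SPDK_NOTICELOG("FTL write completed with status %d\n", status);
 *	}
 *
 *	static int
 *	example_write_one_lbk(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, void *buf)
 *	{
 *		struct iovec iov = { .iov_base = buf, .iov_len = FTL_BLOCK_SIZE };
 *
 *		// One LBA described by one iovec; -EINVAL/-EBUSY/-ENOMEM follow the
 *		// validation paths in spdk_ftl_write() above.
 *		return spdk_ftl_write(dev, ch, 0, 1, &iov, 1, write_done, NULL);
 *	}
 */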