1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright (c) Intel Corporation. 3 * All rights reserved. 4 */ 5 6 #include "spdk/likely.h" 7 #include "spdk/stdinc.h" 8 #include "spdk/nvme.h" 9 #include "spdk/thread.h" 10 #include "spdk/bdev_module.h" 11 #include "spdk/string.h" 12 #include "spdk/ftl.h" 13 #include "spdk/crc32.h" 14 15 #include "ftl_core.h" 16 #include "ftl_band.h" 17 #include "ftl_io.h" 18 #include "ftl_debug.h" 19 #include "ftl_internal.h" 20 #include "mngt/ftl_mngt.h" 21 22 23 size_t 24 spdk_ftl_io_size(void) 25 { 26 return sizeof(struct ftl_io); 27 } 28 29 static void 30 ftl_io_cmpl_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg) 31 { 32 struct ftl_io *io = cb_arg; 33 34 if (spdk_unlikely(!success)) { 35 io->status = -EIO; 36 } 37 38 ftl_io_dec_req(io); 39 if (ftl_io_done(io)) { 40 ftl_io_complete(io); 41 } 42 43 spdk_bdev_free_io(bdev_io); 44 } 45 46 static void 47 ftl_band_erase(struct ftl_band *band) 48 { 49 assert(band->md->state == FTL_BAND_STATE_CLOSED || 50 band->md->state == FTL_BAND_STATE_FREE); 51 52 ftl_band_set_state(band, FTL_BAND_STATE_PREP); 53 } 54 55 static size_t 56 ftl_get_limit(const struct spdk_ftl_dev *dev, int type) 57 { 58 assert(type < SPDK_FTL_LIMIT_MAX); 59 return dev->conf.limits[type]; 60 } 61 62 static bool 63 ftl_shutdown_complete(struct spdk_ftl_dev *dev) 64 { 65 uint64_t i; 66 67 if (dev->num_inflight) { 68 return false; 69 } 70 71 if (!ftl_nv_cache_is_halted(&dev->nv_cache)) { 72 ftl_nv_cache_halt(&dev->nv_cache); 73 return false; 74 } 75 76 if (!ftl_writer_is_halted(&dev->writer_user)) { 77 ftl_writer_halt(&dev->writer_user); 78 return false; 79 } 80 81 if (!ftl_reloc_is_halted(dev->reloc)) { 82 ftl_reloc_halt(dev->reloc); 83 return false; 84 } 85 86 if (!ftl_writer_is_halted(&dev->writer_gc)) { 87 ftl_writer_halt(&dev->writer_gc); 88 return false; 89 } 90 91 if (!ftl_nv_cache_chunks_busy(&dev->nv_cache)) { 92 return false; 93 } 94 95 for (i = 0; i < ftl_get_num_bands(dev); ++i) { 96 if (dev->bands[i].queue_depth || 97 dev->bands[i].md->state == FTL_BAND_STATE_CLOSING) { 98 return false; 99 } 100 } 101 102 if (!ftl_l2p_is_halted(dev)) { 103 ftl_l2p_halt(dev); 104 return false; 105 } 106 107 return true; 108 } 109 110 void 111 ftl_apply_limits(struct spdk_ftl_dev *dev) 112 { 113 size_t limit; 114 int i; 115 116 /* Clear existing limit */ 117 dev->limit = SPDK_FTL_LIMIT_MAX; 118 119 for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) { 120 limit = ftl_get_limit(dev, i); 121 122 if (dev->num_free <= limit) { 123 dev->limit = i; 124 break; 125 } 126 } 127 } 128 129 void 130 ftl_invalidate_addr(struct spdk_ftl_dev *dev, ftl_addr addr) 131 { 132 struct ftl_band *band; 133 struct ftl_p2l_map *p2l_map; 134 135 if (ftl_addr_in_nvc(dev, addr)) { 136 ftl_bitmap_clear(dev->valid_map, addr); 137 return; 138 } 139 140 band = ftl_band_from_addr(dev, addr); 141 p2l_map = &band->p2l_map; 142 143 /* The bit might be already cleared if two writes are scheduled to the */ 144 /* same LBA at the same time */ 145 if (ftl_bitmap_get(dev->valid_map, addr)) { 146 assert(p2l_map->num_valid > 0); 147 ftl_bitmap_clear(dev->valid_map, addr); 148 p2l_map->num_valid--; 149 } 150 151 /* Invalidate open/full band p2l_map entry to keep p2l and l2p 152 * consistency when band is going to close state */ 153 if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) { 154 p2l_map->band_map[ftl_band_block_offset_from_addr(band, addr)].lba = FTL_LBA_INVALID; 155 p2l_map->band_map[ftl_band_block_offset_from_addr(band, addr)].seq_id = 0; 156 } 157 } 158 159 static int 160 ftl_read_canceled(int rc) 161 { 162 return rc == -EFAULT; 163 } 164 165 static int 166 ftl_get_next_read_addr(struct ftl_io *io, ftl_addr *addr) 167 { 168 struct spdk_ftl_dev *dev = io->dev; 169 ftl_addr next_addr; 170 size_t i; 171 bool addr_cached = false; 172 173 *addr = ftl_l2p_get(dev, ftl_io_current_lba(io)); 174 io->map[io->pos] = *addr; 175 176 /* If the address is invalid, skip it */ 177 if (*addr == FTL_ADDR_INVALID) { 178 return -EFAULT; 179 } 180 181 addr_cached = ftl_addr_in_nvc(dev, *addr); 182 183 for (i = 1; i < ftl_io_iovec_len_left(io); ++i) { 184 next_addr = ftl_l2p_get(dev, ftl_io_get_lba(io, io->pos + i)); 185 186 if (next_addr == FTL_ADDR_INVALID) { 187 break; 188 } 189 190 /* It's not enough to check for contiguity, if user data is on the last block 191 * of base device and first nvc, then they're 'contiguous', but can't be handled 192 * with one read request. 193 */ 194 if (addr_cached != ftl_addr_in_nvc(dev, next_addr)) { 195 break; 196 } 197 198 if (*addr + i != next_addr) { 199 break; 200 } 201 202 io->map[io->pos + i] = next_addr; 203 } 204 205 return i; 206 } 207 208 static void ftl_submit_read(struct ftl_io *io); 209 210 static void 211 _ftl_submit_read(void *_io) 212 { 213 struct ftl_io *io = _io; 214 215 ftl_submit_read(io); 216 } 217 218 static void 219 ftl_submit_read(struct ftl_io *io) 220 { 221 struct spdk_ftl_dev *dev = io->dev; 222 ftl_addr addr; 223 int rc = 0, num_blocks; 224 225 while (io->pos < io->num_blocks) { 226 num_blocks = ftl_get_next_read_addr(io, &addr); 227 rc = num_blocks; 228 229 /* User LBA doesn't hold valid data (trimmed or never written to), fill with 0 and skip this block */ 230 if (ftl_read_canceled(rc)) { 231 memset(ftl_io_iovec_addr(io), 0, FTL_BLOCK_SIZE); 232 ftl_io_advance(io, 1); 233 continue; 234 } 235 236 assert(num_blocks > 0); 237 238 if (ftl_addr_in_nvc(dev, addr)) { 239 rc = ftl_nv_cache_read(io, addr, num_blocks, ftl_io_cmpl_cb, io); 240 } else { 241 rc = spdk_bdev_read_blocks(dev->base_bdev_desc, dev->base_ioch, 242 ftl_io_iovec_addr(io), 243 addr, num_blocks, ftl_io_cmpl_cb, io); 244 } 245 246 if (spdk_unlikely(rc)) { 247 if (rc == -ENOMEM) { 248 struct spdk_bdev *bdev; 249 struct spdk_io_channel *ch; 250 251 if (ftl_addr_in_nvc(dev, addr)) { 252 bdev = spdk_bdev_desc_get_bdev(dev->nv_cache.bdev_desc); 253 ch = dev->nv_cache.cache_ioch; 254 } else { 255 bdev = spdk_bdev_desc_get_bdev(dev->base_bdev_desc); 256 ch = dev->base_ioch; 257 } 258 io->bdev_io_wait.bdev = bdev; 259 io->bdev_io_wait.cb_fn = _ftl_submit_read; 260 io->bdev_io_wait.cb_arg = io; 261 spdk_bdev_queue_io_wait(bdev, ch, &io->bdev_io_wait); 262 return; 263 } else { 264 ftl_abort(); 265 } 266 } 267 268 ftl_io_inc_req(io); 269 ftl_io_advance(io, num_blocks); 270 } 271 272 /* If we didn't have to read anything from the device, */ 273 /* complete the request right away */ 274 if (ftl_io_done(io)) { 275 ftl_io_complete(io); 276 } 277 } 278 279 bool 280 ftl_needs_reloc(struct spdk_ftl_dev *dev) 281 { 282 size_t limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START); 283 284 if (dev->num_free <= limit) { 285 return true; 286 } 287 288 return false; 289 } 290 291 void 292 spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs) 293 { 294 attrs->num_blocks = dev->num_lbas; 295 attrs->block_size = FTL_BLOCK_SIZE; 296 attrs->optimum_io_size = dev->xfer_size; 297 } 298 299 static void 300 ftl_io_pin_cb(struct spdk_ftl_dev *dev, int status, struct ftl_l2p_pin_ctx *pin_ctx) 301 { 302 struct ftl_io *io = pin_ctx->cb_ctx; 303 304 if (spdk_unlikely(status != 0)) { 305 /* Retry on the internal L2P fault */ 306 io->status = -EAGAIN; 307 ftl_io_complete(io); 308 return; 309 } 310 311 io->flags |= FTL_IO_PINNED; 312 ftl_submit_read(io); 313 } 314 315 static void 316 ftl_io_pin(struct ftl_io *io) 317 { 318 if (spdk_unlikely(io->flags & FTL_IO_PINNED)) { 319 /* 320 * The IO is in a retry path and it had been pinned already. 321 * Continue with further processing. 322 */ 323 ftl_l2p_pin_skip(io->dev, ftl_io_pin_cb, io, &io->l2p_pin_ctx); 324 } else { 325 /* First time when pinning the IO */ 326 ftl_l2p_pin(io->dev, io->lba, io->num_blocks, 327 ftl_io_pin_cb, io, &io->l2p_pin_ctx); 328 } 329 } 330 331 static void 332 start_io(struct ftl_io *io) 333 { 334 struct ftl_io_channel *ioch = ftl_io_channel_get_ctx(io->ioch); 335 struct spdk_ftl_dev *dev = io->dev; 336 337 io->map = ftl_mempool_get(ioch->map_pool); 338 if (spdk_unlikely(!io->map)) { 339 io->status = -ENOMEM; 340 ftl_io_complete(io); 341 return; 342 } 343 344 switch (io->type) { 345 case FTL_IO_READ: 346 TAILQ_INSERT_TAIL(&dev->rd_sq, io, queue_entry); 347 break; 348 case FTL_IO_WRITE: 349 TAILQ_INSERT_TAIL(&dev->wr_sq, io, queue_entry); 350 break; 351 case FTL_IO_UNMAP: 352 default: 353 io->status = -EOPNOTSUPP; 354 ftl_io_complete(io); 355 } 356 } 357 358 static int 359 queue_io(struct spdk_ftl_dev *dev, struct ftl_io *io) 360 { 361 size_t result; 362 struct ftl_io_channel *ioch = ftl_io_channel_get_ctx(io->ioch); 363 364 result = spdk_ring_enqueue(ioch->sq, (void **)&io, 1, NULL); 365 if (spdk_unlikely(0 == result)) { 366 return -EAGAIN; 367 } 368 369 return 0; 370 } 371 372 int 373 spdk_ftl_writev(struct spdk_ftl_dev *dev, struct ftl_io *io, struct spdk_io_channel *ch, 374 uint64_t lba, uint64_t lba_cnt, struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, 375 void *cb_arg) 376 { 377 int rc; 378 379 if (iov_cnt == 0) { 380 return -EINVAL; 381 } 382 383 if (lba_cnt == 0) { 384 return -EINVAL; 385 } 386 387 if (lba_cnt != ftl_iovec_num_blocks(iov, iov_cnt)) { 388 FTL_ERRLOG(dev, "Invalid IO vector to handle, device %s, LBA %"PRIu64"\n", 389 dev->conf.name, lba); 390 return -EINVAL; 391 } 392 393 if (!dev->initialized) { 394 return -EBUSY; 395 } 396 397 rc = ftl_io_init(ch, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE); 398 if (rc) { 399 return rc; 400 } 401 402 return queue_io(dev, io); 403 } 404 405 int 406 spdk_ftl_readv(struct spdk_ftl_dev *dev, struct ftl_io *io, struct spdk_io_channel *ch, 407 uint64_t lba, uint64_t lba_cnt, struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg) 408 { 409 int rc; 410 411 if (iov_cnt == 0) { 412 return -EINVAL; 413 } 414 415 if (lba_cnt == 0) { 416 return -EINVAL; 417 } 418 419 if (lba_cnt != ftl_iovec_num_blocks(iov, iov_cnt)) { 420 FTL_ERRLOG(dev, "Invalid IO vector to handle, device %s, LBA %"PRIu64"\n", 421 dev->conf.name, lba); 422 return -EINVAL; 423 } 424 425 if (!dev->initialized) { 426 return -EBUSY; 427 } 428 429 rc = ftl_io_init(ch, io, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ); 430 if (rc) { 431 return rc; 432 } 433 434 return queue_io(dev, io); 435 } 436 437 #define FTL_IO_QUEUE_BATCH 16 438 int 439 ftl_io_channel_poll(void *arg) 440 { 441 struct ftl_io_channel *ch = arg; 442 void *ios[FTL_IO_QUEUE_BATCH]; 443 uint64_t i, count; 444 445 count = spdk_ring_dequeue(ch->cq, ios, FTL_IO_QUEUE_BATCH); 446 if (count == 0) { 447 return SPDK_POLLER_IDLE; 448 } 449 450 for (i = 0; i < count; i++) { 451 struct ftl_io *io = ios[i]; 452 io->user_fn(io->cb_ctx, io->status); 453 } 454 455 return SPDK_POLLER_BUSY; 456 } 457 458 static void 459 ftl_process_io_channel(struct spdk_ftl_dev *dev, struct ftl_io_channel *ioch) 460 { 461 void *ios[FTL_IO_QUEUE_BATCH]; 462 size_t count, i; 463 464 count = spdk_ring_dequeue(ioch->sq, ios, FTL_IO_QUEUE_BATCH); 465 if (count == 0) { 466 return; 467 } 468 469 for (i = 0; i < count; i++) { 470 struct ftl_io *io = ios[i]; 471 start_io(io); 472 } 473 } 474 475 static void 476 ftl_process_io_queue(struct spdk_ftl_dev *dev) 477 { 478 struct ftl_io_channel *ioch; 479 struct ftl_io *io; 480 481 /* TODO: Try to figure out a mechanism to batch more requests at the same time, 482 * with keeping enough resources (pinned pages), between reads, writes and gc/compaction 483 */ 484 if (!TAILQ_EMPTY(&dev->rd_sq)) { 485 io = TAILQ_FIRST(&dev->rd_sq); 486 TAILQ_REMOVE(&dev->rd_sq, io, queue_entry); 487 assert(io->type == FTL_IO_READ); 488 ftl_io_pin(io); 489 } 490 491 if (!ftl_nv_cache_full(&dev->nv_cache) && !TAILQ_EMPTY(&dev->wr_sq)) { 492 io = TAILQ_FIRST(&dev->wr_sq); 493 TAILQ_REMOVE(&dev->wr_sq, io, queue_entry); 494 assert(io->type == FTL_IO_WRITE); 495 if (!ftl_nv_cache_write(io)) { 496 TAILQ_INSERT_HEAD(&dev->wr_sq, io, queue_entry); 497 } 498 } 499 500 TAILQ_FOREACH(ioch, &dev->ioch_queue, entry) { 501 ftl_process_io_channel(dev, ioch); 502 } 503 } 504 505 int 506 ftl_core_poller(void *ctx) 507 { 508 struct spdk_ftl_dev *dev = ctx; 509 uint64_t io_activity_total_old = dev->io_activity_total; 510 511 if (dev->halt && ftl_shutdown_complete(dev)) { 512 spdk_poller_unregister(&dev->core_poller); 513 return SPDK_POLLER_IDLE; 514 } 515 516 ftl_process_io_queue(dev); 517 ftl_writer_run(&dev->writer_user); 518 ftl_writer_run(&dev->writer_gc); 519 ftl_reloc(dev->reloc); 520 ftl_nv_cache_process(dev); 521 ftl_l2p_process(dev); 522 523 if (io_activity_total_old != dev->io_activity_total) { 524 return SPDK_POLLER_BUSY; 525 } 526 527 return SPDK_POLLER_IDLE; 528 } 529 530 struct ftl_band * 531 ftl_band_get_next_free(struct spdk_ftl_dev *dev) 532 { 533 struct ftl_band *band = NULL; 534 535 if (!TAILQ_EMPTY(&dev->free_bands)) { 536 band = TAILQ_FIRST(&dev->free_bands); 537 TAILQ_REMOVE(&dev->free_bands, band, queue_entry); 538 ftl_band_erase(band); 539 } 540 541 return band; 542 } 543 544 void *g_ftl_write_buf; 545 void *g_ftl_read_buf; 546 547 int 548 spdk_ftl_init(void) 549 { 550 g_ftl_write_buf = spdk_zmalloc(FTL_ZERO_BUFFER_SIZE, FTL_ZERO_BUFFER_SIZE, NULL, 551 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 552 if (!g_ftl_write_buf) { 553 return -ENOMEM; 554 } 555 556 g_ftl_read_buf = spdk_zmalloc(FTL_ZERO_BUFFER_SIZE, FTL_ZERO_BUFFER_SIZE, NULL, 557 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA); 558 if (!g_ftl_read_buf) { 559 spdk_free(g_ftl_write_buf); 560 g_ftl_write_buf = NULL; 561 return -ENOMEM; 562 } 563 return 0; 564 } 565 566 void 567 spdk_ftl_fini(void) 568 { 569 spdk_free(g_ftl_write_buf); 570 spdk_free(g_ftl_read_buf); 571 } 572 573 void 574 spdk_ftl_dev_set_fast_shutdown(struct spdk_ftl_dev *dev, bool fast_shutdown) 575 { 576 assert(dev); 577 dev->conf.fast_shutdown = fast_shutdown; 578 } 579 580 struct spdk_io_channel * 581 spdk_ftl_get_io_channel(struct spdk_ftl_dev *dev) 582 { 583 return spdk_get_io_channel(dev); 584 } 585 586 SPDK_LOG_REGISTER_COMPONENT(ftl_core) 587