/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2022 Intel Corporation.
 * All rights reserved.
 */

#include "spdk/bdev_module.h"

#include "ftl_nv_cache.h"
#include "ftl_core.h"
#include "ftl_utils.h"
#include "ftl_band.h"
#include "ftl_internal.h"
#include "ftl_l2p_cache.h"
#include "ftl_mngt.h"
#include "ftl_mngt_steps.h"
#include "utils/ftl_addr_utils.h"

struct ftl_mngt_recovery_ctx {
	/* Main recovery FTL management process */
	struct ftl_mngt_process *main;
	int status;
	TAILQ_HEAD(, ftl_band) open_bands;
	uint64_t open_bands_num;
	struct {
		struct ftl_layout_region region;
		struct ftl_md *md;
		uint64_t *l2p;
		uint64_t *seq_id;
		uint64_t count;
	} l2p_snippet;
	struct {
		uint64_t block_limit;
		uint64_t lba_first;
		uint64_t lba_last;
		uint32_t i;
	} iter;
	uint64_t p2l_ckpt_seq_id[FTL_LAYOUT_REGION_TYPE_P2L_COUNT];
};

static const struct ftl_mngt_process_desc g_desc_recovery_iteration;
static const struct ftl_mngt_process_desc g_desc_recovery;
static const struct ftl_mngt_process_desc g_desc_recovery_shm;

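/* Check whether the recovery iterator has been fully consumed, i.e. there are no L2P
 * snippet blocks left to process.
 */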
static bool
recovery_iter_done(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
	return 0 == ctx->l2p_snippet.region.current.blocks;
}

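/* Move the L2P snippet window to the next chunk of the L2P region and recompute the
 * LBA range [lba_first, lba_last) covered by the current iteration.
 */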
static void
recovery_iter_advance(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
{
	struct ftl_layout_region *region, *snippet;
	uint64_t first_block, last_blocks;

	ctx->iter.i++;
	region = ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P);
	snippet = &ctx->l2p_snippet.region;

	/* Advance processed blocks */
	snippet->current.offset += snippet->current.blocks;
	snippet->current.blocks = region->current.offset + region->current.blocks - snippet->current.offset;
	snippet->current.blocks = spdk_min(snippet->current.blocks, ctx->iter.block_limit);

	first_block = snippet->current.offset - region->current.offset;
	ctx->iter.lba_first = first_block * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);

	last_blocks = first_block + snippet->current.blocks;
	ctx->iter.lba_last = last_blocks * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);

	if (ctx->iter.lba_last > dev->num_lbas) {
		ctx->iter.lba_last = dev->num_lbas;
	}
}

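/* Set up the recovery context: free any leftover L2P cache buffers, size the recovery
 * iterations to the configured DRAM limit, and allocate the shared-memory buffer that
 * holds the L2P snippet together with its per-LBA sequence IDs.
 */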
static void
ftl_mngt_recovery_init(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
	const uint64_t lbas_in_block = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t mem_limit, lba_limit, l2p_limit, iterations, seq_limit;
	uint64_t l2p_limit_block, seq_limit_block, md_blocks;
	int md_flags;

	ctx->main = mngt;

	if (ftl_fast_recovery(dev)) {
		/* Fast (shared memory) recovery does not need temporary buffers */
		ftl_mngt_next_step(mngt);
		return;
	}

	/*
	 * The recovery process allocates temporary buffers. To stay within the memory limit,
	 * free the L2P metadata buffers if they exist; they will be recreated in the L2P
	 * initialization phase.
	 */
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L1, ftl_md_create_shm_flags(dev));
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2, ftl_md_create_shm_flags(dev));
	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev));

	/* The values below are in bytes */
	mem_limit = dev->conf.l2p_dram_limit * MiB;
	mem_limit = spdk_min(mem_limit, spdk_divide_round_up(dev->num_lbas * dev->layout.l2p.addr_size,
			     MiB) * MiB);

	lba_limit = mem_limit / (sizeof(uint64_t) + dev->layout.l2p.addr_size);
	l2p_limit = lba_limit * dev->layout.l2p.addr_size;
	iterations = spdk_divide_round_up(dev->num_lbas, lba_limit);

	ctx->iter.block_limit = spdk_divide_round_up(l2p_limit, FTL_BLOCK_SIZE);

	/* Round to block size */
	ctx->l2p_snippet.count = ctx->iter.block_limit * lbas_in_block;

	seq_limit = ctx->l2p_snippet.count * sizeof(uint64_t);

	FTL_NOTICELOG(dev, "Recovery memory limit: %"PRIu64"MiB\n", (uint64_t)(mem_limit / MiB));
	FTL_NOTICELOG(dev, "L2P resident size: %"PRIu64"MiB\n", (uint64_t)(l2p_limit / MiB));
	FTL_NOTICELOG(dev, "Seq ID resident size: %"PRIu64"MiB\n", (uint64_t)(seq_limit / MiB));
	FTL_NOTICELOG(dev, "Recovery iterations: %"PRIu64"\n", iterations);
	dev->sb->ckpt_seq_id = 0;

	/* Initialize region */
	ctx->l2p_snippet.region = *ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P);
	/* Limit the number of blocks in the region, it will be needed for ftl_md_set_region */
	ctx->l2p_snippet.region.current.blocks = ctx->iter.block_limit;

	l2p_limit_block = ctx->iter.block_limit;
	seq_limit_block = spdk_divide_round_up(seq_limit, FTL_BLOCK_SIZE);

	md_blocks = l2p_limit_block + seq_limit_block;
	md_flags = FTL_MD_CREATE_SHM | FTL_MD_CREATE_SHM_NEW;

	/* Initialize the L2P snippet metadata */
	ctx->l2p_snippet.md = ftl_md_create(dev, md_blocks, 0, "l2p_recovery", md_flags,
					    &ctx->l2p_snippet.region);
	if (!ctx->l2p_snippet.md) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	ctx->l2p_snippet.l2p = ftl_md_get_buffer(ctx->l2p_snippet.md);

	/* Initialize the recovery iterator. It is called with blocks set to zero (nothing
	 * processed yet), so it recalculates the offsets and the starting LBA to the
	 * initial position.
	 */
	ctx->l2p_snippet.region.current.blocks = 0;
	recovery_iter_advance(dev, ctx);

	/* Initialize the snippet of sequence IDs */
	ctx->l2p_snippet.seq_id = (uint64_t *)((char *)ftl_md_get_buffer(ctx->l2p_snippet.md) +
					       (l2p_limit_block * FTL_BLOCK_SIZE));

	TAILQ_INIT(&ctx->open_bands);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recovery_deinit(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);

	ftl_md_destroy(ctx->l2p_snippet.md, 0);
	ctx->l2p_snippet.md = NULL;
	ctx->l2p_snippet.seq_id = NULL;

	ftl_mngt_next_step(mngt);
}

static void
recovery_iteration_cb(struct spdk_ftl_dev *dev, void *_ctx, int status)
{
	struct ftl_mngt_recovery_ctx *ctx = _ctx;

	recovery_iter_advance(dev, ctx);

	if (status) {
		ftl_mngt_fail_step(ctx->main);
	} else {
		ftl_mngt_continue_step(ctx->main);
	}
}

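/* Run a single L2P recovery iteration, or finish the step once the iterator reports that
 * the whole L2P range has been processed. Fast (shared memory) recovery skips the
 * iterations entirely.
 */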
static void
ftl_mngt_recovery_run_iteration(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);

	if (ftl_fast_recovery(dev)) {
		ftl_mngt_skip_step(mngt);
		return;
	}

	if (recovery_iter_done(dev, ctx)) {
		ftl_mngt_next_step(mngt);
	} else {
		ftl_mngt_process_execute(dev, &g_desc_recovery_iteration, recovery_iteration_cb, ctx);
	}
}

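/* Completion callback for restoring band metadata. Sorts bands by their persisted state:
 * free bands are reinitialized, open bands are moved to the recovery context's open list
 * for later P2L recovery, and closed bands are left on the shut list.
 */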
static void
restore_band_state_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
	struct ftl_band *band;
	uint64_t num_bands = ftl_get_num_bands(dev);
	uint64_t i;

	if (status) {
		/* Restore error, end step */
		ftl_mngt_fail_step(mngt);
		return;
	}

	for (i = 0; i < num_bands; i++) {
		band = &dev->bands[i];

		switch (band->md->state) {
		case FTL_BAND_STATE_FREE:
			ftl_band_initialize_free_state(band);
			break;
		case FTL_BAND_STATE_OPEN:
			TAILQ_REMOVE(&band->dev->shut_bands, band, queue_entry);
			TAILQ_INSERT_HEAD(&pctx->open_bands, band, queue_entry);
			break;
		case FTL_BAND_STATE_CLOSED:
			break;
		default:
			status = -EINVAL;
		}
	}

	if (status) {
		ftl_mngt_fail_step(mngt);
	} else {
		ftl_mngt_next_step(mngt);
	}
}

static void
ftl_mngt_recovery_restore_band_state(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];

	md->owner.cb_ctx = mngt;
	md->cb = restore_band_state_cb;
	ftl_md_restore(md);
}

struct band_md_ctx {
	int status;
	uint64_t qd;
	uint64_t id;
};

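/* Iterate over all non-free bands and invoke the given callback with each band's tail
 * (P2L) metadata loaded. The step context tracks the in-flight queue depth and the next
 * band ID so the walk can be resumed from callback completions.
 */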
static void
ftl_mngt_recovery_walk_band_tail_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt,
				    ftl_band_md_cb cb)
{
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	uint64_t num_bands = ftl_get_num_bands(dev);

	/*
	 * This function generates a high queue depth and utilizes ftl_mngt_continue_step
	 * during completions to make sure all bands are processed before returning an error
	 * (if any were found) or continuing on.
	 */
	if (0 == sctx->qd && sctx->id == num_bands) {
		if (sctx->status) {
			ftl_mngt_fail_step(mngt);
		} else {
			ftl_mngt_next_step(mngt);
		}
		return;
	}

	while (sctx->id < num_bands) {
		struct ftl_band *band = &dev->bands[sctx->id];

		if (FTL_BAND_STATE_FREE == band->md->state) {
			sctx->id++;
			continue;
		}

		if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
			/* This band is already open and has a valid P2L map */
			sctx->id++;
			sctx->qd++;
			ftl_band_acquire_p2l_map(band);
			cb(band, mngt, FTL_MD_SUCCESS);
			continue;
		} else {
			if (dev->sb->ckpt_seq_id && (band->md->close_seq_id <= dev->sb->ckpt_seq_id)) {
				sctx->id++;
				continue;
			}

			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
			if (ftl_band_alloc_p2l_map(band)) {
				/* No more free P2L maps, try again later */
				break;
			}
		}

		sctx->id++;
		ftl_band_read_tail_brq_md(band, cb, mngt);
		sctx->qd++;
	}

	if (0 == sctx->qd) {
		/*
		 * Zero QD can happen when all leftover bands are in the free state.
		 * To streamline all potential error handling (since many bands read their P2L
		 * at the same time), we use ftl_mngt_continue_step to arrive at the same spot
		 * that checks for the mngt step end (see the beginning of this function).
		 */
		ftl_mngt_continue_step(mngt);
	}
}

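/* Seed the per-LBA sequence IDs for this iteration from the trim metadata and reset the
 * L2P snippet entries to FTL_ADDR_INVALID, so that band and chunk restores can compare
 * their sequence IDs against the trim sequence IDs.
 */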
static void
ftl_mngt_recovery_iteration_init_seq_ids(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
	uint64_t *trim_map = ftl_md_get_buffer(md);
	uint64_t page_id, trim_seq_id;
	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	uint64_t lba, lba_off;

	if (dev->sb->ckpt_seq_id) {
		FTL_ERRLOG(dev, "Checkpoint recovery not supported!\n");
		ftl_mngt_fail_step(mngt);
		return;
	}

	for (lba = ctx->iter.lba_first; lba < ctx->iter.lba_last; lba++) {
		lba_off = lba - ctx->iter.lba_first;
		page_id = lba / lbas_in_page;

		assert(page_id < ftl_md_get_buffer_size(md) / sizeof(*trim_map));
		assert(page_id < ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P)->current.blocks);
		assert(lba_off < ctx->l2p_snippet.count);

		trim_seq_id = trim_map[page_id];

		ctx->l2p_snippet.seq_id[lba_off] = trim_seq_id;
		ftl_addr_store(dev, ctx->l2p_snippet.l2p, lba_off, FTL_ADDR_INVALID);
	}

	ftl_mngt_next_step(mngt);
}

static void
l2p_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;

	if (status) {
		ftl_mngt_fail_step(mngt);
	} else {
		ftl_mngt_next_step(mngt);
	}
}

static void
ftl_mngt_recovery_iteration_load_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = ctx->l2p_snippet.md;
	struct ftl_layout_region *region = &ctx->l2p_snippet.region;

	FTL_NOTICELOG(dev, "L2P recovery, iteration %u\n", ctx->iter.i);
	FTL_NOTICELOG(dev, "Load L2P, blocks [%"PRIu64", %"PRIu64"), LBAs [%"PRIu64", %"PRIu64")\n",
		      region->current.offset, region->current.offset + region->current.blocks,
		      ctx->iter.lba_first, ctx->iter.lba_last);

	ftl_md_set_region(md, &ctx->l2p_snippet.region);

	md->owner.cb_ctx = mngt;
	md->cb = l2p_cb;
	ftl_md_restore(md);
}

static void
ftl_mngt_recovery_iteration_save_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
	struct ftl_md *md = ctx->l2p_snippet.md;

	md->owner.cb_ctx = mngt;
	md->cb = l2p_cb;
	ftl_md_persist(md);
}

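/* Callback invoked for each band with its tail (P2L) metadata loaded. Verifies the P2L
 * map CRC for closed bands, then walks the band's P2L entries and updates the L2P snippet
 * with the newest mapping for every LBA within the current iteration's range, resolving
 * conflicts by sequence ID.
 */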
static void
restore_band_l2p_cb(struct ftl_band *band, void *cntx, enum ftl_md_status status)
{
	struct ftl_mngt_process *mngt = cntx;
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	struct spdk_ftl_dev *dev = band->dev;
	ftl_addr addr, curr_addr;
	uint64_t i, lba, seq_id, num_blks_in_band;
	uint32_t band_map_crc;
	int rc = 0;

	if (status != FTL_MD_SUCCESS) {
		FTL_ERRLOG(dev, "L2P band restore error, failed to read P2L map\n");
		rc = -EIO;
		goto cleanup;
	}

	band_map_crc = spdk_crc32c_update(band->p2l_map.band_map,
					  ftl_tail_md_num_blocks(band->dev) * FTL_BLOCK_SIZE, 0);

	/* P2L map is only valid if the band state is closed */
	if (FTL_BAND_STATE_CLOSED == band->md->state && band->md->p2l_map_checksum != band_map_crc) {
		FTL_ERRLOG(dev, "L2P band restore error, inconsistent P2L map CRC\n");
		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_BASE);
		rc = -EINVAL;
		goto cleanup;
	}

	num_blks_in_band = ftl_get_num_blocks_in_band(dev);
	for (i = 0; i < num_blks_in_band; ++i) {
		uint64_t lba_off;
		lba = band->p2l_map.band_map[i].lba;
		seq_id = band->p2l_map.band_map[i].seq_id;

		if (lba == FTL_LBA_INVALID) {
			continue;
		}
		if (lba >= dev->num_lbas) {
			FTL_ERRLOG(dev, "L2P band restore ERROR, LBA out of range\n");
			rc = -EINVAL;
			break;
		}
		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
			continue;
		}

		lba_off = lba - pctx->iter.lba_first;
		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {

			/* Overlapped band/chunk has newer data - invalidate P2L map on open/full band */
			if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
				addr = ftl_band_addr_from_block_offset(band, i);
				ftl_band_set_p2l(band, FTL_LBA_INVALID, addr, 0);
			}

			/* Newer data already recovered */
			continue;
		}

		addr = ftl_band_addr_from_block_offset(band, i);

		curr_addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);

		/* Overlapped band/chunk has newer data - invalidate P2L map on open/full band */
		if (curr_addr != FTL_ADDR_INVALID && !ftl_addr_in_nvc(dev, curr_addr) && curr_addr != addr) {
			struct ftl_band *curr_band = ftl_band_from_addr(dev, curr_addr);

			if (FTL_BAND_STATE_OPEN == curr_band->md->state || FTL_BAND_STATE_FULL == curr_band->md->state) {
				size_t prev_offset = ftl_band_block_offset_from_addr(curr_band, curr_addr);
				if (curr_band->p2l_map.band_map[prev_offset].lba == lba &&
				    seq_id >= curr_band->p2l_map.band_map[prev_offset].seq_id) {
					ftl_band_set_p2l(curr_band, FTL_LBA_INVALID, curr_addr, 0);
				}
			}
		}

		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
	}

cleanup:
	ftl_band_release_p2l_map(band);

	sctx->qd--;
	if (rc) {
		sctx->status = rc;
	}

	ftl_mngt_continue_step(mngt);
}

static void
ftl_mngt_recovery_iteration_restore_band_l2p(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	ftl_mngt_recovery_walk_band_tail_md(dev, mngt, restore_band_l2p_cb);
}

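/* Per-chunk callback for the NV cache L2P restore. Verifies the chunk's P2L map CRC and
 * updates the L2P snippet with any chunk mappings that are newer (by sequence ID) than
 * what has been recovered so far for LBAs within the current iteration's range.
 */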
static int
restore_chunk_l2p_cb(struct ftl_nv_cache_chunk *chunk, void *ctx)
{
	struct ftl_mngt_recovery_ctx *pctx = ctx;
	struct spdk_ftl_dev *dev;
	struct ftl_nv_cache *nv_cache = chunk->nv_cache;
	ftl_addr addr;
	const uint64_t seq_id = chunk->md->seq_id;
	uint64_t i, lba;
	uint32_t chunk_map_crc;

	dev = SPDK_CONTAINEROF(chunk->nv_cache, struct spdk_ftl_dev, nv_cache);

	chunk_map_crc = spdk_crc32c_update(chunk->p2l_map.chunk_map,
					   ftl_nv_cache_chunk_tail_md_num_blocks(chunk->nv_cache) * FTL_BLOCK_SIZE, 0);
	if (chunk->md->p2l_map_checksum != chunk_map_crc) {
		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_NV_CACHE);
		return -1;
	}

	for (i = 0; i < nv_cache->chunk_blocks; ++i) {
		uint64_t lba_off;

		lba = ftl_chunk_map_get_lba(chunk, i);

		if (lba == FTL_LBA_INVALID) {
			continue;
		}
		if (lba >= dev->num_lbas) {
			FTL_ERRLOG(dev, "L2P Chunk restore ERROR, LBA out of range\n");
			return -1;
		}
		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
			continue;
		}

		lba_off = lba - pctx->iter.lba_first;
		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {
			/* Newer data already recovered */
			continue;
		}

		addr = ftl_addr_from_nvc_offset(dev, chunk->offset + i);
		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
	}

	return 0;
}

static void
ftl_mngt_recovery_iteration_restore_chunk_l2p(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	ftl_mngt_nv_cache_restore_l2p(dev, mngt, restore_chunk_l2p_cb, ftl_mngt_get_caller_ctx(mngt));
}

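/* Walk the recovered L2P snippet and mark every mapped physical address in the global
 * valid map, bumping the per-band valid block counters for addresses on the base device.
 * An address that is already set indicates an L2P inconsistency and fails the step.
 */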
static void
ftl_mngt_recovery_iteration_restore_valid_map(struct spdk_ftl_dev *dev,
		struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
	uint64_t lba, lba_off;
	ftl_addr addr;

	for (lba = pctx->iter.lba_first; lba < pctx->iter.lba_last; lba++) {
		lba_off = lba - pctx->iter.lba_first;
		addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);

		if (addr == FTL_ADDR_INVALID) {
			continue;
		}

		if (!ftl_addr_in_nvc(dev, addr)) {
			struct ftl_band *band = ftl_band_from_addr(dev, addr);
			band->p2l_map.num_valid++;
		}

		if (ftl_bitmap_get(dev->valid_map, addr)) {
			assert(false);
			ftl_mngt_fail_step(mngt);
			return;
		} else {
			ftl_bitmap_set(dev->valid_map, addr);
		}
	}

	ftl_mngt_next_step(mngt);
}

static void
p2l_ckpt_preprocess(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *pctx)
{
	uint64_t seq_id;
	int md_region, ckpt_id;

	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
		seq_id = ftl_mngt_p2l_ckpt_get_seq_id(dev, md_region);
		pctx->p2l_ckpt_seq_id[ckpt_id] = seq_id;
		FTL_NOTICELOG(dev, "P2L ckpt_id=%d found seq_id=%"PRIu64"\n", ckpt_id, seq_id);
	}
}

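/* Restore an open band's P2L map from the P2L checkpoint whose sequence ID matches the
 * band's. If no checkpoint matches, the band holds no valid blocks and its write pointer
 * is reset to the beginning of the band.
 */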
static int
p2l_ckpt_restore_p2l(struct ftl_mngt_recovery_ctx *pctx, struct ftl_band *band)
{
	uint64_t seq_id;
	int md_region, ckpt_id;

	memset(band->p2l_map.band_map, -1,
	       FTL_BLOCK_SIZE * ftl_p2l_map_num_blocks(band->dev));

	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
		seq_id = pctx->p2l_ckpt_seq_id[ckpt_id];
		if (seq_id == band->md->seq) {
			FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u ckpt_id=%d seq_id=%"
				      PRIu64"\n", band->id, ckpt_id, seq_id);
			return ftl_mngt_p2l_ckpt_restore(band, md_region, seq_id);
		}
	}

	/* Band opened but no valid blocks within it, set write pointer to 0 */
	ftl_band_iter_init(band);
	FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u, band_seq_id=%"PRIu64" does not"
		      " match any P2L checkpoint\n", band->id, band->md->seq);
	return 0;
}

static void
ftl_mngt_recovery_pre_process_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);

	p2l_ckpt_preprocess(dev, pctx);
	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recover_seq_id(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_recover_max_seq(dev);
	ftl_mngt_next_step(mngt);
}

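/* Recover the P2L maps of bands that were open at the time of the dirty shutdown. On the
 * first call, P2L maps are allocated and restored from checkpoints for all open bands;
 * subsequent continuations pop one band at a time, mark it full if appropriate, and move
 * it to the shut list for the band finalization step.
 */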
static void
ftl_mngt_recovery_open_bands_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
	struct ftl_band *band;

	if (TAILQ_EMPTY(&pctx->open_bands)) {
		FTL_NOTICELOG(dev, "No more open bands to recover from P2L\n");
		if (pctx->status) {
			ftl_mngt_fail_step(mngt);
		} else {
			ftl_mngt_next_step(mngt);
		}
		return;
	}

	if (!ftl_mngt_get_step_ctx(mngt)) {
		ftl_mngt_alloc_step_ctx(mngt, sizeof(bool));

		/* Step called for the first time, initialize */
		TAILQ_FOREACH(band, &pctx->open_bands, queue_entry) {
			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
			if (ftl_band_alloc_p2l_map(band)) {
				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot allocate P2L map\n");
				ftl_mngt_fail_step(mngt);
				return;
			}

			if (p2l_ckpt_restore_p2l(pctx, band)) {
				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot restore P2L\n");
				ftl_mngt_fail_step(mngt);
				return;
			}

			if (!band->p2l_map.p2l_ckpt) {
				band->p2l_map.p2l_ckpt = ftl_p2l_ckpt_acquire_region_type(dev, band->md->p2l_md_region);
				if (!band->p2l_map.p2l_ckpt) {
					FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot acquire P2L\n");
					ftl_mngt_fail_step(mngt);
					return;
				}
			}
		}
	}

	band = TAILQ_FIRST(&pctx->open_bands);

	if (ftl_band_filled(band, band->md->iter.offset)) {
		band->md->state = FTL_BAND_STATE_FULL;
	}

	/* In the next step (finalize band initialization) this band will be assigned to
	 * the writer. So we temporarily move this band to the closed list, and in the
	 * next step it will be moved to the writer from that list.
	 */
	TAILQ_REMOVE(&pctx->open_bands, band, queue_entry);
	TAILQ_INSERT_TAIL(&dev->shut_bands, band, queue_entry);

	FTL_NOTICELOG(dev, "Open band recovered, id = %u, seq id %"PRIu64", write offset %"PRIu64"\n",
		      band->id, band->md->seq, band->md->iter.offset);

	ftl_mngt_continue_step(mngt);
}

static void
ftl_mngt_restore_valid_counters(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_valid_map_load_state(dev);
	ftl_mngt_next_step(mngt);
}

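/* A non-zero sequence ID in the trim log header indicates a trim transaction that has
 * been logged but not yet cleared, i.e. a trim is still pending.
 */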
static bool
trim_pending(struct spdk_ftl_dev *dev)
{
	struct ftl_trim_log *log = ftl_md_get_buffer(dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG]);

	if (log->hdr.trim.seq_id) {
		return true;
	}

	return false;
}

static void
ftl_mngt_recover_trim_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
	if (!status) {
		ftl_mngt_next_step(mngt);
	} else {
		ftl_mngt_fail_step(mngt);
	}
}

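/* Shared memory recovery path: replay a trim transaction that was in progress according
 * to the shared-memory superblock and report any trim still pending in the trim log.
 */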
static void
ftl_mngt_complete_trim(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	uint64_t start_lba, num_blocks, seq_id;

	if (dev->sb_shm->trim.in_progress) {
		start_lba = dev->sb_shm->trim.start_lba;
		num_blocks = dev->sb_shm->trim.num_blocks;
		seq_id = dev->sb_shm->trim.seq_id;
		assert(seq_id <= dev->sb->seq_id);
		ftl_set_trim_map(dev, start_lba, num_blocks, seq_id);
	}

	if (trim_pending(dev)) {
		struct ftl_trim_log *log = ftl_md_get_buffer(dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG]);
		FTL_NOTICELOG(dev, "Incomplete trim detected lba: %"PRIu64" num_blocks: %"PRIu64"\n",
			      log->hdr.trim.start_lba, log->hdr.trim.num_blocks);
	}

	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recover_trim_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_recover_trim_cb;
	ftl_md_restore(md);
}

static void
ftl_mngt_recover_trim_md_persist(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];

	if (!trim_pending(dev)) {
		/* No pending trim logged */
		ftl_mngt_skip_step(mngt);
		return;
	}

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_recover_trim_cb;
	ftl_md_persist(md);
}

static void
ftl_mngt_recover_trim_log_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
{
	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
	struct ftl_trim_log *log = ftl_md_get_buffer(md);
	uint64_t *page;

	if (status) {
		ftl_mngt_fail_step(mngt);
		return;
	}

	if (!trim_pending(dev)) {
		/* No pending trim logged */
		ftl_mngt_skip_step(mngt);
		return;
	}

	/* Pending trim, complete the trim transaction */
	const uint64_t seq_id = log->hdr.trim.seq_id;
	const uint64_t lba = log->hdr.trim.start_lba;
	const uint64_t num_blocks = log->hdr.trim.num_blocks;
	const uint64_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
	const uint64_t first_page = lba / lbas_in_page;
	const uint64_t num_pages = num_blocks / lbas_in_page;

	page = ftl_md_get_buffer(dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD]);

	if (lba % lbas_in_page || num_blocks % lbas_in_page) {
		FTL_ERRLOG(dev, "Invalid trim log content\n");
		ftl_mngt_fail_step(mngt);
		return;
	}

	for (uint64_t i = first_page; i < first_page + num_pages; ++i) {
		if (page[i] > seq_id) {
			FTL_ERRLOG(dev, "Invalid trim metadata content\n");
			ftl_mngt_fail_step(mngt);
			return;
		}
		page[i] = seq_id;
	}

	ftl_mngt_next_step(mngt);
}

static void
ftl_mngt_recover_trim_log(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG];

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_recover_trim_log_cb;
	ftl_md_restore(md);
}

static void
ftl_mngt_recover_trim_persist(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG];

	if (!trim_pending(dev)) {
		/* No pending trim logged */
		ftl_mngt_skip_step(mngt);
		return;
	}

	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_recover_trim_cb;
	ftl_md_persist(md);
}

static void
ftl_mngt_recover_trim_log_clear(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG];
	struct ftl_trim_log *log = ftl_md_get_buffer(md);

	if (!trim_pending(dev)) {
		/* No pending trim logged */
		ftl_mngt_skip_step(mngt);
		return;
	}

	memset(&log->hdr, 0, sizeof(log->hdr));
	md->owner.cb_ctx = mngt;
	md->cb = ftl_mngt_recover_trim_cb;
	ftl_md_persist(md);
}

static const struct ftl_mngt_process_desc g_desc_trim_recovery = {
	.name = "FTL trim recovery ",
	.steps = {
		{
			.name = "Recover trim metadata",
			.action = ftl_mngt_recover_trim_md,
		},
		{
			.name = "Recover trim log",
			.action = ftl_mngt_recover_trim_log,
		},
		{
			.name = "Persist trim metadata",
			.action = ftl_mngt_recover_trim_md_persist,
		},
		{
			.name = "Clear trim log",
			.action = ftl_mngt_recover_trim_log_clear,
		},
		{}
	}
};

static const struct ftl_mngt_process_desc g_desc_trim_shm_recovery = {
	.name = "FTL trim shared memory recovery ",
	.steps = {
		{
			.name = "Complete trim transaction",
			.action = ftl_mngt_complete_trim,
		},
		{
			.name = "Persist trim log",
			.action = ftl_mngt_recover_trim_persist,
		},
		{
			.name = "Persist trim metadata",
			.action = ftl_mngt_recover_trim_md_persist,
		},
		{
			.name = "Clear trim log",
			.action = ftl_mngt_recover_trim_log_clear,
		},
		{}
	}
};

static void
ftl_mngt_recover_trim(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	if (ftl_fast_recovery(dev)) {
		ftl_mngt_skip_step(mngt);
		return;
	}

	ftl_mngt_call_process(mngt, &g_desc_trim_recovery, NULL);
}

static void
ftl_mngt_recover_trim_shm(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_mngt_call_process(mngt, &g_desc_trim_shm_recovery, NULL);
}

static void
ftl_mngt_recovery_shm_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	if (ftl_fast_recovery(dev)) {
		ftl_mngt_call_process(mngt, &g_desc_recovery_shm, NULL);
	} else {
		ftl_mngt_skip_step(mngt);
	}
}

/*
 * During dirty shutdown recovery, the whole L2P needs to be reconstructed. However,
 * recreating it all at the same time may take up too much DRAM, so it's done in multiple
 * iterations. This process describes the recovery of a part of L2P in one iteration.
 */
static const struct ftl_mngt_process_desc g_desc_recovery_iteration = {
	.name = "FTL recovery iteration",
	.steps = {
		{
			.name = "Load L2P",
			.action = ftl_mngt_recovery_iteration_load_l2p,
		},
		{
			.name = "Initialize sequence IDs",
			.action = ftl_mngt_recovery_iteration_init_seq_ids,
		},
		{
			.name = "Restore chunk L2P",
			.action = ftl_mngt_recovery_iteration_restore_chunk_l2p,
		},
		{
			.name = "Restore band L2P",
			.ctx_size = sizeof(struct band_md_ctx),
			.action = ftl_mngt_recovery_iteration_restore_band_l2p,
		},
		{
			.name = "Restore valid map",
			.action = ftl_mngt_recovery_iteration_restore_valid_map,
		},
		{
			.name = "Save L2P",
			.action = ftl_mngt_recovery_iteration_save_l2p,
		},
		{}
	}
};

/*
 * Loading of FTL after dirty shutdown. Recovers metadata and L2P, and decides on the number
 * of recovery iterations to execute (dependent on the ratio of the L2P cache size to the
 * total L2P size).
 */
static const struct ftl_mngt_process_desc g_desc_recovery = {
	.name = "FTL recovery",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			.name = "Initialize recovery",
			.action = ftl_mngt_recovery_init,
			.cleanup = ftl_mngt_recovery_deinit
		},
		{
			.name = "Recover band state",
			.action = ftl_mngt_recovery_restore_band_state,
		},
		{
			.name = "Initialize P2L checkpointing",
			.action = ftl_mngt_p2l_init_ckpt,
			.cleanup = ftl_mngt_p2l_deinit_ckpt
		},
		{
			.name = "Restore P2L checkpoints",
			.action = ftl_mngt_p2l_restore_ckpt
		},
		{
			.name = "Preprocess P2L checkpoints",
			.action = ftl_mngt_recovery_pre_process_p2l
		},
		{
			.name = "Recover open bands P2L",
			.action = ftl_mngt_recovery_open_bands_p2l
		},
		{
			.name = "Recover chunk state",
			.action = ftl_mngt_nv_cache_restore_chunk_state
		},
		{
			.name = "Recover max seq ID",
			.action = ftl_mngt_recover_seq_id
		},
		{
			.name = "Recover trim",
			.action = ftl_mngt_recover_trim
		},
		{
			.name = "Recover open chunks P2L",
			.action = ftl_mngt_nv_cache_recover_open_chunk
		},
		{
			.name = "Recovery iterations",
			.action = ftl_mngt_recovery_run_iteration,
		},
		{
			.name = "Deinitialize recovery",
			.action = ftl_mngt_recovery_deinit
		},
		{
			.name = "Initialize L2P",
			.action = ftl_mngt_init_l2p,
			.cleanup = ftl_mngt_deinit_l2p
		},
		{
			.name = "Recover L2P from shared memory",
			.action = ftl_mngt_recovery_shm_l2p,
		},
		{
			.name = "Finalize band initialization",
			.action = ftl_mngt_finalize_init_bands,
		},
		{
			.name = "Start core poller",
			.action = ftl_mngt_start_core_poller,
			.cleanup = ftl_mngt_stop_core_poller
		},
		{
			.name = "Self test on startup",
			.action = ftl_mngt_self_test
		},
		{
			.name = "Finalize initialization",
			.action = ftl_mngt_finalize_startup,
		},
		{}
	}
};

/*
 * Shared memory specific steps for dirty shutdown recovery - the main task is rebuilding
 * the state of the L2P cache (paged in/out status, dirtiness etc. of individual pages).
 */
static const struct ftl_mngt_process_desc g_desc_recovery_shm = {
	.name = "FTL recovery from SHM",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			.name = "Restore L2P from shared memory",
			.action = ftl_mngt_restore_l2p,
		},
		{
			.name = "Restore valid maps counters",
			.action = ftl_mngt_restore_valid_counters,
		},
		{
			.name = "Recover trim from shared memory",
			.action = ftl_mngt_recover_trim_shm,
		},
		{}
	}
};

void
ftl_mngt_recover(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_mngt_call_process(mngt, &g_desc_recovery, NULL);
}