xref: /spdk/lib/ftl/mngt/ftl_mngt_recovery.c (revision 95d6c9fac17572b107042103439aafd696d60b0e)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2022 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/bdev_module.h"
7 
8 #include "ftl_nv_cache.h"
9 #include "ftl_core.h"
10 #include "ftl_utils.h"
11 #include "ftl_band.h"
12 #include "ftl_internal.h"
13 #include "ftl_l2p_cache.h"
14 #include "ftl_mngt.h"
15 #include "ftl_mngt_steps.h"
16 #include "utils/ftl_addr_utils.h"
17 
18 struct ftl_mngt_recovery_ctx {
19 	/* Main FTL recovery management process */
20 	struct ftl_mngt_process *main;
21 	int status;
22 	TAILQ_HEAD(, ftl_band) open_bands;
23 	uint64_t open_bands_num;
24 	struct {
25 		struct ftl_layout_region region;
26 		struct ftl_md *md;
27 		uint64_t *l2p;
28 		uint64_t *seq_id;
29 		uint64_t count;
30 	} l2p_snippet;
31 	struct {
32 		uint64_t block_limit;
33 		uint64_t lba_first;
34 		uint64_t lba_last;
35 		uint32_t i;
36 	} iter;
37 	uint64_t p2l_ckpt_seq_id[FTL_LAYOUT_REGION_TYPE_P2L_COUNT];
38 };
39 
40 static const struct ftl_mngt_process_desc g_desc_recovery_iteration;
41 static const struct ftl_mngt_process_desc g_desc_recovery;
42 static const struct ftl_mngt_process_desc g_desc_recovery_shm;
43 
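/* L2P recovery iterations are finished once the snippet window no longer covers any blocks */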
44 static bool
45 recovery_iter_done(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
46 {
47 	return 0 == ctx->l2p_snippet.region.current.blocks;
48 }
49 
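/*
 * Advance the L2P snippet window past the blocks processed so far, size the new window to at
 * most block_limit blocks and derive the LBA range [lba_first, lba_last) covered by the next
 * iteration.
 */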
50 static void
51 recovery_iter_advance(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
52 {
53 	struct ftl_layout_region *region, *snippet;
54 	uint64_t first_block, last_blocks;
55 
56 	ctx->iter.i++;
57 	region = ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P);
58 	snippet = &ctx->l2p_snippet.region;
59 
60 	/* Advance processed blocks */
61 	snippet->current.offset += snippet->current.blocks;
62 	snippet->current.blocks = region->current.offset + region->current.blocks - snippet->current.offset;
63 	snippet->current.blocks = spdk_min(snippet->current.blocks, ctx->iter.block_limit);
64 
65 	first_block = snippet->current.offset - region->current.offset;
66 	ctx->iter.lba_first = first_block * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);
67 
68 	last_blocks = first_block + snippet->current.blocks;
69 	ctx->iter.lba_last = last_blocks * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);
70 
71 	if (ctx->iter.lba_last > dev->num_lbas) {
72 		ctx->iter.lba_last = dev->num_lbas;
73 	}
74 }
75 
76 static void
77 ftl_mngt_recovery_init(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
78 {
79 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
80 	const uint64_t lbas_in_block = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
81 	uint64_t mem_limit, lba_limit, l2p_limit, iterations, seq_limit;
82 	uint64_t l2p_limit_block, seq_limit_block, md_blocks;
83 	int md_flags;
84 
85 	ctx->main = mngt;
86 
87 	if (ftl_fast_recovery(dev)) {
88 		/* Shared memory fast recovery doesn't need the temporary buffers */
89 		ftl_mngt_next_step(mngt);
90 		return;
91 	}
92 
93 	/*
94 	 * The recovery process allocates temporary buffers. To stay within the memory limit, free the
95 	 * L2P metadata buffers if they exist; they will be recreated in the L2P initialization phase
96 	 */
97 	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L1, ftl_md_create_shm_flags(dev));
98 	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2, ftl_md_create_shm_flags(dev));
99 	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev));
100 
101 	/* The values below are in bytes */
102 	mem_limit = dev->conf.l2p_dram_limit * MiB;
103 	mem_limit = spdk_min(mem_limit, spdk_divide_round_up(dev->num_lbas * dev->layout.l2p.addr_size,
104 			     MiB) * MiB);
105 
106 	lba_limit = mem_limit / (sizeof(uint64_t) + dev->layout.l2p.addr_size);
107 	l2p_limit = lba_limit * dev->layout.l2p.addr_size;
108 	iterations = spdk_divide_round_up(dev->num_lbas, lba_limit);
109 
110 	ctx->iter.block_limit = spdk_divide_round_up(l2p_limit, FTL_BLOCK_SIZE);
111 
112 	/* Round the LBA count up to a whole number of blocks */
113 	ctx->l2p_snippet.count = ctx->iter.block_limit * lbas_in_block;
114 
115 	seq_limit = ctx->l2p_snippet.count * sizeof(uint64_t);
116 
117 	FTL_NOTICELOG(dev, "Recovery memory limit: %"PRIu64"MiB\n", (uint64_t)(mem_limit / MiB));
118 	FTL_NOTICELOG(dev, "L2P resident size: %"PRIu64"MiB\n", (uint64_t)(l2p_limit / MiB));
119 	FTL_NOTICELOG(dev, "Seq ID resident size: %"PRIu64"MiB\n", (uint64_t)(seq_limit / MiB));
120 	FTL_NOTICELOG(dev, "Recovery iterations: %"PRIu64"\n", iterations);
121 	dev->sb->ckpt_seq_id = 0;
122 
123 	/* Initialize region */
124 	ctx->l2p_snippet.region = *ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P);
125 	/* Limit the blocks in the region; this will be needed for ftl_md_set_region */
126 	ctx->l2p_snippet.region.current.blocks = ctx->iter.block_limit;
127 
128 	l2p_limit_block = ctx->iter.block_limit;
129 	seq_limit_block = spdk_divide_round_up(seq_limit, FTL_BLOCK_SIZE);
130 
131 	md_blocks = l2p_limit_block + seq_limit_block;
132 	md_flags = FTL_MD_CREATE_SHM | FTL_MD_CREATE_SHM_NEW;
133 
134 	/* Initialize snippet of L2P metadata */
135 	ctx->l2p_snippet.md = ftl_md_create(dev, md_blocks, 0, "l2p_recovery", md_flags,
136 					    &ctx->l2p_snippet.region);
137 	if (!ctx->l2p_snippet.md) {
138 		ftl_mngt_fail_step(mngt);
139 		return;
140 	}
141 
142 	ctx->l2p_snippet.l2p = ftl_md_get_buffer(ctx->l2p_snippet.md);
143 
144 	/* Initialize the recovery iterator. Calling it with blocks set to zero
145 	 * means zero blocks have been done (processed), so it recalculates the
146 	 * offsets and starting LBA back to the initial position */
147 	ctx->l2p_snippet.region.current.blocks = 0;
148 	recovery_iter_advance(dev, ctx);
149 
150 	/* Initialize snippet of sequence IDs */
151 	ctx->l2p_snippet.seq_id = (uint64_t *)((char *)ftl_md_get_buffer(ctx->l2p_snippet.md) +
152 					       (l2p_limit_block * FTL_BLOCK_SIZE));
153 
154 	TAILQ_INIT(&ctx->open_bands);
155 	ftl_mngt_next_step(mngt);
156 }
157 
158 static void
159 ftl_mngt_recovery_deinit(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
160 {
161 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
162 
163 	ftl_md_destroy(ctx->l2p_snippet.md, 0);
164 	ctx->l2p_snippet.md = NULL;
165 	ctx->l2p_snippet.seq_id = NULL;
166 
167 	ftl_mngt_next_step(mngt);
168 }
169 
170 static void
171 recovery_iteration_cb(struct spdk_ftl_dev *dev, void *_ctx, int status)
172 {
173 	struct ftl_mngt_recovery_ctx *ctx = _ctx;
174 
175 	recovery_iter_advance(dev, ctx);
176 
177 	if (status) {
178 		ftl_mngt_fail_step(ctx->main);
179 	} else {
180 		ftl_mngt_continue_step(ctx->main);
181 	}
182 }
183 
184 static void
185 ftl_mngt_recovery_run_iteration(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
186 {
187 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
188 
189 	if (ftl_fast_recovery(dev)) {
190 		ftl_mngt_skip_step(mngt);
191 		return;
192 	}
193 
194 	if (recovery_iter_done(dev, ctx)) {
195 		ftl_mngt_next_step(mngt);
196 	} else {
197 		ftl_mngt_process_execute(dev, &g_desc_recovery_iteration, recovery_iteration_cb, ctx);
198 	}
199 }
200 
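/*
 * Band metadata restore completion: sort bands by their restored state. Free bands get their
 * free-state bookkeeping initialized, open bands are moved from the shut list onto the recovery
 * context's open_bands list (to be rebuilt from P2L checkpoints later), closed bands need no
 * action and any other state is treated as an error.
 */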
201 static void
202 restore_band_state_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
203 {
204 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
205 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
206 	struct ftl_band *band;
207 	uint64_t num_bands = ftl_get_num_bands(dev);
208 	uint64_t i;
209 
210 	if (status) {
211 		/* Restore error, end step */
212 		ftl_mngt_fail_step(mngt);
213 		return;
214 	}
215 
216 	for (i = 0; i < num_bands; i++) {
217 		band = &dev->bands[i];
218 
219 		switch (band->md->state) {
220 		case FTL_BAND_STATE_FREE:
221 			ftl_band_initialize_free_state(band);
222 			break;
223 		case FTL_BAND_STATE_OPEN:
224 			TAILQ_REMOVE(&band->dev->shut_bands, band, queue_entry);
225 			TAILQ_INSERT_HEAD(&pctx->open_bands, band, queue_entry);
226 			break;
227 		case FTL_BAND_STATE_CLOSED:
228 			break;
229 		default:
230 			status = -EINVAL;
231 		}
232 	}
233 
234 	if (status) {
235 		ftl_mngt_fail_step(mngt);
236 	} else {
237 		ftl_mngt_next_step(mngt);
238 	}
239 }
240 
241 static void
242 ftl_mngt_recovery_restore_band_state(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
243 {
244 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];
245 
246 	md->owner.cb_ctx = mngt;
247 	md->cb = restore_band_state_cb;
248 	ftl_md_restore(md);
249 }
250 
251 struct band_md_ctx {
252 	int status;
253 	uint64_t qd;
254 	uint64_t id;
255 };
256 
257 static void
258 ftl_mngt_recovery_walk_band_tail_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt,
259 				    ftl_band_md_cb cb)
260 {
261 	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
262 	uint64_t num_bands = ftl_get_num_bands(dev);
263 
264 	/*
265 	 * This function generates a high queue depth and uses ftl_mngt_continue_step on completions
266 	 * to make sure all bands are processed before returning an error (if any) or continuing on.
267 	 */
268 	if (0 == sctx->qd && sctx->id == num_bands) {
269 		if (sctx->status) {
270 			ftl_mngt_fail_step(mngt);
271 		} else {
272 			ftl_mngt_next_step(mngt);
273 		}
274 		return;
275 	}
276 
277 	while (sctx->id < num_bands) {
278 		struct ftl_band *band = &dev->bands[sctx->id];
279 
280 		if (FTL_BAND_STATE_FREE == band->md->state) {
281 			sctx->id++;
282 			continue;
283 		}
284 
285 		if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
286 			/* This band is already open and has a valid P2L map */
287 			sctx->id++;
288 			sctx->qd++;
289 			ftl_band_acquire_p2l_map(band);
290 			cb(band, mngt, FTL_MD_SUCCESS);
291 			continue;
292 		} else {
293 			if (dev->sb->ckpt_seq_id && (band->md->close_seq_id <= dev->sb->ckpt_seq_id)) {
294 				sctx->id++;
295 				continue;
296 			}
297 
298 			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
299 			if (ftl_band_alloc_p2l_map(band)) {
300 				/* No free P2L maps left, try again later */
301 				break;
302 			}
303 		}
304 
305 		sctx->id++;
306 		ftl_band_read_tail_brq_md(band, cb, mngt);
307 		sctx->qd++;
308 	}
309 
310 	if (0 == sctx->qd) {
311 		/*
312 		 * A zero QD can happen when all the leftover bands are in the free state.
313 		 * To streamline error handling (since many bands are reading P2L at the same time),
314 		 * use ftl_mngt_continue_step to arrive back at the step-end check at the beginning of this function.
315 		 */
316 		ftl_mngt_continue_step(mngt);
317 	}
318 }
319 
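/*
 * Seed the per-LBA sequence IDs for this iteration from the trim metadata and reset the
 * corresponding L2P snippet entries to FTL_ADDR_INVALID, so that band/chunk restore only keeps
 * mappings newer than the last trim of a given LBA.
 */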
320 static void
321 ftl_mngt_recovery_iteration_init_seq_ids(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
322 {
323 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
324 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
325 	uint64_t *trim_map = ftl_md_get_buffer(md);
326 	uint64_t page_id, trim_seq_id;
327 	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
328 	uint64_t lba, lba_off;
329 
330 	if (dev->sb->ckpt_seq_id) {
331 		FTL_ERRLOG(dev, "Checkpoint recovery not supported!\n");
332 		ftl_mngt_fail_step(mngt);
333 		return;
334 	}
335 
336 	for (lba = ctx->iter.lba_first; lba < ctx->iter.lba_last; lba++) {
337 		lba_off = lba - ctx->iter.lba_first;
338 		page_id = lba / lbas_in_page;
339 
340 		assert(page_id < ftl_md_get_buffer_size(md) / sizeof(*trim_map));
341 		assert(page_id < ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P)->current.blocks);
342 		assert(lba_off < ctx->l2p_snippet.count);
343 
344 		trim_seq_id = trim_map[page_id];
345 
346 		ctx->l2p_snippet.seq_id[lba_off] = trim_seq_id;
347 		ftl_addr_store(dev, ctx->l2p_snippet.l2p, lba_off, FTL_ADDR_INVALID);
348 	}
349 
350 	ftl_mngt_next_step(mngt);
351 }
352 
353 static void
354 l2p_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
355 {
356 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
357 
358 	if (status) {
359 		ftl_mngt_fail_step(mngt);
360 	} else {
361 		ftl_mngt_next_step(mngt);
362 	}
363 }
364 
365 static void
366 ftl_mngt_recovery_iteration_load_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
367 {
368 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
369 	struct ftl_md *md = ctx->l2p_snippet.md;
370 	struct ftl_layout_region *region = &ctx->l2p_snippet.region;
371 
372 	FTL_NOTICELOG(dev, "L2P recovery, iteration %u\n", ctx->iter.i);
373 	FTL_NOTICELOG(dev, "Load L2P, blocks [%"PRIu64", %"PRIu64"), LBAs [%"PRIu64", %"PRIu64")\n",
374 		      region->current.offset, region->current.offset + region->current.blocks,
375 		      ctx->iter.lba_first, ctx->iter.lba_last);
376 
377 	ftl_md_set_region(md, &ctx->l2p_snippet.region);
378 
379 	md->owner.cb_ctx = mngt;
380 	md->cb = l2p_cb;
381 	ftl_md_restore(md);
382 }
383 
384 static void
385 ftl_mngt_recovery_iteration_save_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
386 {
387 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
388 	struct ftl_md *md = ctx->l2p_snippet.md;
389 
390 	md->owner.cb_ctx = mngt;
391 	md->cb = l2p_cb;
392 	ftl_md_persist(md);
393 }
394 
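/*
 * Called for each band once its tail (P2L) metadata has been read. Verifies the P2L map checksum
 * of closed bands, then merges every mapping that falls into the current iteration's LBA range
 * into the L2P snippet, keeping only the entry with the newest sequence ID and invalidating stale
 * entries left in open/full bands.
 */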
395 static void
396 restore_band_l2p_cb(struct ftl_band *band, void *cntx, enum ftl_md_status status)
397 {
398 	struct ftl_mngt_process *mngt = cntx;
399 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
400 	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
401 	struct spdk_ftl_dev *dev = band->dev;
402 	ftl_addr addr, curr_addr;
403 	uint64_t i, lba, seq_id, num_blks_in_band;
404 	uint32_t band_map_crc;
405 	int rc = 0;
406 
407 	if (status != FTL_MD_SUCCESS) {
408 		FTL_ERRLOG(dev, "L2P band restore error, failed to read P2L map\n");
409 		rc = -EIO;
410 		goto cleanup;
411 	}
412 
413 	band_map_crc = spdk_crc32c_update(band->p2l_map.band_map,
414 					  ftl_tail_md_num_blocks(band->dev) * FTL_BLOCK_SIZE, 0);
415 
416 	/* The stored P2L map checksum is only valid if the band is in the closed state */
417 	if (FTL_BAND_STATE_CLOSED == band->md->state && band->md->p2l_map_checksum != band_map_crc) {
418 		FTL_ERRLOG(dev, "L2P band restore error, inconsistent P2L map CRC\n");
419 		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_BASE);
420 		rc = -EINVAL;
421 		goto cleanup;
422 	}
423 
424 	num_blks_in_band = ftl_get_num_blocks_in_band(dev);
425 	for (i = 0; i < num_blks_in_band; ++i) {
426 		uint64_t lba_off;
427 		lba = band->p2l_map.band_map[i].lba;
428 		seq_id = band->p2l_map.band_map[i].seq_id;
429 
430 		if (lba == FTL_LBA_INVALID) {
431 			continue;
432 		}
433 		if (lba >= dev->num_lbas) {
434 			FTL_ERRLOG(dev, "L2P band restore ERROR, LBA out of range\n");
435 			rc = -EINVAL;
436 			break;
437 		}
438 		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
439 			continue;
440 		}
441 
442 		lba_off = lba - pctx->iter.lba_first;
443 		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {
444 
445 			/* An overlapping band/chunk has newer data - invalidate the P2L map on the open/full band */
446 			if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
447 				addr = ftl_band_addr_from_block_offset(band, i);
448 				ftl_band_set_p2l(band, FTL_LBA_INVALID, addr, 0);
449 			}
450 
451 			/* Newer data already recovered */
452 			continue;
453 		}
454 
455 		addr = ftl_band_addr_from_block_offset(band, i);
456 
457 		curr_addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);
458 
459 		/* An overlapping band/chunk has newer data - invalidate the P2L map on the open/full band */
460 		if (curr_addr != FTL_ADDR_INVALID && !ftl_addr_in_nvc(dev, curr_addr) && curr_addr != addr) {
461 			struct ftl_band *curr_band = ftl_band_from_addr(dev, curr_addr);
462 
463 			if (FTL_BAND_STATE_OPEN == curr_band->md->state || FTL_BAND_STATE_FULL == curr_band->md->state) {
464 				size_t prev_offset = ftl_band_block_offset_from_addr(curr_band, curr_addr);
465 				if (curr_band->p2l_map.band_map[prev_offset].lba == lba &&
466 				    seq_id >= curr_band->p2l_map.band_map[prev_offset].seq_id) {
467 					ftl_band_set_p2l(curr_band, FTL_LBA_INVALID, curr_addr, 0);
468 				}
469 			}
470 		}
471 
472 		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
473 		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
474 	}
475 
476 
477 cleanup:
478 	ftl_band_release_p2l_map(band);
479 
480 	sctx->qd--;
481 	if (rc) {
482 		sctx->status = rc;
483 	}
484 
485 	ftl_mngt_continue_step(mngt);
486 }
487 
488 static void
489 ftl_mngt_recovery_iteration_restore_band_l2p(struct spdk_ftl_dev *dev,
490 		struct ftl_mngt_process *mngt)
491 {
492 	ftl_mngt_recovery_walk_band_tail_md(dev, mngt, restore_band_l2p_cb);
493 }
494 
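/*
 * Called for each NV cache chunk: after verifying the chunk P2L map checksum, merge the chunk's
 * mappings that fall into the current iteration's LBA range into the L2P snippet, unless a newer
 * mapping has already been recovered for a given LBA.
 */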
495 static int
496 restore_chunk_l2p_cb(struct ftl_nv_cache_chunk *chunk, void *ctx)
497 {
498 	struct ftl_mngt_recovery_ctx *pctx = ctx;
499 	struct spdk_ftl_dev *dev;
500 	struct ftl_nv_cache *nv_cache = chunk->nv_cache;
501 	ftl_addr addr;
502 	const uint64_t seq_id = chunk->md->seq_id;
503 	uint64_t i, lba;
504 	uint32_t chunk_map_crc;
505 
506 	dev = SPDK_CONTAINEROF(chunk->nv_cache, struct spdk_ftl_dev, nv_cache);
507 
508 	chunk_map_crc = spdk_crc32c_update(chunk->p2l_map.chunk_map,
509 					   ftl_nv_cache_chunk_tail_md_num_blocks(chunk->nv_cache) * FTL_BLOCK_SIZE, 0);
510 	if (chunk->md->p2l_map_checksum != chunk_map_crc) {
511 		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_NV_CACHE);
512 		return -1;
513 	}
514 
515 	for (i = 0; i < nv_cache->chunk_blocks; ++i) {
516 		uint64_t lba_off;
517 
518 		lba = ftl_chunk_map_get_lba(chunk, i);
519 
520 		if (lba == FTL_LBA_INVALID) {
521 			continue;
522 		}
523 		if (lba >= dev->num_lbas) {
524 			FTL_ERRLOG(dev, "L2P Chunk restore ERROR, LBA out of range\n");
525 			return -1;
526 		}
527 		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
528 			continue;
529 		}
530 
531 		lba_off = lba - pctx->iter.lba_first;
532 		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {
533 			/* Newer data already recovered */
534 			continue;
535 		}
536 
537 		addr = ftl_addr_from_nvc_offset(dev, chunk->offset + i);
538 		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
539 		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
540 	}
541 
542 	return 0;
543 }
544 
545 static void
546 ftl_mngt_recovery_iteration_restore_chunk_l2p(struct spdk_ftl_dev *dev,
547 		struct ftl_mngt_process *mngt)
548 {
549 	ftl_mngt_nv_cache_restore_l2p(dev, mngt, restore_chunk_l2p_cb, ftl_mngt_get_caller_ctx(mngt));
550 }
551 
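/*
 * Walk the L2P snippet recovered in this iteration: bump the per-band valid block counters for
 * mappings that point at the base device and mark every recovered address in the global valid
 * map. An address that is already marked valid indicates a corrupted L2P and fails the step.
 */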
552 static void
553 ftl_mngt_recovery_iteration_restore_valid_map(struct spdk_ftl_dev *dev,
554 		struct ftl_mngt_process *mngt)
555 {
556 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
557 	uint64_t lba, lba_off;
558 	ftl_addr addr;
559 
560 	for (lba = pctx->iter.lba_first; lba < pctx->iter.lba_last; lba++) {
561 		lba_off = lba - pctx->iter.lba_first;
562 		addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);
563 
564 		if (addr == FTL_ADDR_INVALID) {
565 			continue;
566 		}
567 
568 		if (!ftl_addr_in_nvc(dev, addr)) {
569 			struct ftl_band *band = ftl_band_from_addr(dev, addr);
570 			band->p2l_map.num_valid++;
571 		}
572 
573 		if (ftl_bitmap_get(dev->valid_map, addr)) {
574 			assert(false);
575 			ftl_mngt_fail_step(mngt);
576 			return;
577 		} else {
578 			ftl_bitmap_set(dev->valid_map, addr);
579 		}
580 	}
581 
582 	ftl_mngt_next_step(mngt);
583 }
584 
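/*
 * Cache the sequence ID stored in each P2L checkpoint region so that open bands can later be
 * matched against the checkpoint that was written for them.
 */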
585 static void
586 p2l_ckpt_preprocess(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *pctx)
587 {
588 	uint64_t seq_id;
589 	int md_region, ckpt_id;
590 
591 	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
592 	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
593 		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
594 		seq_id = ftl_mngt_p2l_ckpt_get_seq_id(dev, md_region);
595 		pctx->p2l_ckpt_seq_id[ckpt_id] = seq_id;
596 		FTL_NOTICELOG(dev, "P2L ckpt_id=%d found seq_id=%"PRIu64"\n", ckpt_id, seq_id);
597 	}
598 }
599 
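/*
 * Restore an open band's P2L map from the P2L checkpoint whose sequence ID matches the band's
 * sequence ID. If no checkpoint matches, the band holds no valid blocks and its write iterator
 * is reset to the beginning of the band.
 */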
600 static int
601 p2l_ckpt_restore_p2l(struct ftl_mngt_recovery_ctx *pctx, struct ftl_band *band)
602 {
603 	uint64_t seq_id;
604 	int md_region, ckpt_id;
605 
606 	memset(band->p2l_map.band_map, -1,
607 	       FTL_BLOCK_SIZE * ftl_p2l_map_num_blocks(band->dev));
608 
609 	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
610 	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
611 		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
612 		seq_id = pctx->p2l_ckpt_seq_id[ckpt_id];
613 		if (seq_id == band->md->seq) {
614 			FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u ckpt_id=%d seq_id=%"
615 				      PRIu64"\n", band->id, ckpt_id, seq_id);
616 			return ftl_mngt_p2l_ckpt_restore(band, md_region, seq_id);
617 		}
618 	}
619 
620 	/* The band was opened but has no valid blocks in it, set the write pointer to 0 */
621 	ftl_band_iter_init(band);
622 	FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u, band_seq_id=%"PRIu64" does not"
623 		      " match any P2L checkpoint\n", band->id, band->md->seq);
624 	return 0;
625 }
626 
627 static void
628 ftl_mngt_recovery_pre_process_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
629 {
630 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
631 
632 	p2l_ckpt_preprocess(dev, pctx);
633 	ftl_mngt_next_step(mngt);
634 }
635 
636 static void
637 ftl_mngt_recover_seq_id(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
638 {
639 	ftl_recover_max_seq(dev);
640 	ftl_mngt_next_step(mngt);
641 }
642 
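/*
 * Recover the bands that were open at dirty shutdown. The first invocation allocates a P2L map
 * for every open band and restores it from the matching P2L checkpoint; then one band per pass
 * is taken off the open list, marked full if its write pointer reached the end of the band, and
 * moved to the shut list, until the open list is empty.
 */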
643 static void
644 ftl_mngt_recovery_open_bands_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
645 {
646 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
647 	struct ftl_band *band;
648 
649 	if (TAILQ_EMPTY(&pctx->open_bands)) {
650 		FTL_NOTICELOG(dev, "No more open bands to recover from P2L\n");
651 		if (pctx->status) {
652 			ftl_mngt_fail_step(mngt);
653 		} else {
654 			ftl_mngt_next_step(mngt);
655 		}
656 		return;
657 	}
658 
659 	if (!ftl_mngt_get_step_ctx(mngt)) {
660 		ftl_mngt_alloc_step_ctx(mngt, sizeof(bool));
661 
662 		/* First time the step is called, initialize */
663 		TAILQ_FOREACH(band, &pctx->open_bands, queue_entry) {
664 			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
665 			if (ftl_band_alloc_p2l_map(band)) {
666 				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot allocate P2L map\n");
667 				ftl_mngt_fail_step(mngt);
668 				return;
669 			}
670 
671 			if (p2l_ckpt_restore_p2l(pctx, band)) {
672 				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot restore P2L\n");
673 				ftl_mngt_fail_step(mngt);
674 				return;
675 			}
676 
677 			if (!band->p2l_map.p2l_ckpt) {
678 				band->p2l_map.p2l_ckpt = ftl_p2l_ckpt_acquire_region_type(dev, band->md->p2l_md_region);
679 				if (!band->p2l_map.p2l_ckpt) {
680 					FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot acquire P2L\n");
681 					ftl_mngt_fail_step(mngt);
682 					return;
683 				}
684 			}
685 		}
686 	}
687 
688 	band = TAILQ_FIRST(&pctx->open_bands);
689 
690 	if (ftl_band_filled(band, band->md->iter.offset)) {
691 		band->md->state = FTL_BAND_STATE_FULL;
692 	}
693 
694 	/* In the next step (finalize band initialization) this band will
695 	 * be assigned to the writer. So we temporarily move this band
696 	 * to the closed list, and in the next step it will be moved to
697 	 * the writer from that list.
698 	 */
699 	TAILQ_REMOVE(&pctx->open_bands, band, queue_entry);
700 	TAILQ_INSERT_TAIL(&dev->shut_bands, band, queue_entry);
701 
702 	FTL_NOTICELOG(dev, "Open band recovered, id = %u, seq id %"PRIu64", write offset %"PRIu64"\n",
703 		      band->id, band->md->seq, band->md->iter.offset);
704 
705 	ftl_mngt_continue_step(mngt);
706 }
707 
708 static void
709 ftl_mngt_restore_valid_counters(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
710 {
711 	ftl_valid_map_load_state(dev);
712 	ftl_mngt_next_step(mngt);
713 }
714 
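/* A trim transaction is pending if the trim log header holds a non-zero sequence ID */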
715 static bool
716 trim_pending(struct spdk_ftl_dev *dev)
717 {
718 	struct ftl_trim_log *log = ftl_md_get_buffer(dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG]);
719 
720 	if (log->hdr.trim.seq_id) {
721 		return true;
722 	}
723 
724 	return false;
725 }
726 
727 static void
728 ftl_mngt_recover_trim_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
729 {
730 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
731 	if (!status) {
732 		ftl_mngt_next_step(mngt);
733 	} else {
734 		ftl_mngt_fail_step(mngt);
735 	}
736 }
737 
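/*
 * If the shared memory superblock recorded a trim that was still in progress during the dirty
 * shutdown, replay it into the trim map; an incomplete trim found in the trim log is reported
 * here and finished by the subsequent persist steps.
 */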
738 static void
739 ftl_mngt_complete_trim(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
740 {
741 	uint64_t start_lba, num_blocks, seq_id;
742 
743 	if (dev->sb_shm->trim.in_progress) {
744 		start_lba = dev->sb_shm->trim.start_lba;
745 		num_blocks = dev->sb_shm->trim.num_blocks;
746 		seq_id = dev->sb_shm->trim.seq_id;
747 		assert(seq_id <= dev->sb->seq_id);
748 		ftl_set_trim_map(dev, start_lba, num_blocks, seq_id);
749 	}
750 
751 	if (trim_pending(dev)) {
752 		struct ftl_trim_log *log = ftl_md_get_buffer(dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG]);
753 		FTL_NOTICELOG(dev, "Incomplete trim detected lba: %"PRIu64" num_blocks: %"PRIu64"\n",
754 			      log->hdr.trim.start_lba, log->hdr.trim.num_blocks);
755 	}
756 
757 	ftl_mngt_next_step(mngt);
758 }
759 
760 static void
761 ftl_mngt_recover_trim_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
762 {
763 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
764 
765 	md->owner.cb_ctx = mngt;
766 	md->cb = ftl_mngt_recover_trim_cb;
767 	ftl_md_restore(md);
768 }
769 
770 static void
771 ftl_mngt_recover_trim_md_persist(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
772 {
773 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
774 
775 	if (!trim_pending(dev)) {
776 		/* No pending trim logged */
777 		ftl_mngt_skip_step(mngt);
778 		return;
779 	}
780 
781 	md->owner.cb_ctx = mngt;
782 	md->cb = ftl_mngt_recover_trim_cb;
783 	ftl_md_persist(md);
784 }
785 
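/*
 * Trim log restore completion: if the log records a pending trim, validate that its LBA range is
 * aligned to whole L2P pages and stamp the logged sequence ID into the affected trim metadata
 * pages so that the interrupted trim can be completed.
 */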
786 static void
787 ftl_mngt_recover_trim_log_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
788 {
789 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
790 	struct ftl_trim_log *log = ftl_md_get_buffer(md);
791 	uint64_t *page;
792 
793 	if (status) {
794 		ftl_mngt_fail_step(mngt);
795 		return;
796 	}
797 
798 	if (!trim_pending(dev)) {
799 		/* No pending trim logged */
800 		ftl_mngt_skip_step(mngt);
801 		return;
802 	}
803 
804 	/* Pending trim, complete the trim transaction */
805 	const uint64_t seq_id = log->hdr.trim.seq_id;
806 	const uint64_t lba = log->hdr.trim.start_lba;
807 	const uint64_t num_blocks = log->hdr.trim.num_blocks;
808 	const uint64_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
809 	const uint64_t first_page = lba / lbas_in_page;
810 	const uint64_t num_pages = num_blocks / lbas_in_page;
811 
812 	page = ftl_md_get_buffer(dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD]);
813 
814 	if (lba % lbas_in_page || num_blocks % lbas_in_page) {
815 		FTL_ERRLOG(dev, "Invalid trim log content\n");
816 		ftl_mngt_fail_step(mngt);
817 		return;
818 	}
819 
820 	for (uint64_t i = first_page; i < first_page + num_pages; ++i) {
821 		if (page[i] > seq_id) {
822 			FTL_ERRLOG(dev, "Invalid trim metadata content\n");
823 			ftl_mngt_fail_step(mngt);
824 			return;
825 		}
826 		page[i] = seq_id;
827 	}
828 
829 	ftl_mngt_next_step(mngt);
830 }
831 
832 static void
833 ftl_mngt_recover_trim_log(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
834 {
835 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG];
836 
837 	md->owner.cb_ctx = mngt;
838 	md->cb = ftl_mngt_recover_trim_log_cb;
839 	ftl_md_restore(md);
840 }
841 
842 static void
843 ftl_mngt_recover_trim_persist(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
844 {
845 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG];
846 
847 	if (!trim_pending(dev)) {
848 		/* No pending trim logged */
849 		ftl_mngt_skip_step(mngt);
850 		return;
851 	}
852 
853 	md->owner.cb_ctx = mngt;
854 	md->cb = ftl_mngt_recover_trim_cb;
855 	ftl_md_persist(md);
856 }
857 
858 static void
859 ftl_mngt_recover_trim_log_clear(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
860 {
861 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_LOG];
862 	struct ftl_trim_log *log = ftl_md_get_buffer(md);
863 
864 	if (!trim_pending(dev)) {
865 		/* No pending trim logged */
866 		ftl_mngt_skip_step(mngt);
867 		return;
868 	}
869 
870 	memset(&log->hdr, 0, sizeof(log->hdr));
871 	md->owner.cb_ctx = mngt;
872 	md->cb = ftl_mngt_recover_trim_cb;
873 	ftl_md_persist(md);
874 }
875 
876 static const struct ftl_mngt_process_desc g_desc_trim_recovery = {
877 	.name = "FTL trim recovery ",
878 	.steps = {
879 		{
880 			.name = "Recover trim metadata",
881 			.action = ftl_mngt_recover_trim_md,
882 		},
883 		{
884 			.name = "Recover trim log",
885 			.action = ftl_mngt_recover_trim_log,
886 		},
887 		{
888 			.name = "Persist trim metadata",
889 			.action = ftl_mngt_recover_trim_md_persist,
890 		},
891 		{
892 			.name = "Clear trim log",
893 			.action = ftl_mngt_recover_trim_log_clear,
894 		},
895 		{}
896 	}
897 };
898 
899 static const struct ftl_mngt_process_desc g_desc_trim_shm_recovery = {
900 	.name = "FTL trim shared memory recovery ",
901 	.steps = {
902 		{
903 			.name = "Complete trim transaction",
904 			.action = ftl_mngt_complete_trim,
905 		},
906 		{
907 			.name = "Persist trim log",
908 			.action = ftl_mngt_recover_trim_persist,
909 		},
910 		{
911 			.name = "Persist trim metadata",
912 			.action = ftl_mngt_recover_trim_md_persist,
913 		},
914 		{
915 			.name = "Clear trim log",
916 			.action = ftl_mngt_recover_trim_log_clear,
917 		},
918 		{}
919 	}
920 };
921 
922 static void
923 ftl_mngt_recover_trim(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
924 {
925 	if (ftl_fast_recovery(dev)) {
926 		ftl_mngt_skip_step(mngt);
927 		return;
928 	}
929 
930 	ftl_mngt_call_process(mngt, &g_desc_trim_recovery, NULL);
931 }
932 
933 static void
934 ftl_mngt_recover_trim_shm(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
935 {
936 	ftl_mngt_call_process(mngt, &g_desc_trim_shm_recovery, NULL);
937 }
938 
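/* Run the shared memory L2P recovery sub-process only when fast (SHM) recovery is possible */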
939 static void
940 ftl_mngt_recovery_shm_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
941 {
942 	if (ftl_fast_recovery(dev)) {
943 		ftl_mngt_call_process(mngt, &g_desc_recovery_shm, NULL);
944 	} else {
945 		ftl_mngt_skip_step(mngt);
946 	}
947 }
948 
949 /*
950  * During dirty shutdown recovery, the whole L2P needs to be reconstructed. However,
951  * recreating it all at the same time may take up too much DRAM, so it's done in multiple
952  * iterations. This process describes the recovery of a part of L2P in one iteration.
953  */
954 static const struct ftl_mngt_process_desc g_desc_recovery_iteration = {
955 	.name = "FTL recovery iteration",
956 	.steps = {
957 		{
958 			.name = "Load L2P",
959 			.action = ftl_mngt_recovery_iteration_load_l2p,
960 		},
961 		{
962 			.name = "Initialize sequence IDs",
963 			.action = ftl_mngt_recovery_iteration_init_seq_ids,
964 		},
965 		{
966 			.name = "Restore chunk L2P",
967 			.action = ftl_mngt_recovery_iteration_restore_chunk_l2p,
968 		},
969 		{
970 			.name = "Restore band L2P",
971 			.ctx_size = sizeof(struct band_md_ctx),
972 			.action = ftl_mngt_recovery_iteration_restore_band_l2p,
973 		},
974 		{
975 			.name = "Restore valid map",
976 			.action = ftl_mngt_recovery_iteration_restore_valid_map,
977 		},
978 		{
979 			.name = "Save L2P",
980 			.action = ftl_mngt_recovery_iteration_save_l2p,
981 		},
982 		{}
983 	}
984 };
985 
986 /*
987  * Loading of FTL after dirty shutdown. Recovers metadata and L2P, and decides on the number of
988  * recovery iterations to execute (depending on the ratio of L2P cache size to total L2P size)
989  */
990 static const struct ftl_mngt_process_desc g_desc_recovery = {
991 	.name = "FTL recovery",
992 	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
993 	.steps = {
994 		{
995 			.name = "Initialize recovery",
996 			.action = ftl_mngt_recovery_init,
997 			.cleanup = ftl_mngt_recovery_deinit
998 		},
999 		{
1000 			.name = "Recover band state",
1001 			.action = ftl_mngt_recovery_restore_band_state,
1002 		},
1003 		{
1004 			.name = "Initialize P2L checkpointing",
1005 			.action = ftl_mngt_p2l_init_ckpt,
1006 			.cleanup = ftl_mngt_p2l_deinit_ckpt
1007 		},
1008 		{
1009 			.name = "Restore P2L checkpoints",
1010 			.action = ftl_mngt_p2l_restore_ckpt
1011 		},
1012 		{
1013 			.name = "Preprocess P2L checkpoints",
1014 			.action = ftl_mngt_recovery_pre_process_p2l
1015 		},
1016 		{
1017 			.name = "Recover open bands P2L",
1018 			.action = ftl_mngt_recovery_open_bands_p2l
1019 		},
1020 		{
1021 			.name = "Recover chunk state",
1022 			.action = ftl_mngt_nv_cache_restore_chunk_state
1023 		},
1024 		{
1025 			.name = "Recover max seq ID",
1026 			.action = ftl_mngt_recover_seq_id
1027 		},
1028 		{
1029 			.name = "Recover trim",
1030 			.action = ftl_mngt_recover_trim
1031 		},
1032 		{
1033 			.name = "Recover open chunks P2L",
1034 			.action = ftl_mngt_nv_cache_recover_open_chunk
1035 		},
1036 		{
1037 			.name = "Recovery iterations",
1038 			.action = ftl_mngt_recovery_run_iteration,
1039 		},
1040 		{
1041 			.name = "Deinitialize recovery",
1042 			.action = ftl_mngt_recovery_deinit
1043 		},
1044 		{
1045 			.name = "Initialize L2P",
1046 			.action = ftl_mngt_init_l2p,
1047 			.cleanup = ftl_mngt_deinit_l2p
1048 		},
1049 		{
1050 			.name = "Recover L2P from shared memory",
1051 			.action = ftl_mngt_recovery_shm_l2p,
1052 		},
1053 		{
1054 			.name = "Finalize band initialization",
1055 			.action = ftl_mngt_finalize_init_bands,
1056 		},
1057 		{
1058 			.name = "Start core poller",
1059 			.action = ftl_mngt_start_core_poller,
1060 			.cleanup = ftl_mngt_stop_core_poller
1061 		},
1062 		{
1063 			.name = "Self test on startup",
1064 			.action = ftl_mngt_self_test
1065 		},
1066 		{
1067 			.name = "Finalize initialization",
1068 			.action = ftl_mngt_finalize_startup,
1069 		},
1070 		{}
1071 	}
1072 };
1073 
1074 /*
1075  * Shared-memory-specific steps for dirty shutdown recovery - the main task is rebuilding the
1076  * state of the L2P cache (paged in/out status, dirtiness etc. of individual pages).
1077  */
1078 static const struct ftl_mngt_process_desc g_desc_recovery_shm = {
1079 	.name = "FTL recovery from SHM",
1080 	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
1081 	.steps = {
1082 		{
1083 			.name = "Restore L2P from shared memory",
1084 			.action = ftl_mngt_restore_l2p,
1085 		},
1086 		{
1087 			.name = "Restore valid maps counters",
1088 			.action = ftl_mngt_restore_valid_counters,
1089 		},
1090 		{
1091 			.name = "Recover trim from shared memory",
1092 			.action = ftl_mngt_recover_trim_shm,
1093 		},
1094 		{}
1095 	}
1096 };
1097 
1098 void
1099 ftl_mngt_recover(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
1100 {
1101 	ftl_mngt_call_process(mngt, &g_desc_recovery, NULL);
1102 }
1103