xref: /spdk/lib/ftl/mngt/ftl_mngt_recovery.c (revision 42fd001310188f0635a3953f3b0ea0b33a840902)
1 /*   SPDX-License-Identifier: BSD-3-Clause
2  *   Copyright (C) 2022 Intel Corporation.
3  *   All rights reserved.
4  */
5 
6 #include "spdk/bdev_module.h"
7 
8 #include "ftl_nv_cache.h"
9 #include "ftl_core.h"
10 #include "ftl_utils.h"
11 #include "ftl_band.h"
12 #include "ftl_internal.h"
13 #include "ftl_l2p_cache.h"
14 #include "ftl_mngt.h"
15 #include "ftl_mngt_steps.h"
16 #include "utils/ftl_addr_utils.h"
17 
/*
 * Context of the main recovery process (its size is registered as ctx_size of
 * g_desc_recovery). Steps of the main process reach it with ftl_mngt_get_process_ctx(),
 * steps of the nested iteration process with ftl_mngt_get_caller_ctx().
 */
struct ftl_mngt_recovery_ctx {
	/* Main recovery FTL management process */
	struct ftl_mngt_process *main;
	/* When non-zero, the open bands P2L step fails once the open band list is drained */
	int status;
	/* Bands found in the OPEN state while restoring the band metadata */
	TAILQ_HEAD(, ftl_band) open_bands;
	/* NOTE(review): not referenced by the code visible in this file - confirm it is still needed */
	uint64_t open_bands_num;
	/* Window of the L2P that is rebuilt during a single recovery iteration */
	struct {
		/* Copy of the L2P layout region narrowed down to the current window */
		struct ftl_layout_region region;
		/* Metadata object whose buffer backs both the l2p and seq_id arrays below */
		struct ftl_md *md;
		/* L2P entries of the window, located at the start of the md buffer */
		uint64_t *l2p;
		/* Per-LBA sequence IDs of the window, located right after the L2P blocks in the md buffer */
		uint64_t *seq_id;
		/* Number of LBAs the window can hold (block_limit * LBAs per block) */
		uint64_t count;
	} l2p_snippet;
	/* Recovery iterator state, updated by recovery_iter_advance() */
	struct {
		/* Maximum number of L2P blocks processed in one iteration */
		uint64_t block_limit;
		/* First LBA covered by the current iteration (inclusive) */
		uint64_t lba_first;
		/* Last LBA covered by the current iteration (exclusive), clamped to dev->num_lbas */
		uint64_t lba_last;
		/* Iteration counter, incremented on every advance */
		uint32_t i;
	} iter;
	/* Sequence ID found in each P2L checkpoint region, filled by p2l_ckpt_preprocess() */
	uint64_t p2l_ckpt_seq_id[FTL_LAYOUT_REGION_TYPE_P2L_COUNT];
};
39 
/*
 * Forward declarations of the process descriptors; they are defined at the bottom of
 * the file but are referenced by the step functions above their definitions.
 */
static const struct ftl_mngt_process_desc g_desc_recovery_iteration;
static const struct ftl_mngt_process_desc g_desc_recovery;
static const struct ftl_mngt_process_desc g_desc_recovery_shm;
43 
44 static bool
45 recovery_iter_done(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
46 {
47 	return 0 == ctx->l2p_snippet.region.current.blocks;
48 }
49 
50 static void
51 recovery_iter_advance(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *ctx)
52 {
53 	struct ftl_layout_region *region, *snippet;
54 	uint64_t first_block, last_blocks;
55 
56 	ctx->iter.i++;
57 	region = ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P);
58 	snippet = &ctx->l2p_snippet.region;
59 
60 	/* Advance processed blocks */
61 	snippet->current.offset += snippet->current.blocks;
62 	snippet->current.blocks = region->current.offset + region->current.blocks - snippet->current.offset;
63 	snippet->current.blocks = spdk_min(snippet->current.blocks, ctx->iter.block_limit);
64 
65 	first_block = snippet->current.offset - region->current.offset;
66 	ctx->iter.lba_first = first_block * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);
67 
68 	last_blocks = first_block + snippet->current.blocks;
69 	ctx->iter.lba_last = last_blocks * (FTL_BLOCK_SIZE / dev->layout.l2p.addr_size);
70 
71 	if (ctx->iter.lba_last > dev->num_lbas) {
72 		ctx->iter.lba_last = dev->num_lbas;
73 	}
74 }
75 
76 static void
77 ftl_mngt_recovery_init(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
78 {
79 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
80 	const uint64_t lbas_in_block = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
81 	uint64_t mem_limit, lba_limit, l2p_limit, iterations, seq_limit;
82 	uint64_t l2p_limit_block, seq_limit_block, md_blocks;
83 	int md_flags;
84 
85 	ctx->main = mngt;
86 
87 	if (ftl_fast_recovery(dev)) {
88 		/* If shared memory fast recovery then we don't need temporary buffers */
89 		ftl_mngt_next_step(mngt);
90 		return;
91 	}
92 
93 	/*
94 	 * Recovery process allocates temporary buffers, to not exceed memory limit free L2P
95 	 * metadata buffers if they exist, they will be recreated in L2P initialization phase
96 	 */
97 	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L1, ftl_md_create_shm_flags(dev));
98 	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2, ftl_md_create_shm_flags(dev));
99 	ftl_md_unlink(dev, FTL_L2P_CACHE_MD_NAME_L2_CTX, ftl_md_create_shm_flags(dev));
100 
101 	/* Below values are in byte unit */
102 	mem_limit = dev->conf.l2p_dram_limit * MiB;
103 	mem_limit = spdk_min(mem_limit, spdk_divide_round_up(dev->num_lbas * dev->layout.l2p.addr_size,
104 			     MiB) * MiB);
105 
106 	lba_limit = mem_limit / (sizeof(uint64_t) + dev->layout.l2p.addr_size);
107 	l2p_limit = lba_limit * dev->layout.l2p.addr_size;
108 	iterations = spdk_divide_round_up(dev->num_lbas, lba_limit);
109 
110 	ctx->iter.block_limit = spdk_divide_round_up(l2p_limit, FTL_BLOCK_SIZE);
111 
112 	/* Round to block size */
113 	ctx->l2p_snippet.count = ctx->iter.block_limit * lbas_in_block;
114 
115 	seq_limit = ctx->l2p_snippet.count * sizeof(uint64_t);
116 
117 	FTL_NOTICELOG(dev, "Recovery memory limit: %"PRIu64"MiB\n", (uint64_t)(mem_limit / MiB));
118 	FTL_NOTICELOG(dev, "L2P resident size: %"PRIu64"MiB\n", (uint64_t)(l2p_limit / MiB));
119 	FTL_NOTICELOG(dev, "Seq ID resident size: %"PRIu64"MiB\n", (uint64_t)(seq_limit / MiB));
120 	FTL_NOTICELOG(dev, "Recovery iterations: %"PRIu64"\n", iterations);
121 	dev->sb->ckpt_seq_id = 0;
122 
123 	/* Initialize region */
124 	ctx->l2p_snippet.region = *ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P);
125 	/* Limit blocks in region, it will be needed for ftl_md_set_region */
126 	ctx->l2p_snippet.region.current.blocks = ctx->iter.block_limit;
127 
128 	l2p_limit_block = ctx->iter.block_limit;
129 	seq_limit_block = spdk_divide_round_up(seq_limit, FTL_BLOCK_SIZE);
130 
131 	md_blocks = l2p_limit_block + seq_limit_block;
132 	md_flags = FTL_MD_CREATE_SHM | FTL_MD_CREATE_SHM_NEW;
133 
134 	/* Initialize snippet of L2P metadata */
135 	ctx->l2p_snippet.md = ftl_md_create(dev, md_blocks, 0, "l2p_recovery", md_flags,
136 					    &ctx->l2p_snippet.region);
137 	if (!ctx->l2p_snippet.md) {
138 		ftl_mngt_fail_step(mngt);
139 		return;
140 	}
141 
142 	ctx->l2p_snippet.l2p = ftl_md_get_buffer(ctx->l2p_snippet.md);
143 
144 	/* Initialize recovery iterator, we call it with blocks set to zero,
145 	 * it means zero block done (processed), thanks that it will recalculate
146 	 *  offsets and starting LBA to initial position */
147 	ctx->l2p_snippet.region.current.blocks = 0;
148 	recovery_iter_advance(dev, ctx);
149 
150 	/* Initialize snippet of sequence IDs */
151 	ctx->l2p_snippet.seq_id = (uint64_t *)((char *)ftl_md_get_buffer(ctx->l2p_snippet.md) +
152 					       (l2p_limit_block * FTL_BLOCK_SIZE));
153 
154 	TAILQ_INIT(&ctx->open_bands);
155 	ftl_mngt_next_step(mngt);
156 }
157 
158 static void
159 ftl_mngt_recovery_deinit(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
160 {
161 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
162 
163 	ftl_md_destroy(ctx->l2p_snippet.md, 0);
164 	ctx->l2p_snippet.md = NULL;
165 	ctx->l2p_snippet.seq_id = NULL;
166 
167 	ftl_mngt_next_step(mngt);
168 }
169 
170 static void
171 recovery_iteration_cb(struct spdk_ftl_dev *dev, void *_ctx, int status)
172 {
173 	struct ftl_mngt_recovery_ctx *ctx = _ctx;
174 
175 	recovery_iter_advance(dev, ctx);
176 
177 	if (status) {
178 		ftl_mngt_fail_step(ctx->main);
179 	} else {
180 		ftl_mngt_continue_step(ctx->main);
181 	}
182 }
183 
184 static void
185 ftl_mngt_recovery_run_iteration(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
186 {
187 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_process_ctx(mngt);
188 
189 	if (ftl_fast_recovery(dev)) {
190 		ftl_mngt_skip_step(mngt);
191 		return;
192 	}
193 
194 	if (recovery_iter_done(dev, ctx)) {
195 		ftl_mngt_next_step(mngt);
196 	} else {
197 		ftl_mngt_process_execute(dev, &g_desc_recovery_iteration, recovery_iteration_cb, ctx);
198 	}
199 }
200 
201 static void
202 restore_band_state_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
203 {
204 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
205 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
206 	struct ftl_band *band;
207 	uint64_t num_bands = ftl_get_num_bands(dev);
208 	uint64_t i;
209 
210 	if (status) {
211 		/* Restore error, end step */
212 		ftl_mngt_fail_step(mngt);
213 		return;
214 	}
215 
216 	for (i = 0; i < num_bands; i++) {
217 		band = &dev->bands[i];
218 
219 		switch (band->md->state) {
220 		case FTL_BAND_STATE_FREE:
221 			ftl_band_initialize_free_state(band);
222 			break;
223 		case FTL_BAND_STATE_OPEN:
224 			TAILQ_REMOVE(&band->dev->shut_bands, band, queue_entry);
225 			TAILQ_INSERT_HEAD(&pctx->open_bands, band, queue_entry);
226 			break;
227 		case FTL_BAND_STATE_CLOSED:
228 			break;
229 		default:
230 			status = -EINVAL;
231 		}
232 	}
233 
234 	if (status) {
235 		ftl_mngt_fail_step(mngt);
236 	} else {
237 		ftl_mngt_next_step(mngt);
238 	}
239 }
240 
241 static void
242 ftl_mngt_recovery_restore_band_state(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
243 {
244 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_BAND_MD];
245 
246 	md->owner.cb_ctx = mngt;
247 	md->cb = restore_band_state_cb;
248 	ftl_md_restore(md);
249 }
250 
/*
 * Step context of the "Restore band L2P" step, used by
 * ftl_mngt_recovery_walk_band_tail_md() and its completion callback.
 */
struct band_md_ctx {
	/* Last error reported by a band callback; non-zero fails the step at the end of the walk */
	int status;
	/* Number of band callbacks still outstanding */
	uint64_t qd;
	/* Index of the next band to be visited */
	uint64_t id;
};
256 
/*
 * Walks all bands and invokes cb for each non-free band once its P2L map is available.
 *
 * The function is re-entered through ftl_mngt_continue_step(); progress is kept in the
 * step context (struct band_md_ctx): id is the next band to visit and qd counts callbacks
 * that have been issued but not completed. The callback is expected to decrement qd and
 * continue the step (see restore_band_l2p_cb()).
 *
 * @param dev  FTL device
 * @param mngt Management process whose current step owns the band_md_ctx
 * @param cb   Callback invoked per band with mngt as its context
 */
static void
ftl_mngt_recovery_walk_band_tail_md(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt,
				    ftl_band_md_cb cb)
{
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	uint64_t num_bands = ftl_get_num_bands(dev);

	/*
	 * This function generates a high queue depth and will utilize ftl_mngt_continue_step during completions to make sure all bands
	 * are processed before returning an error (if any were found) or continuing on.
	 */
	if (0 == sctx->qd && sctx->id == num_bands) {
		/* All bands visited and every callback completed - finish the step */
		if (sctx->status) {
			ftl_mngt_fail_step(mngt);
		} else {
			ftl_mngt_next_step(mngt);
		}
		return;
	}

	while (sctx->id < num_bands) {
		struct ftl_band *band = &dev->bands[sctx->id];

		/* Free bands are skipped */
		if (FTL_BAND_STATE_FREE == band->md->state) {
			sctx->id++;
			continue;
		}

		if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
			/* This band is already open and has valid P2L map */
			sctx->id++;
			sctx->qd++;
			ftl_band_acquire_p2l_map(band);
			/* No IO needed, the callback is invoked directly with a success status */
			cb(band, mngt, FTL_MD_SUCCESS);
			continue;
		} else {
			/* Bands closed no later than a non-zero checkpoint sequence ID are skipped */
			if (dev->sb->ckpt_seq_id && (band->md->close_seq_id <= dev->sb->ckpt_seq_id)) {
				sctx->id++;
				continue;
			}

			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
			if (ftl_band_alloc_p2l_map(band)) {
				/* No more free P2L map, try later */
				break;
			}
		}

		/* Read the band's tail metadata; cb is passed as the completion callback */
		sctx->id++;
		ftl_band_read_tail_brq_md(band, cb, mngt);
		sctx->qd++;
	}

	if (0 == sctx->qd) {
		/*
		 * No QD could happen due to all leftover bands being in free state.
		 * For streamlining of all potential error handling (since many bands are reading P2L at the same time),
		 * we're using ftl_mngt_continue_step to arrive at the same spot of checking for mngt step end (see beginning of function).
		 */
		ftl_mngt_continue_step(mngt);
	}
}
319 
320 static void
321 ftl_mngt_recovery_iteration_init_seq_ids(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
322 {
323 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
324 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
325 	uint64_t *trim_map = ftl_md_get_buffer(md);
326 	uint64_t page_id, trim_seq_id;
327 	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
328 	uint64_t lba, lba_off;
329 
330 	if (dev->sb->ckpt_seq_id) {
331 		FTL_ERRLOG(dev, "Checkpoint recovery not supported!\n");
332 		ftl_mngt_fail_step(mngt);
333 		return;
334 	}
335 
336 	for (lba = ctx->iter.lba_first; lba < ctx->iter.lba_last; lba++) {
337 		lba_off = lba - ctx->iter.lba_first;
338 		page_id = lba / lbas_in_page;
339 
340 		assert(page_id < ftl_md_get_buffer_size(md) / sizeof(*trim_map));
341 		assert(page_id < ftl_layout_region_get(dev, FTL_LAYOUT_REGION_TYPE_L2P)->current.blocks);
342 		assert(lba_off < ctx->l2p_snippet.count);
343 
344 		trim_seq_id = trim_map[page_id];
345 
346 		ctx->l2p_snippet.seq_id[lba_off] = trim_seq_id;
347 		ftl_addr_store(dev, ctx->l2p_snippet.l2p, lba_off, FTL_ADDR_INVALID);
348 	}
349 
350 	ftl_mngt_next_step(mngt);
351 }
352 
353 static void
354 l2p_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
355 {
356 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
357 
358 	if (status) {
359 		ftl_mngt_fail_step(mngt);
360 	} else {
361 		ftl_mngt_next_step(mngt);
362 	}
363 }
364 
365 static void
366 ftl_mngt_recovery_iteration_load_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
367 {
368 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
369 	struct ftl_md *md = ctx->l2p_snippet.md;
370 	struct ftl_layout_region *region = &ctx->l2p_snippet.region;
371 
372 	FTL_NOTICELOG(dev, "L2P recovery, iteration %u\n", ctx->iter.i);
373 	FTL_NOTICELOG(dev, "Load L2P, blocks [%"PRIu64", %"PRIu64"), LBAs [%"PRIu64", %"PRIu64")\n",
374 		      region->current.offset, region->current.offset + region->current.blocks,
375 		      ctx->iter.lba_first, ctx->iter.lba_last);
376 
377 	ftl_md_set_region(md, &ctx->l2p_snippet.region);
378 
379 	md->owner.cb_ctx = mngt;
380 	md->cb = l2p_cb;
381 	ftl_md_restore(md);
382 }
383 
384 static void
385 ftl_mngt_recovery_iteration_save_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
386 {
387 	struct ftl_mngt_recovery_ctx *ctx = ftl_mngt_get_caller_ctx(mngt);
388 	struct ftl_md *md = ctx->l2p_snippet.md;
389 
390 	md->owner.cb_ctx = mngt;
391 	md->cb = l2p_cb;
392 	ftl_md_persist(md);
393 }
394 
/*
 * Per-band callback of the "Restore band L2P" step. Merges the band's P2L map into the
 * L2P snippet of the current recovery iteration: for every valid LBA inside the current
 * window the L2P entry is pointed at this band unless the snippet already holds a higher
 * sequence ID for that LBA.
 *
 * On every path the band's P2L map is released, the step's queue depth is decremented and
 * the step is continued; an error is recorded in the step context instead of failing
 * immediately.
 *
 * @param band   Band whose P2L map is being processed
 * @param cntx   Management process (struct ftl_mngt_process *) running the step
 * @param status Result of obtaining the P2L map
 */
static void
restore_band_l2p_cb(struct ftl_band *band, void *cntx, enum ftl_md_status status)
{
	struct ftl_mngt_process *mngt = cntx;
	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
	struct band_md_ctx *sctx = ftl_mngt_get_step_ctx(mngt);
	struct spdk_ftl_dev *dev = band->dev;
	ftl_addr addr, curr_addr;
	uint64_t i, lba, seq_id, num_blks_in_band;
	uint32_t band_map_crc;
	int rc = 0;

	if (status != FTL_MD_SUCCESS) {
		FTL_ERRLOG(dev, "L2P band restore error, failed to read P2L map\n");
		rc = -EIO;
		goto cleanup;
	}

	band_map_crc = spdk_crc32c_update(band->p2l_map.band_map,
					  ftl_tail_md_num_blocks(band->dev) * FTL_BLOCK_SIZE, 0);

	/* P2L map is only valid if the band state is closed */
	if (FTL_BAND_STATE_CLOSED == band->md->state && band->md->p2l_map_checksum != band_map_crc) {
		FTL_ERRLOG(dev, "L2P band restore error, inconsistent P2L map CRC\n");
		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_BASE);
		rc = -EINVAL;
		goto cleanup;
	}

	num_blks_in_band = ftl_get_num_blocks_in_band(dev);
	for (i = 0; i < num_blks_in_band; ++i) {
		uint64_t lba_off;
		lba = band->p2l_map.band_map[i].lba;
		seq_id = band->p2l_map.band_map[i].seq_id;

		/* Block holds no user data */
		if (lba == FTL_LBA_INVALID) {
			continue;
		}
		if (lba >= dev->num_lbas) {
			FTL_ERRLOG(dev, "L2P band restore ERROR, LBA out of range\n");
			rc = -EINVAL;
			break;
		}
		/* LBA belongs to a different recovery iteration */
		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
			continue;
		}

		lba_off = lba - pctx->iter.lba_first;
		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {

			/* Overlapped band/chunk has newer data - invalidate P2L map on open/full band  */
			if (FTL_BAND_STATE_OPEN == band->md->state || FTL_BAND_STATE_FULL == band->md->state) {
				addr = ftl_band_addr_from_block_offset(band, i);
				ftl_band_set_p2l(band, FTL_LBA_INVALID, addr, 0);
			}

			/* Newer data already recovered */
			continue;
		}

		addr = ftl_band_addr_from_block_offset(band, i);

		curr_addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);

		/*
		 * This block wins over the address currently stored in the snippet. If that
		 * address lies in another open/full band and still maps this LBA with a
		 * sequence ID not greater than ours, invalidate that band's P2L entry.
		 */
		if (curr_addr != FTL_ADDR_INVALID && !ftl_addr_in_nvc(dev, curr_addr) && curr_addr != addr) {
			struct ftl_band *curr_band = ftl_band_from_addr(dev, curr_addr);

			if (FTL_BAND_STATE_OPEN == curr_band->md->state || FTL_BAND_STATE_FULL == curr_band->md->state) {
				size_t prev_offset = ftl_band_block_offset_from_addr(curr_band, curr_addr);
				if (curr_band->p2l_map.band_map[prev_offset].lba == lba &&
				    seq_id >= curr_band->p2l_map.band_map[prev_offset].seq_id) {
					ftl_band_set_p2l(curr_band, FTL_LBA_INVALID, curr_addr, 0);
				}
			}
		}

		/* Record this block as the current location of the LBA */
		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
	}


cleanup:
	ftl_band_release_p2l_map(band);

	/* This callback is done; keep the error (if any) for the end of the band walk */
	sctx->qd--;
	if (rc) {
		sctx->status = rc;
	}

	ftl_mngt_continue_step(mngt);
}
487 
488 static void
489 ftl_mngt_recovery_iteration_restore_band_l2p(struct spdk_ftl_dev *dev,
490 		struct ftl_mngt_process *mngt)
491 {
492 	ftl_mngt_recovery_walk_band_tail_md(dev, mngt, restore_band_l2p_cb);
493 }
494 
495 static int
496 restore_chunk_l2p_cb(struct ftl_nv_cache_chunk *chunk, void *ctx)
497 {
498 	struct ftl_mngt_recovery_ctx *pctx = ctx;
499 	struct spdk_ftl_dev *dev;
500 	struct ftl_nv_cache *nv_cache = chunk->nv_cache;
501 	ftl_addr addr;
502 	const uint64_t seq_id = chunk->md->seq_id;
503 	uint64_t i, lba;
504 	uint32_t chunk_map_crc;
505 
506 	dev = SPDK_CONTAINEROF(chunk->nv_cache, struct spdk_ftl_dev, nv_cache);
507 
508 	chunk_map_crc = spdk_crc32c_update(chunk->p2l_map.chunk_map,
509 					   ftl_nv_cache_chunk_tail_md_num_blocks(chunk->nv_cache) * FTL_BLOCK_SIZE, 0);
510 	if (chunk->md->p2l_map_checksum != chunk_map_crc) {
511 		ftl_stats_crc_error(dev, FTL_STATS_TYPE_MD_NV_CACHE);
512 		return -1;
513 	}
514 
515 	for (i = 0; i < nv_cache->chunk_blocks; ++i) {
516 		uint64_t lba_off;
517 
518 		lba = ftl_chunk_map_get_lba(chunk, i);
519 
520 		if (lba == FTL_LBA_INVALID) {
521 			continue;
522 		}
523 		if (lba >= dev->num_lbas) {
524 			FTL_ERRLOG(dev, "L2P Chunk restore ERROR, LBA out of range\n");
525 			return -1;
526 		}
527 		if (lba < pctx->iter.lba_first || lba >= pctx->iter.lba_last) {
528 			continue;
529 		}
530 
531 		lba_off = lba - pctx->iter.lba_first;
532 		if (seq_id < pctx->l2p_snippet.seq_id[lba_off]) {
533 			/* Newer data already recovered */
534 			continue;
535 		}
536 
537 		addr = ftl_addr_from_nvc_offset(dev, chunk->offset + i);
538 		ftl_addr_store(dev, pctx->l2p_snippet.l2p, lba_off, addr);
539 		pctx->l2p_snippet.seq_id[lba_off] = seq_id;
540 	}
541 
542 	return 0;
543 }
544 
545 static void
546 ftl_mngt_recovery_iteration_restore_chunk_l2p(struct spdk_ftl_dev *dev,
547 		struct ftl_mngt_process *mngt)
548 {
549 	ftl_mngt_nv_cache_restore_l2p(dev, mngt, restore_chunk_l2p_cb, ftl_mngt_get_caller_ctx(mngt));
550 }
551 
552 static void
553 ftl_mngt_recovery_iteration_restore_valid_map(struct spdk_ftl_dev *dev,
554 		struct ftl_mngt_process *mngt)
555 {
556 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_caller_ctx(mngt);
557 	uint64_t lba, lba_off;
558 	ftl_addr addr;
559 
560 	for (lba = pctx->iter.lba_first; lba < pctx->iter.lba_last; lba++) {
561 		lba_off = lba - pctx->iter.lba_first;
562 		addr = ftl_addr_load(dev, pctx->l2p_snippet.l2p, lba_off);
563 
564 		if (addr == FTL_ADDR_INVALID) {
565 			continue;
566 		}
567 
568 		if (!ftl_addr_in_nvc(dev, addr)) {
569 			struct ftl_band *band = ftl_band_from_addr(dev, addr);
570 			band->p2l_map.num_valid++;
571 		}
572 
573 		if (ftl_bitmap_get(dev->valid_map, addr)) {
574 			assert(false);
575 			ftl_mngt_fail_step(mngt);
576 			return;
577 		} else {
578 			ftl_bitmap_set(dev->valid_map, addr);
579 		}
580 	}
581 
582 	ftl_mngt_next_step(mngt);
583 }
584 
585 static void
586 p2l_ckpt_preprocess(struct spdk_ftl_dev *dev, struct ftl_mngt_recovery_ctx *pctx)
587 {
588 	uint64_t seq_id;
589 	int md_region, ckpt_id;
590 
591 	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
592 	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
593 		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
594 		seq_id = ftl_mngt_p2l_ckpt_get_seq_id(dev, md_region);
595 		pctx->p2l_ckpt_seq_id[ckpt_id] = seq_id;
596 		FTL_NOTICELOG(dev, "P2L ckpt_id=%d found seq_id=%"PRIu64"\n", ckpt_id, seq_id);
597 	}
598 }
599 
600 static int
601 p2l_ckpt_restore_p2l(struct ftl_mngt_recovery_ctx *pctx, struct ftl_band *band)
602 {
603 	uint64_t seq_id;
604 	int md_region, ckpt_id;
605 
606 	memset(band->p2l_map.band_map, -1,
607 	       FTL_BLOCK_SIZE * ftl_p2l_map_num_blocks(band->dev));
608 
609 	for (md_region = FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
610 	     md_region <= FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MAX; md_region++) {
611 		ckpt_id = md_region - FTL_LAYOUT_REGION_TYPE_P2L_CKPT_MIN;
612 		seq_id = pctx->p2l_ckpt_seq_id[ckpt_id];
613 		if (seq_id == band->md->seq) {
614 			FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u ckpt_id=%d seq_id=%"
615 				      PRIu64"\n", band->id, ckpt_id, seq_id);
616 			return ftl_mngt_p2l_ckpt_restore(band, md_region, seq_id);
617 		}
618 	}
619 
620 	/* Band opened but no valid blocks within it, set write pointer to 0 */
621 	ftl_band_iter_init(band);
622 	FTL_NOTICELOG(band->dev, "Restore band P2L band_id=%u, band_seq_id=%"PRIu64" does not"
623 		      " match any P2L checkpoint\n", band->id, band->md->seq);
624 	return 0;
625 }
626 
/*
 * "Preprocess P2L checkpoints" step: caches the sequence IDs of all P2L
 * checkpoint regions in the recovery context, then moves on.
 */
static void
ftl_mngt_recovery_pre_process_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	p2l_ckpt_preprocess(dev, ftl_mngt_get_process_ctx(mngt));

	ftl_mngt_next_step(mngt);
}
635 
/*
 * "Recover max seq ID" step: delegates to ftl_recover_max_seq() and moves on
 * unconditionally.
 */
static void
ftl_mngt_recover_seq_id(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_recover_max_seq(dev);

	ftl_mngt_next_step(mngt);
}
642 
643 static void
644 ftl_mngt_recovery_open_bands_p2l(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
645 {
646 	struct ftl_mngt_recovery_ctx *pctx = ftl_mngt_get_process_ctx(mngt);
647 	struct ftl_band *band;
648 
649 	if (TAILQ_EMPTY(&pctx->open_bands)) {
650 		FTL_NOTICELOG(dev, "No more open bands to recover from P2L\n");
651 		if (pctx->status) {
652 			ftl_mngt_fail_step(mngt);
653 		} else {
654 			ftl_mngt_next_step(mngt);
655 		}
656 		return;
657 	}
658 
659 	if (!ftl_mngt_get_step_ctx(mngt)) {
660 		ftl_mngt_alloc_step_ctx(mngt, sizeof(bool));
661 
662 		/* Step first time called, initialize */
663 		TAILQ_FOREACH(band, &pctx->open_bands, queue_entry) {
664 			band->md->df_p2l_map = FTL_DF_OBJ_ID_INVALID;
665 			if (ftl_band_alloc_p2l_map(band)) {
666 				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot allocate P2L map\n");
667 				ftl_mngt_fail_step(mngt);
668 				return;
669 			}
670 
671 			if (p2l_ckpt_restore_p2l(pctx, band)) {
672 				FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot restore P2L\n");
673 				ftl_mngt_fail_step(mngt);
674 				return;
675 			}
676 
677 			if (!band->p2l_map.p2l_ckpt) {
678 				band->p2l_map.p2l_ckpt = ftl_p2l_ckpt_acquire_region_type(dev, band->md->p2l_md_region);
679 				if (!band->p2l_map.p2l_ckpt) {
680 					FTL_ERRLOG(dev, "Open band recovery ERROR, Cannot acquire P2L\n");
681 					ftl_mngt_fail_step(mngt);
682 					return;
683 				}
684 			}
685 		}
686 	}
687 
688 	band = TAILQ_FIRST(&pctx->open_bands);
689 
690 	if (ftl_band_filled(band, band->md->iter.offset)) {
691 		band->md->state = FTL_BAND_STATE_FULL;
692 	}
693 
694 	/* In a next step (finalize band initialization) this band will
695 	 * be assigned to the writer. So temporary we move this band
696 	 * to the closed list, and in the next step it will be moved to
697 	 * the writer from such list.
698 	 */
699 	TAILQ_REMOVE(&pctx->open_bands, band, queue_entry);
700 	TAILQ_INSERT_TAIL(&dev->shut_bands, band, queue_entry);
701 
702 	FTL_NOTICELOG(dev, "Open band recovered, id = %u, seq id %"PRIu64", write offset %"PRIu64"\n",
703 		      band->id, band->md->seq, band->md->iter.offset);
704 
705 	ftl_mngt_continue_step(mngt);
706 }
707 
/*
 * "Restore valid maps counters" step of the SHM recovery: delegates to
 * ftl_valid_map_load_state() and moves on unconditionally.
 */
static void
ftl_mngt_restore_valid_counters(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
{
	ftl_valid_map_load_state(dev);

	ftl_mngt_next_step(mngt);
}
714 
715 static void
716 ftl_mngt_complete_unmap_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
717 {
718 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
719 
720 	dev->sb_shm->trim.in_progress = false;
721 
722 	if (!status) {
723 		ftl_mngt_next_step(mngt);
724 	} else {
725 		ftl_mngt_fail_step(mngt);
726 	}
727 }
728 
729 static void
730 ftl_mngt_complete_unmap(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
731 {
732 	uint64_t start_lba, num_blocks, seq_id;
733 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
734 
735 	if (dev->sb_shm->trim.in_progress) {
736 		start_lba = dev->sb_shm->trim.start_lba;
737 		num_blocks = dev->sb_shm->trim.num_blocks;
738 		seq_id = dev->sb_shm->trim.seq_id;
739 
740 		assert(seq_id <= dev->sb->seq_id);
741 
742 		FTL_NOTICELOG(dev, "Incomplete unmap detected lba: %"PRIu64" num_blocks: %"PRIu64"\n",
743 			      start_lba, num_blocks);
744 
745 		ftl_set_unmap_map(dev, start_lba, num_blocks, seq_id);
746 	}
747 
748 	md->owner.cb_ctx = mngt;
749 	md->cb = ftl_mngt_complete_unmap_cb;
750 
751 	ftl_md_persist(md);
752 }
753 
754 static void
755 ftl_mngt_recover_unmap_map_cb(struct spdk_ftl_dev *dev, struct ftl_md *md, int status)
756 {
757 	struct ftl_mngt_process *mngt = md->owner.cb_ctx;
758 	uint64_t num_md_blocks, first_page, num_pages;
759 	uint32_t lbas_in_page = FTL_BLOCK_SIZE / dev->layout.l2p.addr_size;
760 	uint64_t *page = ftl_md_get_buffer(md);
761 	union ftl_md_vss *page_vss = ftl_md_get_vss_buffer(md);
762 	uint64_t lba, num_blocks, vss_seq_id;
763 	size_t i, j;
764 
765 	if (status) {
766 		ftl_mngt_fail_step(mngt);
767 		return;
768 	}
769 
770 	num_md_blocks = ftl_md_get_buffer_size(md) / FTL_BLOCK_SIZE;
771 
772 	for (i = 0; i < num_md_blocks; ++i, page_vss++) {
773 		lba = page_vss->unmap.start_lba;
774 		num_blocks = page_vss->unmap.num_blocks;
775 		vss_seq_id = page_vss->unmap.seq_id;
776 
777 		first_page = lba / lbas_in_page;
778 		num_pages = num_blocks / lbas_in_page;
779 
780 		if (lba % lbas_in_page || num_blocks % lbas_in_page) {
781 			ftl_mngt_fail_step(mngt);
782 			return;
783 		}
784 
785 		for (j = first_page; j < first_page + num_pages; ++j) {
786 			page[j] = spdk_max(vss_seq_id, page[j]);
787 		}
788 	}
789 
790 	ftl_mngt_next_step(mngt);
791 }
792 
793 static void
794 ftl_mngt_recover_unmap_map(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
795 {
796 	struct ftl_md *md = dev->layout.md[FTL_LAYOUT_REGION_TYPE_TRIM_MD];
797 
798 	if (ftl_fast_recovery(dev)) {
799 		FTL_DEBUGLOG(dev, "SHM: skipping unmap map recovery\n");
800 		ftl_mngt_next_step(mngt);
801 		return;
802 	}
803 
804 	md->owner.cb_ctx = mngt;
805 	md->cb = ftl_mngt_recover_unmap_map_cb;
806 	ftl_md_restore(md);
807 }
808 
809 static void
810 ftl_mngt_recovery_shm_l2p(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
811 {
812 	if (ftl_fast_recovery(dev)) {
813 		ftl_mngt_call_process(mngt, &g_desc_recovery_shm);
814 	} else {
815 		ftl_mngt_skip_step(mngt);
816 	}
817 }
818 
/*
 * During dirty shutdown recovery, the whole L2P needs to be reconstructed. However,
 * recreating it all at the same time may take up too much DRAM, so it's done in multiple
 * iterations. This process describes the recovery of a part of L2P in one iteration.
 *
 * It is executed by ftl_mngt_recovery_run_iteration(); its steps reach the recovery
 * context through ftl_mngt_get_caller_ctx().
 */
static const struct ftl_mngt_process_desc g_desc_recovery_iteration = {
	.name = "FTL recovery iteration",
	.steps = {
		{
			/* Read the current L2P window into the snippet buffer */
			.name = "Load L2P",
			.action = ftl_mngt_recovery_iteration_load_l2p,
		},
		{
			/* Seed sequence IDs from the trim map and invalidate the L2P entries */
			.name = "Initialize sequence IDs",
			.action = ftl_mngt_recovery_iteration_init_seq_ids,
		},
		{
			/* Merge NV cache chunk P2L maps into the snippet */
			.name = "Restore chunk L2P",
			.action = ftl_mngt_recovery_iteration_restore_chunk_l2p,
		},
		{
			/* Merge band P2L maps into the snippet; the step context tracks the band walk */
			.name = "Restore band L2P",
			.ctx_size = sizeof(struct band_md_ctx),
			.action = ftl_mngt_recovery_iteration_restore_band_l2p,
		},
		{
			/* Mark every recovered address in the valid map */
			.name = "Restore valid map",
			.action = ftl_mngt_recovery_iteration_restore_valid_map,
		},
		{
			/* Write the rebuilt L2P window back */
			.name = "Save L2P",
			.action = ftl_mngt_recovery_iteration_save_l2p,
		},
		{}
	}
};
855 
/*
 * Loading of FTL after dirty shutdown. Recovers metadata, L2P, decides on amount of recovery
 * iterations to be executed (dependent on ratio of L2P cache size and total L2P size)
 *
 * The process context is a struct ftl_mngt_recovery_ctx. Steps whose actions are not
 * defined in this file come from other FTL management modules.
 */
static const struct ftl_mngt_process_desc g_desc_recovery = {
	.name = "FTL recovery",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			/* Allocates the temporary L2P snippet (not needed for fast recovery) */
			.name = "Initialize recovery",
			.action = ftl_mngt_recovery_init,
			.cleanup = ftl_mngt_recovery_deinit
		},
		{
			/* Restores band metadata and collects the open bands */
			.name = "Recover band state",
			.action = ftl_mngt_recovery_restore_band_state,
		},
		{
			.name = "Initialize P2L checkpointing",
			.action = ftl_mngt_p2l_init_ckpt,
			.cleanup = ftl_mngt_p2l_deinit_ckpt
		},
		{
			.name = "Restore P2L checkpoints",
			.action = ftl_mngt_p2l_restore_ckpt
		},
		{
			/* Caches the sequence ID of every P2L checkpoint region */
			.name = "Preprocess P2L checkpoints",
			.action = ftl_mngt_recovery_pre_process_p2l
		},
		{
			/* Rebuilds the P2L maps of the open bands from the checkpoints */
			.name = "Recover open bands P2L",
			.action = ftl_mngt_recovery_open_bands_p2l
		},
		{
			.name = "Recover chunk state",
			.action = ftl_mngt_nv_cache_restore_chunk_state
		},
		{
			.name = "Recover max seq ID",
			.action = ftl_mngt_recover_seq_id
		},
		{
			/* Replays the unmap operations recorded in the trim metadata (skipped for fast recovery) */
			.name = "Recover unmap map",
			.action = ftl_mngt_recover_unmap_map
		},
		{
			.name = "Recover open chunks P2L",
			.action = ftl_mngt_nv_cache_recover_open_chunk
		},
		{
			/* Repeatedly runs g_desc_recovery_iteration until the whole L2P is processed */
			.name = "Recovery iterations",
			.action = ftl_mngt_recovery_run_iteration,
		},
		{
			/* Releases the temporary L2P snippet before the L2P itself is initialized */
			.name = "Deinitialize recovery",
			.action = ftl_mngt_recovery_deinit
		},
		{
			.name = "Initialize L2P",
			.action = ftl_mngt_init_l2p,
			.cleanup = ftl_mngt_deinit_l2p
		},
		{
			/* Runs g_desc_recovery_shm for fast recovery, skipped otherwise */
			.name = "Recover L2P from shared memory",
			.action = ftl_mngt_recovery_shm_l2p,
		},
		{
			.name = "Finalize band initialization",
			.action = ftl_mngt_finalize_init_bands,
		},
		{
			.name = "Free P2L region bufs",
			.action = ftl_mngt_p2l_free_bufs,
		},
		{
			.name = "Start core poller",
			.action = ftl_mngt_start_core_poller,
			.cleanup = ftl_mngt_stop_core_poller
		},
		{
			.name = "Self test on startup",
			.action = ftl_mngt_self_test
		},
		{
			.name = "Finalize initialization",
			.action = ftl_mngt_finalize_startup,
		},
		{}
	}
};
947 
/*
 * Shared memory specific steps for dirty shutdown recovery - main task is rebuilding the state of
 * L2P cache (paged in/out status, dirtiness etc. of individual pages).
 *
 * Called from ftl_mngt_recovery_shm_l2p() when fast recovery is in effect.
 * NOTE(review): ctx_size reserves a struct ftl_mngt_recovery_ctx, but none of the steps
 * defined in this file read this process' own context - confirm it is needed.
 */
static const struct ftl_mngt_process_desc g_desc_recovery_shm = {
	.name = "FTL recovery from SHM",
	.ctx_size = sizeof(struct ftl_mngt_recovery_ctx),
	.steps = {
		{
			.name = "Restore L2P from SHM",
			.action = ftl_mngt_restore_l2p,
		},
		{
			/* Reloads the valid map state */
			.name = "Restore valid maps counters",
			.action = ftl_mngt_restore_valid_counters,
		},
		{
			/* Re-applies a trim that was in progress and persists the trim metadata */
			.name = "Complete unmap transaction",
			.action = ftl_mngt_complete_unmap,
		},
		{}
	}
};
971 
972 void
973 ftl_mngt_recover(struct spdk_ftl_dev *dev, struct ftl_mngt_process *mngt)
974 {
975 	ftl_mngt_call_process(mngt, &g_desc_recovery);
976 }
977